[Mesa-dev] [PATCH] mesa/glformats: Remove redundant helper _mesa_base_format_component_count

2018-10-22 Thread Eduardo Lima Mitev
There exists _mesa_components_in_format() which already includes
all cases handled in _mesa_base_format_component_count().
---
 src/mesa/drivers/dri/i965/brw_blorp.c |  2 +-
 src/mesa/main/glformats.c | 27 ---
 src/mesa/main/glformats.h |  3 ---
 src/mesa/main/teximage.c  |  4 ++--
 4 files changed, 3 insertions(+), 33 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c 
b/src/mesa/drivers/dri/i965/brw_blorp.c
index ad3a47ef035..b286b231537 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -1195,7 +1195,7 @@ set_write_disables(const struct intel_renderbuffer *irb,
 * RGB we can treat alpha as not used and write whatever we like into it.
 */
const GLenum base_format = irb->Base.Base._BaseFormat;
-   const int components = _mesa_base_format_component_count(base_format);
+   const int components = _mesa_components_in_format(base_format);
bool disables = false;
 
assert(components > 0);
diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c
index bbeb6034dd7..6cb3435dea2 100644
--- a/src/mesa/main/glformats.c
+++ b/src/mesa/main/glformats.c
@@ -1630,33 +1630,6 @@ _mesa_base_format_has_channel(GLenum base_format, GLenum 
pname)
 }
 
 
-/**
- * Returns the number of channels/components for a base format.
- */
-GLint
-_mesa_base_format_component_count(GLenum base_format)
-{
-   switch (base_format) {
-   case GL_LUMINANCE:
-   case GL_RED:
-   case GL_ALPHA:
-   case GL_INTENSITY:
-   case GL_DEPTH_COMPONENT:
-  return 1;
-   case GL_RG:
-   case GL_LUMINANCE_ALPHA:
-   case GL_DEPTH_STENCIL:
-  return 2;
-   case GL_RGB:
-  return 3;
-   case GL_RGBA:
-  return 4;
-   default:
-  return -1;
-   }
-}
-
-
 /**
  * If format is a generic compressed format, return the corresponding
  * non-compressed format.  For other formats, return the format as-is.
diff --git a/src/mesa/main/glformats.h b/src/mesa/main/glformats.h
index 5a21317159d..0aefdf50fef 100644
--- a/src/mesa/main/glformats.h
+++ b/src/mesa/main/glformats.h
@@ -119,9 +119,6 @@ _mesa_unpack_format_to_base_format(GLenum format);
 extern GLboolean
 _mesa_base_format_has_channel(GLenum base_format, GLenum pname);
 
-extern GLint
-_mesa_base_format_component_count(GLenum base_format);
-
 extern GLenum
 _mesa_generic_compressed_format_to_uncompressed_format(GLenum format);
 
diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index d45854bd17f..6805b47c728 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -2407,8 +2407,8 @@ copytexture_error_check( struct gl_context *ctx, GLuint 
dimensions,
 
if (_mesa_is_gles(ctx)) {
   bool valid = true;
-  if (_mesa_base_format_component_count(baseFormat) >
-  _mesa_base_format_component_count(rb_base_format)) {
+  if (_mesa_components_in_format(baseFormat) >
+  _mesa_components_in_format(rb_base_format)) {
  valid = false;
   }
   if (baseFormat == GL_DEPTH_COMPONENT ||
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 31/31] nir: Add a bool to float32 lowering pass

2018-10-22 Thread Christian Gmeiner
Am Di., 23. Okt. 2018 um 01:43 Uhr schrieb Jason Ekstrand
:
>
> On Mon, Oct 22, 2018 at 6:20 PM Alyssa Rosenzweig  
> wrote:
>>
>> For what it's worth, Midgard has real integers (including int32
>> support), using hardware-level D3D10 boolean conventions. I'm trying to
>> wrap my head around how this interacts with 5d85a0a.
>
>
> Right.  Sorry.  I forgot who's working on what chip these days.  Maybe Ilia 
> can CC the right person.
>

I really love this pass and will help me/etnaviv to
 - reduce my hand crafted handling for such cases
 - make it easy to support vivante gpus with and without integer support

I can give this pass a try over the weekend if you want.
-- 
greets
--
Christian Gmeiner, MSc

https://christian-gmeiner.info
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] i965/miptree: Use cpu tiling/detiling when mapping

2018-10-22 Thread Tapani Pälli



On 10/22/18 11:47 PM, Matt Turner wrote:

On Mon, Sep 24, 2018 at 4:20 AM Tapani Pälli  wrote:


From: Scott D Phillips 

Rename the (un)map_gtt functions to (un)map_map (map by
returning a map) and add new functions (un)map_tiled_memcpy that
return a shadow buffer populated with the intel_tiled_memcpy
functions.

Tiling/detiling with the cpu will be the only way to handle Yf/Ys
tiling, when support is added for those formats.

v2: Compute extents properly in the x|y-rounded-down case (Chris Wilson)

v3: Add units to parameter names of tile_extents (Nanley Chery)
 Use _mesa_align_malloc for the shadow copy (Nanley)
 Continue using gtt maps on gen4 (Nanley)

v4: Use streaming_load_memcpy when detiling

v5: (edited by Ken) Move map_tiled_memcpy above map_movntdqa, so it
 takes precedence.  Add intel_miptree_access_raw, needed after
 rebasing on commit b499b85b0f2cc0c82b7c9af91502c2814fdc8e67.

v6: refactor to changes done for sse41 separation (Tapani)

Reviewed-by: Chris Wilson  (v5)
Reviewed-by: Kenneth Graunke  (v5)

Signed-off-by: Tapani Pälli 
---
  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 110 +-
  1 file changed, 106 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 36681352ba7..4c2cee8ebba 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -31,6 +31,8 @@
  #include "intel_image.h"
  #include "intel_mipmap_tree.h"
  #include "intel_tex.h"
+#include "intel_tiled_memcpy.h"
+#include "intel_tiled_memcpy_sse41.h"
  #include "intel_blit.h"
  #include "intel_fbo.h"

@@ -2998,7 +3000,7 @@ intel_miptree_unmap_raw(struct intel_mipmap_tree *mt)
  }

  static void
-intel_miptree_unmap_gtt(struct brw_context *brw,
+intel_miptree_unmap_map(struct brw_context *brw,
  struct intel_mipmap_tree *mt,
  struct intel_miptree_map *map,
  unsigned int level, unsigned int slice)
@@ -3007,7 +3009,7 @@ intel_miptree_unmap_gtt(struct brw_context *brw,
  }

  static void
-intel_miptree_map_gtt(struct brw_context *brw,
+intel_miptree_map_map(struct brw_context *brw,
   struct intel_mipmap_tree *mt,
   struct intel_miptree_map *map,
   unsigned int level, unsigned int slice)
@@ -3055,7 +3057,7 @@ intel_miptree_map_gtt(struct brw_context *brw,
 mt, _mesa_get_format_name(mt->format),
 x, y, map->ptr, map->stride);

-   map->unmap = intel_miptree_unmap_gtt;
+   map->unmap = intel_miptree_unmap_map;
  }

  static void
@@ -3087,6 +3089,101 @@ intel_miptree_unmap_blit(struct brw_context *brw,
 intel_miptree_release(>linear_mt);
  }

+/* Compute extent parameters for use with tiled_memcpy functions.
+ * xs are in units of bytes and ys are in units of strides.
+ */
+static inline void
+tile_extents(struct intel_mipmap_tree *mt, struct intel_miptree_map *map,
+ unsigned int level, unsigned int slice, unsigned int *x1_B,
+ unsigned int *x2_B, unsigned int *y1_el, unsigned int *y2_el)
+{
+   unsigned int block_width, block_height;
+   unsigned int x0_el, y0_el;
+
+   _mesa_get_format_block_size(mt->format, _width, _height);
+
+   assert(map->x % block_width == 0);
+   assert(map->y % block_height == 0);
+
+   intel_miptree_get_image_offset(mt, level, slice, _el, _el);
+   *x1_B = (map->x / block_width + x0_el) * mt->cpp;
+   *y1_el = map->y / block_height + y0_el;
+   *x2_B = (DIV_ROUND_UP(map->x + map->w, block_width) + x0_el) * mt->cpp;
+   *y2_el = DIV_ROUND_UP(map->y + map->h, block_height) + y0_el;
+}
+
+static void
+intel_miptree_unmap_tiled_memcpy(struct brw_context *brw,
+ struct intel_mipmap_tree *mt,
+ struct intel_miptree_map *map,
+ unsigned int level,
+ unsigned int slice)
+{
+   if (map->mode & GL_MAP_WRITE_BIT) {
+  unsigned int x1, x2, y1, y2;
+  tile_extents(mt, map, level, slice, , , , );
+
+  char *dst = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
+  dst += mt->offset;
+
+  linear_to_tiled(x1, x2, y1, y2, dst, map->ptr, mt->surf.row_pitch,
+  map->stride, brw->has_swizzling, mt->surf.tiling,
+  INTEL_COPY_MEMCPY);
+
+  intel_miptree_unmap_raw(mt);
+   }
+   _mesa_align_free(map->buffer);
+   map->buffer = map->ptr = NULL;
+}
+
+static void
+intel_miptree_map_tiled_memcpy(struct brw_context *brw,
+   struct intel_mipmap_tree *mt,
+   struct intel_miptree_map *map,
+   unsigned int level, unsigned int slice)
+{
+   intel_miptree_access_raw(brw, mt, level, slice,
+map->mode & GL_MAP_WRITE_BIT);
+
+   unsigned int x1, x2, y1, y2;
+   tile_extents(mt, 

Re: [Mesa-dev] [PATCH] mesa: allow EXT_texture_compression_s3tc on ES2

2018-10-22 Thread Erik Faye-Lund
On Thu, 2018-10-18 at 14:31 -0400, Ilia Mirkin wrote:
> On Thu, Oct 18, 2018 at 2:26 PM Erik Faye-Lund
>  wrote:
> > On Oct 18, 2018 18:21, Ilia Mirkin  wrote:
> > 
> > Have you verified that this works OK on GLES2? This extension
> > enables
> > online compression, which isn't normally available in GLES, and I
> > wouldn't be surprised if the current mesa TexImage handlers prevent
> > it.
> > 
> > The way I read the extension spec, it doesn't enable online
> > compression on gles2. And from testing, it seems like mesa does the
> > right thing. I'll double check, though...
> 
> """
> In extended OpenGL ES 2.0.25 these new tokens are accepted by the
>  parameter of TexImage2D, CompressedTexImage2D
> and
> the 
> parameter of CompressedTexSubImage2D.
> """
> 
> and in the ES 2 section of the modifications:
> 
> """
> Modify Section 3.7.2, Alternate Texture Image Specification
> Commands
> 
> (add to the end of section)
> 
> When the internal format of the texture object is
> COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT,
> COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT,
> the
> update region specified in TexSubImage2D must be aligned to 4x4
> pixel blocks. If  or  are not multiples of 4 an
> INVALID_OPERATION error is generated. If  is not a
> multiple
> of 4 and  +  is not equal to the width of the LOD
> then an INVALID_OPERATION error is generated.  If  is not
> a multiple of 4 and  +  is not equal to the
> height of the LOD then an INVALID_OPERATION error is generated.
> """
> 
> Since you can pass the compressed internal format to TexImage2D, that
> implies online compression.
> 

Right. Yeah, I think you're right, and this needs some more work.

Thanks for pointing that out.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 31/31] nir: Add a bool to float32 lowering pass

2018-10-22 Thread Alyssa Rosenzweig
I think so?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 31/31] nir: Add a bool to float32 lowering pass

2018-10-22 Thread Jason Ekstrand

On October 22, 2018 20:13:25 Alyssa Rosenzweig  wrote:


If you want to set me straight, that's probably the better patch to
argue it out. :)


No, I thought about it; you're right. I understand my hardware (and NIR)
much better now than what I wrote the earlier patch; you have now shown
me the error of my ways :)

Nothing special should be needed for Midgard.


Does that mean I should revise things to delete lower_b2f entirely?

--Jason



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 31/31] nir: Add a bool to float32 lowering pass

2018-10-22 Thread Alyssa Rosenzweig
> If you want to set me straight, that's probably the better patch to
> argue it out. :)

No, I thought about it; you're right. I understand my hardware (and NIR)
much better now than what I wrote the earlier patch; you have now shown
me the error of my ways :)

Nothing special should be needed for Midgard.

Thank you.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH mesa] util: use *unsigned* ints for bit operations

2018-10-22 Thread Eric Anholt
Eric Engestrom  writes:

> Fixes errors thrown by GCC's Undefined Behaviour sanitizer (ubsan) every
> time this macro is used.

Reviewed-by: Eric Anholt 


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nir: Allow using nir_lower_io_to_scalar_early on VS input vars.

2018-10-22 Thread Eric Anholt
Eric Anholt  writes:

> This will be used on V3D to cut down the size of the VS inputs in the VPM
> (memory area for sharing data between shader stages).

Ping on this one.  I'd love to land the series behind it.


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3] intel/decoders: fix end of batch limit

2018-10-22 Thread Kenneth Graunke
On Monday, October 22, 2018 5:39:39 PM PDT Lionel Landwerlin wrote:
> Pointer arithmetic...
> 
> v2: s/4/sizeof(uint32_t)/ (Eric)
> 
> v3: Give bytes to print_batch() in error_decode (Lionel)
> Make clear what values we're dealing with in error_decode (Lionel)
> 
> Signed-off-by: Lionel Landwerlin 
> Reviewed-by: Eric Engestrom  (v2)
> ---
>  src/intel/common/gen_batch_decoder.c |  2 +-
>  src/intel/tools/aubinator_error_decode.c | 15 ---
>  src/intel/tools/aubinator_viewer_decoder.cpp |  2 +-
>  3 files changed, 10 insertions(+), 9 deletions(-)

Thanks for fixing this!  Looks like everything's properly in bytes now.

Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3] intel/decoders: fix end of batch limit

2018-10-22 Thread Lionel Landwerlin
Pointer arithmetic...

v2: s/4/sizeof(uint32_t)/ (Eric)

v3: Give bytes to print_batch() in error_decode (Lionel)
Make clear what values we're dealing with in error_decode (Lionel)

Signed-off-by: Lionel Landwerlin 
Reviewed-by: Eric Engestrom  (v2)
---
 src/intel/common/gen_batch_decoder.c |  2 +-
 src/intel/tools/aubinator_error_decode.c | 15 ---
 src/intel/tools/aubinator_viewer_decoder.cpp |  2 +-
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/src/intel/common/gen_batch_decoder.c 
b/src/intel/common/gen_batch_decoder.c
index 827f3dbdd2f..0a36b1678b5 100644
--- a/src/intel/common/gen_batch_decoder.c
+++ b/src/intel/common/gen_batch_decoder.c
@@ -818,7 +818,7 @@ gen_print_batch(struct gen_batch_decode_ctx *ctx,
 const uint32_t *batch, uint32_t batch_size,
 uint64_t batch_addr)
 {
-   const uint32_t *p, *end = batch + batch_size;
+   const uint32_t *p, *end = batch + batch_size / sizeof(uint32_t);
int length;
struct gen_group *inst;
 
diff --git a/src/intel/tools/aubinator_error_decode.c 
b/src/intel/tools/aubinator_error_decode.c
index d7f9d9f2af1..c581414eb2c 100644
--- a/src/intel/tools/aubinator_error_decode.c
+++ b/src/intel/tools/aubinator_error_decode.c
@@ -292,7 +292,7 @@ struct section {
char *ring_name;
const char *buffer_name;
uint32_t *data;
-   int count;
+   int dword_count;
 };
 
 #define MAX_SECTIONS 256
@@ -389,11 +389,11 @@ get_gen_batch_bo(void *user_data, uint64_t address)
 {
for (int s = 0; s < num_sections; s++) {
   if (sections[s].gtt_offset <= address &&
-  address < sections[s].gtt_offset + sections[s].count * 4) {
+  address < sections[s].gtt_offset + sections[s].dword_count * 4) {
  return (struct gen_batch_decode_bo) {
 .addr = sections[s].gtt_offset,
 .map = sections[s].data,
-.size = sections[s].count * 4,
+.size = sections[s].dword_count * 4,
  };
   }
}
@@ -424,14 +424,14 @@ read_data_file(FILE *file)
 
   if (line[0] == ':' || line[0] == '~') {
  uint32_t *data = NULL;
- int count = ascii85_decode(line+1, , line[0] == ':');
- if (count == 0) {
+ int dword_count = ascii85_decode(line+1, , line[0] == ':');
+ if (dword_count == 0) {
 fprintf(stderr, "ASCII85 decode failed.\n");
 exit(EXIT_FAILURE);
  }
  assert(num_sections < MAX_SECTIONS);
  sections[num_sections].data = data;
- sections[num_sections].count = count;
+ sections[num_sections].dword_count = dword_count;
  num_sections++;
  continue;
   }
@@ -610,7 +610,8 @@ read_data_file(FILE *file)
   strcmp(sections[s].buffer_name, "batch buffer") == 0 ||
   strcmp(sections[s].buffer_name, "ring buffer") == 0 ||
   strcmp(sections[s].buffer_name, "HW Context") == 0) {
- gen_print_batch(_ctx, sections[s].data, sections[s].count,
+ gen_print_batch(_ctx, sections[s].data,
+ sections[s].dword_count * 4,
  sections[s].gtt_offset);
   }
}
diff --git a/src/intel/tools/aubinator_viewer_decoder.cpp 
b/src/intel/tools/aubinator_viewer_decoder.cpp
index 7fe1c3ad1a9..a10d01e94d0 100644
--- a/src/intel/tools/aubinator_viewer_decoder.cpp
+++ b/src/intel/tools/aubinator_viewer_decoder.cpp
@@ -888,7 +888,7 @@ aub_viewer_render_batch(struct aub_viewer_decode_ctx *ctx,
 uint64_t batch_addr)
 {
struct gen_group *inst;
-   const uint32_t *p, *batch = (const uint32_t *) _batch, *end = batch + 
batch_size;
+   const uint32_t *p, *batch = (const uint32_t *) _batch, *end = batch + 
batch_size / sizeof(uint32_t);
int length;
 
for (p = batch; p < end; p += length) {
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 31/31] nir: Add a bool to float32 lowering pass

2018-10-22 Thread Ian Romanick
On 10/22/2018 05:09 PM, Alyssa Rosenzweig wrote:
>> Right.  Sorry.  I forgot who's working on what chip these days.  Maybe Ilia
>> can CC the right person.
> 
> No worries at all! It may well have been me (I had touched the b2f code;
> our hardware lacks a dedicated instruction and needs `iand 1.0` to do
> the conversion). I'm not sure which hardware lacks native integers these
> days.

It does have an instruction.  That instruction is just 'iand 1.0'. :)
This is what we do in i965.  I mentioned this in my review of patch 14.
If you want to set me straight, that's probably the better patch to
argue it out. :)

> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nir/algebraic: Fix a typo in the bit size validation code

2018-10-22 Thread Ian Romanick
On 10/22/2018 04:31 PM, Jason Ekstrand wrote:
> The conon_bit_class and canon_var_class variables got switched.

  canon  :)

Reviewed-by: Ian Romanick 

> 
> Fixes: 932c650e0b "nir/algebraic: Loosen a restriction on variables"
> Reported-by: Ian Romanick 
> ---
>  src/compiler/nir/nir_algebraic.py | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/src/compiler/nir/nir_algebraic.py 
> b/src/compiler/nir/nir_algebraic.py
> index b880aa0dc66..033c2298073 100644
> --- a/src/compiler/nir/nir_algebraic.py
> +++ b/src/compiler/nir/nir_algebraic.py
> @@ -362,8 +362,8 @@ class BitSizeValidator(object):
>if var_class == 0:
>   self._var_classes[var.index] = bit_class
>else:
> - canon_bit_class = self._class_relation.get_canonical(var_class)
> - canon_var_class = self._class_relation.get_canonical(bit_class)
> + canon_var_class = self._class_relation.get_canonical(var_class)
> + canon_bit_class = self._class_relation.get_canonical(bit_class)
>   assert canon_var_class < 0 or canon_bit_class < 0 or \
>  canon_var_class == canon_bit_class, \
>  'Variable {0} cannot be both {1}-bit and {2}-bit' \
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 31/31] nir: Add a bool to float32 lowering pass

2018-10-22 Thread Alyssa Rosenzweig
> Right.  Sorry.  I forgot who's working on what chip these days.  Maybe Ilia
> can CC the right person.

No worries at all! It may well have been me (I had touched the b2f code;
our hardware lacks a dedicated instruction and needs `iand 1.0` to do
the conversion). I'm not sure which hardware lacks native integers these
days.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 10/31] nir/prog: Use nir_bany in kill handling

2018-10-22 Thread Eric Anholt
Jason Ekstrand  writes:

> We have a helper that does exactly what the bany_inequal was doing.  It
> emits the same code but is a bit higher level and is designed to operate
> on a bvec4.

Patch 6, 8-10 also r-b.  I couldn't make sense of patch 7's optimization
pass beforehand, so I failed to review it.


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 14/31] nir/algebraic: Disable b2f lowering and two optimizations

2018-10-22 Thread Jason Ekstrand
On Mon, Oct 22, 2018 at 6:26 PM Ian Romanick  wrote:

> On 10/22/2018 03:13 PM, Jason Ekstrand wrote:
> > These all assume the 0/~0 representation of booleans.  We'll turn them
> > back on before too long.
> > ---
> >  src/compiler/nir/nir_opt_algebraic.py | 5 -
> >  1 file changed, 5 deletions(-)
> >
> > diff --git a/src/compiler/nir/nir_opt_algebraic.py
> b/src/compiler/nir/nir_opt_algebraic.py
> > index 8b24daddfdc..5a4e78e8e0e 100644
> > --- a/src/compiler/nir/nir_opt_algebraic.py
> > +++ b/src/compiler/nir/nir_opt_algebraic.py
> > @@ -436,9 +436,7 @@ optimizations = [
> > (('imul', ('b2i', a), ('b2i', b)), ('b2i', ('iand', a, b))),
> > (('fmul', ('b2f', a), ('b2f', b)), ('b2f', ('iand', a, b))),
> > (('fsat', ('fadd', ('b2f', a), ('b2f', b))), ('b2f', ('ior', a, b))),
> > -   (('iand', 'a@bool', 1.0), ('b2f', a), '!options->lower_b2f'),
> > # True/False are ~0 and 0 in NIR.  b2i of True is 1, and -1 is ~0
> (True).
>
> Is this comment still true at the end of the series?  Should it also
> change?
>

It is still true but it doesn't apply to any of the optimizations that
follow.  I'll kill it off.


> > -   (('ineg', ('b2i@32', a)), a),
> > (('flt', ('fneg', ('b2f', a)), 0), a), # Generated by TGSI KILL_IF.
> > (('flt', ('fsub', 0.0, ('b2f', a)), 0), a), # Generated by TGSI
> KILL_IF.
> > # Comparison with the same args.  Note that these are not done for
> > @@ -917,9 +915,6 @@ late_optimizations = [
> > # we do these late so that we don't get in the way of creating ffmas
> > (('fmin', ('fadd(is_used_once)', '#c', a), ('fadd(is_used_once)',
> '#c', b)), ('fadd', c, ('fmin', a, b))),
> > (('fmax', ('fadd(is_used_once)', '#c', a), ('fadd(is_used_once)',
> '#c', b)), ('fadd', c, ('fmax', a, b))),
> > -
> > -   # Lowered for backends without a dedicated b2f instruction
> > -   (('b2f@32', a), ('iand', a, 1.0), 'options->lower_b2f'),
>
> I've never understood the point of this.  The backend should just
> generate the iand instruction as it's implementation of b2f.  This is
> what i965 does.  Converting the b2f at the late hour doesn't give any
> opportunities to enable other optimizations, so it seems pretty useless.
>

I don't really get it either but Alyssa wanted it.  Maybe we can can it
eventually once their back-end gets more mature?  In any case, it's not
hurting anything sitting here.

--Jason


> >  ]
> >
> >  print(nir_algebraic.AlgebraicPass("nir_opt_algebraic",
> optimizations).render())
> >
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 21/31] nir: Add support for 1-bit data types

2018-10-22 Thread Jason Ekstrand
On Mon, Oct 22, 2018 at 6:22 PM Ian Romanick  wrote:

> On 10/22/2018 03:13 PM, Jason Ekstrand wrote:
> > This commit adds support for 1-bit booleans and integers.  Booleans
>
> I've noticed this in some of the patches... Boolean is a proper name, so
> it should be capitalized everywhere.
>

I did not know that.  Thanks for pointing it out!  I think I've fixed all
the commit messages now.

--Jason


> > obviously take a value of true or false.  Because we have to define the
> > semantics of 1-bit signed and unsigned integers, we define uint1_t to
> > take values of 0 and 1 and int1_t to take values of 0 and -1.  1-bit
> > arithmetic is then well-defined in the usual way, just with fewer bits.
> > The definition of int1_t and uint1_t doesn't usually matter but we do
> > need something for purposes of constant folding.
> > ---
> >  src/compiler/nir/nir.c| 15 +--
> >  src/compiler/nir/nir.h| 21 +++-
> >  src/compiler/nir/nir_builder.h| 12 -
> >  src/compiler/nir/nir_constant_expressions.py  | 25 ---
> >  src/compiler/nir/nir_instr_set.c  | 23 ++---
> >  .../nir/nir_lower_load_const_to_scalar.c  |  3 +++
> >  src/compiler/nir/nir_opt_constant_folding.c   |  3 +++
> >  src/compiler/nir/nir_opt_large_constants.c|  5 
> >  src/compiler/nir/nir_print.c  |  3 +++
> >  src/compiler/nir/nir_search.c |  3 ++-
> >  src/compiler/nir/nir_validate.c   |  2 +-
> >  src/compiler/nir_types.cpp|  2 +-
> >  src/compiler/spirv/spirv_to_nir.c |  9 +++
> >  13 files changed, 101 insertions(+), 25 deletions(-)
> >
> > diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
> > index 0be40d257f5..8e83edb3644 100644
> > --- a/src/compiler/nir/nir.c
> > +++ b/src/compiler/nir/nir.c
> > @@ -638,6 +638,7 @@ const_value_int(int64_t i, unsigned bit_size)
> >  {
> > nir_const_value v;
> > switch (bit_size) {
> > +   case 1:  v.b[0]   = i & 1;  break;
> > case 8:  v.i8[0]  = i;  break;
> > case 16: v.i16[0] = i;  break;
> > case 32: v.i32[0] = i;  break;
> > @@ -1206,6 +1207,8 @@ nir_src_comp_as_int(nir_src src, unsigned comp)
> >
> > assert(comp < load->def.num_components);
> > switch (load->def.bit_size) {
> > +   /* int1_t uses 0/-1 convention */
> > +   case 1:  return -(int)load->value.b[comp];
> > case 8:  return load->value.i8[comp];
> > case 16: return load->value.i16[comp];
> > case 32: return load->value.i32[comp];
> > @@ -1223,6 +1226,7 @@ nir_src_comp_as_uint(nir_src src, unsigned comp)
> >
> > assert(comp < load->def.num_components);
> > switch (load->def.bit_size) {
> > +   case 1:  return load->value.b[comp];
> > case 8:  return load->value.u8[comp];
> > case 16: return load->value.u16[comp];
> > case 32: return load->value.u32[comp];
> > @@ -1235,15 +1239,12 @@ nir_src_comp_as_uint(nir_src src, unsigned comp)
> >  bool
> >  nir_src_comp_as_bool(nir_src src, unsigned comp)
> >  {
> > -   assert(nir_src_is_const(src));
> > -   nir_load_const_instr *load =
> nir_instr_as_load_const(src.ssa->parent_instr);
> > +   int64_t i = nir_src_comp_as_int(src, comp);
> >
> > -   assert(comp < load->def.num_components);
> > -   assert(load->def.bit_size == 32);
> > -   assert(load->value.u32[comp] == NIR_TRUE ||
> > -  load->value.u32[comp] == NIR_FALSE);
> > +   /* Booleans of any size use 0/-1 convention */
> > +   assert(i == 0 || i == -1);
> >
> > -   return load->value.u32[comp];
> > +   return i;
> >  }
> >
> >  double
> > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> > index 47c7f400b2d..b19138f9e61 100644
> > --- a/src/compiler/nir/nir.h
> > +++ b/src/compiler/nir/nir.h
> > @@ -117,6 +117,7 @@ typedef enum {
> >  } nir_rounding_mode;
> >
> >  typedef union {
> > +   bool b[NIR_MAX_VEC_COMPONENTS];
> > float f32[NIR_MAX_VEC_COMPONENTS];
> > double f64[NIR_MAX_VEC_COMPONENTS];
> > int8_t i8[NIR_MAX_VEC_COMPONENTS];
> > @@ -778,17 +779,25 @@ typedef struct {
> > unsigned write_mask : NIR_MAX_VEC_COMPONENTS; /* ignored if
> dest.is_ssa is true */
> >  } nir_alu_dest;
> >
> > +/** NIR sized and unsized types
> > + *
> > + * The values in this enum are carefully chosen so that the sized type
> is
> > + * just the unsized type OR the number of bits.
> > + */
> >  typedef enum {
> > nir_type_invalid = 0, /* Not a valid type */
> > -   nir_type_float,
> > -   nir_type_int,
> > -   nir_type_uint,
> > -   nir_type_bool,
> > +   nir_type_int =   2,
> > +   nir_type_uint =  4,
> > +   nir_type_bool =  6,
> > +   nir_type_float = 128,
> > +   nir_type_bool1 = 1  | nir_type_bool,
> > nir_type_bool32 =32 | nir_type_bool,
> > +   nir_type_int1 =  1  | nir_type_int,
> > nir_type_int8 =  8  | nir_type_int,
> > nir_type_int16 = 16 | nir_type_int,
> > nir_type_int32 = 

Re: [Mesa-dev] [RFC 31/31] nir: Add a bool to float32 lowering pass

2018-10-22 Thread Jason Ekstrand
On Mon, Oct 22, 2018 at 6:13 PM Ilia Mirkin  wrote:

> On Mon, Oct 22, 2018 at 6:16 PM Jason Ekstrand 
> wrote:
> >
> > This should be useful for drivers that don't support real integers.
> >
> > Cc: Alyssa Rosenzweig 
> > ---
> >  src/compiler/Makefile.sources  |   1 +
> >  src/compiler/nir/meson.build   |   1 +
> >  src/compiler/nir/nir_lower_bool_to_float.c | 181 +
> >  3 files changed, 183 insertions(+)
> >  create mode 100644 src/compiler/nir/nir_lower_bool_to_float.c
> > +   nir_foreach_block(block, impl) {
> > +  nir_foreach_instr_safe(instr, block) {
> > + switch (instr->type) {
> > + case nir_instr_type_alu:
> > +progress |= lower_alu_instr(, nir_instr_as_alu(instr));
> > +break;
> > +
> > + case nir_instr_type_load_const: {
> > +nir_load_const_instr *load = nir_instr_as_load_const(instr);
> > +if (load->def.bit_size == 1) {
> > +   nir_const_value value = load->value;
> > +   for (unsigned i = 0; i < load->def.num_components; i++)
> > +  load->value.u32[i] = value.b[i] ? NIR_TRUE :
> NIR_FALSE;
> > +   load->def.bit_size = 32;
> > +   progress = true;
> > +}
> > +break;
> > + }
>
> Should this instead rewrite the load_const to a 1.0f / 0.0f?
>

Yup.  As you can tell, this is completely untested.  However, I think it
does make the point that 1-bit bools aren't bad for float-only hardware
which is why I did it.  I've fixed the bugs pointed out so far.

--Jason
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 31/31] nir: Add a bool to float32 lowering pass

2018-10-22 Thread Jason Ekstrand
On Mon, Oct 22, 2018 at 6:20 PM Alyssa Rosenzweig 
wrote:

> For what it's worth, Midgard has real integers (including int32
> support), using hardware-level D3D10 boolean conventions. I'm trying to
> wrap my head around how this interacts with 5d85a0a.
>

Right.  Sorry.  I forgot who's working on what chip these days.  Maybe Ilia
can CC the right person.

--Jason


> I'm tempted to think the standard lower_bool_to_int32 pass would work,
> with an emulated b2f instruction in the backend IR level, rather than
> burdening NIR with those details.
>
> I'll have to look at the patch set closer to understand the impact.
>
> ---
>
> > +nir_lower_bool_to_int32_impl(nir_function_impl *impl)
> > +nir_lower_bool_to_int32(nir_shader *shader)
> > +  if (function->impl &&
> nir_lower_bool_to_int32_impl(function->impl))
>
> I'm guessing these were intended to read float32?
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nir/algebraic: Fix a typo in the bit size validation code

2018-10-22 Thread Jason Ekstrand
The conon_bit_class and canon_var_class variables got switched.

Fixes: 932c650e0b "nir/algebraic: Loosen a restriction on variables"
Reported-by: Ian Romanick 
---
 src/compiler/nir/nir_algebraic.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/compiler/nir/nir_algebraic.py 
b/src/compiler/nir/nir_algebraic.py
index b880aa0dc66..033c2298073 100644
--- a/src/compiler/nir/nir_algebraic.py
+++ b/src/compiler/nir/nir_algebraic.py
@@ -362,8 +362,8 @@ class BitSizeValidator(object):
   if var_class == 0:
  self._var_classes[var.index] = bit_class
   else:
- canon_bit_class = self._class_relation.get_canonical(var_class)
- canon_var_class = self._class_relation.get_canonical(bit_class)
+ canon_var_class = self._class_relation.get_canonical(var_class)
+ canon_bit_class = self._class_relation.get_canonical(bit_class)
  assert canon_var_class < 0 or canon_bit_class < 0 or \
 canon_var_class == canon_bit_class, \
 'Variable {0} cannot be both {1}-bit and {2}-bit' \
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/6] nir/algebraic: Losten a restriction on variables

2018-10-22 Thread Jason Ekstrand
On Mon, Oct 22, 2018 at 6:28 PM Ian Romanick  wrote:

> On 10/20/2018 11:01 AM, Jason Ekstrand wrote:
> > Previously, we would fail if a variable had an assigned but unknown bit
> > size X and we tried to assign it an actual bit size.  However, this is
> > ok because, at the time we do the search, the variable does have an
> > actual bit size and it will match X because of the NIR rules.
> > ---
> >  src/compiler/nir/nir_algebraic.py | 8 ++--
> >  1 file changed, 6 insertions(+), 2 deletions(-)
> >
> > diff --git a/src/compiler/nir/nir_algebraic.py
> b/src/compiler/nir/nir_algebraic.py
> > index 34ea2ba9e68..bcefa3372b3 100644
> > --- a/src/compiler/nir/nir_algebraic.py
> > +++ b/src/compiler/nir/nir_algebraic.py
> > @@ -352,8 +352,12 @@ class BitSizeValidator(object):
> >if var_class == 0:
> >   self._var_classes[var.index] = bit_class
> >else:
> > - canon_class = self._class_relation.get_canonical(var_class)
> > - assert canon_class < 0 or canon_class == bit_class
> > + canon_bit_class = self._class_relation.get_canonical(var_class)
> > + canon_var_class = self._class_relation.get_canonical(bit_class)
>
> Are the _bit_ and _var_ names transposed in the left-hand side, or am I
> not understanding something?  Or both? :)
>

Uh Oops... Fortunately, it doesn't matter since it's just used in this
check.  I pushed this today with Samuel's review.  I'll send out a fixup
patch.

--Jason


> > + assert canon_var_class < 0 or canon_bit_class < 0 or \
> > +canon_var_class == canon_bit_class, \
> > +'Variable {0} cannot be both {1}-bit and {2}-bit' \
> > +.format(str(var), bit_class, var_class)
> >   var_class = self._class_relation.add_equiv(var_class,
> bit_class)
> >   self._var_classes[var.index] = var_class
> >
> >
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 15/31] nir: Rename boolean-related opcodes to include 32 in the name

2018-10-22 Thread Jason Ekstrand
On Mon, Oct 22, 2018 at 6:06 PM Ian Romanick  wrote:

> On 10/22/2018 03:13 PM, Jason Ekstrand wrote:
> > ---
> >  src/compiler/nir/nir_lower_alu_to_scalar.c |  8 ++--
> >  src/compiler/nir/nir_opcodes.py| 46 +++---
> >  src/compiler/nir/nir_opcodes_c.py  |  8 ++--
> >  3 files changed, 31 insertions(+), 31 deletions(-)
> >
> > diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c
> b/src/compiler/nir/nir_lower_alu_to_scalar.c
> > index 0be3aba9456..e424dff25c4 100644
> > --- a/src/compiler/nir/nir_lower_alu_to_scalar.c
> > +++ b/src/compiler/nir/nir_lower_alu_to_scalar.c
> > @@ -197,10 +197,10 @@ lower_alu_instr_scalar(nir_alu_instr *instr,
> nir_builder *b)
> >return false;
> >
> >LOWER_REDUCTION(nir_op_fdot, nir_op_fmul, nir_op_fadd);
> > -  LOWER_REDUCTION(nir_op_ball_fequal, nir_op_feq, nir_op_iand);
> > -  LOWER_REDUCTION(nir_op_ball_iequal, nir_op_ieq, nir_op_iand);
> > -  LOWER_REDUCTION(nir_op_bany_fnequal, nir_op_fne, nir_op_ior);
> > -  LOWER_REDUCTION(nir_op_bany_inequal, nir_op_ine, nir_op_ior);
> > +  LOWER_REDUCTION(nir_op_b32all_fequal, nir_op_feq32, nir_op_iand);
> > +  LOWER_REDUCTION(nir_op_b32all_iequal, nir_op_ieq32, nir_op_iand);
> > +  LOWER_REDUCTION(nir_op_b32any_fnequal, nir_op_fne32, nir_op_ior);
> > +  LOWER_REDUCTION(nir_op_b32any_inequal, nir_op_ine32, nir_op_ior);
> >LOWER_REDUCTION(nir_op_fall_equal, nir_op_seq, nir_op_fand);
> >LOWER_REDUCTION(nir_op_fany_nequal, nir_op_sne, nir_op_for);
> >
> > diff --git a/src/compiler/nir/nir_opcodes.py
> b/src/compiler/nir/nir_opcodes.py
> > index 4ef4ecc6f22..d349f74ed2a 100644
> > --- a/src/compiler/nir/nir_opcodes.py
> > +++ b/src/compiler/nir/nir_opcodes.py
> > @@ -89,7 +89,7 @@ class Opcode(object):
> >  # helper variables for strings
> >  tfloat = "float"
> >  tint = "int"
> > -tbool = "bool32"
> > +tbool32 = "bool32"
> >  tuint = "uint"
> >  tuint16 = "uint16"
> >  tfloat32 = "float32"
> > @@ -192,10 +192,10 @@ for src_t in [tint, tuint, tfloat]:
> >
> >  # We'll hand-code the to/from bool conversion opcodes.  Because bool
> doesn't
> >  # have multiple bit-sizes, we can always infer the size from the other
> type.
> > -unop_convert("f2b", tbool, tfloat, "src0 != 0.0")
> > -unop_convert("i2b", tbool, tint, "src0 != 0")
> > -unop_convert("b2f", tfloat, tbool, "src0 ? 1.0 : 0.0")
> > -unop_convert("b2i", tint, tbool, "src0 ? 1 : 0")
> > +unop_convert("f2b32", tbool32, tfloat, "src0 != 0.0")
> > +unop_convert("i2b32", tbool32, tint, "src0 != 0")
> > +unop_convert("b322f", tfloat, tbool32, "src0 ? 1.0 : 0.0")
> > +unop_convert("b322i", tint, tbool32, "src0 ? 1 : 0")
>
> Yeee-uck.  b332i is hard to type and hard for my brain to parse.  None
> of the other *2* opcodes have the size on the left side of the 2.  The
> all infer the size from the source operand.
>
> I'd really like the sized Boolean types to work like the other sized
> types.  I doubt we'll have have a using for a uint1 type, but I've been
> thinking about adding a bool8 or bool16 type for some time.  There are
> quite a few cases where we carry Boolean values for a long, long time in
> some of the very large shaders.  Those same shaders also often face
> register pressure.  Being able to store those long lived values in a
> smaller type could be very helpful.
>

I was afraid someone was going to say that. :-/  I guess it wouldn't be too
difficult to have a pre-patch that reworks b2i and b2f though that is going
to touch ALL the drivers.  I agree that it'd be the better more consistent
thing to do.  I'll look into it.

--Jason


> >
> >
> >  # Unary floating-point rounding operations.
> > @@ -405,8 +405,8 @@ def binop_convert(name, out_type, in_type,
> alg_props, const_expr):
> >  def binop(name, ty, alg_props, const_expr):
> > binop_convert(name, ty, ty, alg_props, const_expr)
> >
> > -def binop_compare(name, ty, alg_props, const_expr):
> > -   binop_convert(name, tbool, ty, alg_props, const_expr)
> > +def binop_compare32(name, ty, alg_props, const_expr):
> > +   binop_convert(name, tbool32, ty, alg_props, const_expr)
> >
> >  def binop_horiz(name, out_size, out_type, src1_size, src1_type,
> src2_size,
> >  src2_type, const_expr):
> > @@ -488,26 +488,26 @@ binop("frem", tfloat, "", "src0 - src1 *
> truncf(src0 / src1)")
> >
> >  # these integer-aware comparisons return a boolean (0 or ~0)
> >
> > -binop_compare("flt", tfloat, "", "src0 < src1")
> > -binop_compare("fge", tfloat, "", "src0 >= src1")
> > -binop_compare("feq", tfloat, commutative, "src0 == src1")
> > -binop_compare("fne", tfloat, commutative, "src0 != src1")
> > -binop_compare("ilt", tint, "", "src0 < src1")
> > -binop_compare("ige", tint, "", "src0 >= src1")
> > -binop_compare("ieq", tint, commutative, "src0 == src1")
> > -binop_compare("ine", tint, commutative, "src0 != src1")
> > -binop_compare("ult", tuint, "", "src0 < src1")
> > -binop_compare("uge", tuint, "", "src0 >= src1")
> > 

Re: [Mesa-dev] [PATCH 5/6] nir/algebraic: Losten a restriction on variables

2018-10-22 Thread Ian Romanick
On 10/20/2018 11:01 AM, Jason Ekstrand wrote:
> Previously, we would fail if a variable had an assigned but unknown bit
> size X and we tried to assign it an actual bit size.  However, this is
> ok because, at the time we do the search, the variable does have an
> actual bit size and it will match X because of the NIR rules.
> ---
>  src/compiler/nir/nir_algebraic.py | 8 ++--
>  1 file changed, 6 insertions(+), 2 deletions(-)
> 
> diff --git a/src/compiler/nir/nir_algebraic.py 
> b/src/compiler/nir/nir_algebraic.py
> index 34ea2ba9e68..bcefa3372b3 100644
> --- a/src/compiler/nir/nir_algebraic.py
> +++ b/src/compiler/nir/nir_algebraic.py
> @@ -352,8 +352,12 @@ class BitSizeValidator(object):
>if var_class == 0:
>   self._var_classes[var.index] = bit_class
>else:
> - canon_class = self._class_relation.get_canonical(var_class)
> - assert canon_class < 0 or canon_class == bit_class
> + canon_bit_class = self._class_relation.get_canonical(var_class)
> + canon_var_class = self._class_relation.get_canonical(bit_class)

Are the _bit_ and _var_ names transposed in the left-hand side, or am I
not understanding something?  Or both? :)

> + assert canon_var_class < 0 or canon_bit_class < 0 or \
> +canon_var_class == canon_bit_class, \
> +'Variable {0} cannot be both {1}-bit and {2}-bit' \
> +.format(str(var), bit_class, var_class)
>   var_class = self._class_relation.add_equiv(var_class, bit_class)
>   self._var_classes[var.index] = var_class
>  
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 14/31] nir/algebraic: Disable b2f lowering and two optimizations

2018-10-22 Thread Ian Romanick
On 10/22/2018 03:13 PM, Jason Ekstrand wrote:
> These all assume the 0/~0 representation of booleans.  We'll turn them
> back on before too long.
> ---
>  src/compiler/nir/nir_opt_algebraic.py | 5 -
>  1 file changed, 5 deletions(-)
> 
> diff --git a/src/compiler/nir/nir_opt_algebraic.py 
> b/src/compiler/nir/nir_opt_algebraic.py
> index 8b24daddfdc..5a4e78e8e0e 100644
> --- a/src/compiler/nir/nir_opt_algebraic.py
> +++ b/src/compiler/nir/nir_opt_algebraic.py
> @@ -436,9 +436,7 @@ optimizations = [
> (('imul', ('b2i', a), ('b2i', b)), ('b2i', ('iand', a, b))),
> (('fmul', ('b2f', a), ('b2f', b)), ('b2f', ('iand', a, b))),
> (('fsat', ('fadd', ('b2f', a), ('b2f', b))), ('b2f', ('ior', a, b))),
> -   (('iand', 'a@bool', 1.0), ('b2f', a), '!options->lower_b2f'),
> # True/False are ~0 and 0 in NIR.  b2i of True is 1, and -1 is ~0 (True).

Is this comment still true at the end of the series?  Should it also change?

> -   (('ineg', ('b2i@32', a)), a),
> (('flt', ('fneg', ('b2f', a)), 0), a), # Generated by TGSI KILL_IF.
> (('flt', ('fsub', 0.0, ('b2f', a)), 0), a), # Generated by TGSI KILL_IF.
> # Comparison with the same args.  Note that these are not done for
> @@ -917,9 +915,6 @@ late_optimizations = [
> # we do these late so that we don't get in the way of creating ffmas
> (('fmin', ('fadd(is_used_once)', '#c', a), ('fadd(is_used_once)', '#c', 
> b)), ('fadd', c, ('fmin', a, b))),
> (('fmax', ('fadd(is_used_once)', '#c', a), ('fadd(is_used_once)', '#c', 
> b)), ('fadd', c, ('fmax', a, b))),
> -
> -   # Lowered for backends without a dedicated b2f instruction
> -   (('b2f@32', a), ('iand', a, 1.0), 'options->lower_b2f'),

I've never understood the point of this.  The backend should just
generate the iand instruction as it's implementation of b2f.  This is
what i965 does.  Converting the b2f at the late hour doesn't give any
opportunities to enable other optimizations, so it seems pretty useless.

>  ]
>  
>  print(nir_algebraic.AlgebraicPass("nir_opt_algebraic", 
> optimizations).render())
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/31] nir/validate: Print when the validation failed

2018-10-22 Thread Jason Ekstrand
On Mon, Oct 22, 2018 at 6:16 PM Eric Anholt  wrote:

> Jason Ekstrand  writes:
>
> > diff --git a/src/mesa/drivers/dri/i965/brw_program.c
> b/src/mesa/drivers/dri/i965/brw_program.c
> > index f5ebd3c3b05..78050cda359 100644
> > --- a/src/mesa/drivers/dri/i965/brw_program.c
> > +++ b/src/mesa/drivers/dri/i965/brw_program.c
> > @@ -91,14 +91,14 @@ brw_create_nir(struct brw_context *brw,
> >
> >nir_remove_dead_variables(nir, nir_var_shader_in |
> nir_var_shader_out);
> >nir_lower_returns(nir);
> > -  nir_validate_shader(nir);
> > +  nir_validate_shader(nir, NULL);
> >NIR_PASS_V(nir, nir_lower_io_to_temporaries,
> >   nir_shader_get_entrypoint(nir), true, false);
> > } else {
> >nir = prog_to_nir(prog, options);
> >NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into
> SSA */
> > }
> > -   nir_validate_shader(nir);
> > +   nir_validate_shader(nir, NULL);
>
> It seems like you ought to have valid where args here.  Other than that,
> patch 1-5 are:
>

Yeah, I was lazy.  I added something.


> Reviewed-by: Eric Anholt 
>

Thanks!
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 21/31] nir: Add support for 1-bit data types

2018-10-22 Thread Ian Romanick
On 10/22/2018 03:13 PM, Jason Ekstrand wrote:
> This commit adds support for 1-bit booleans and integers.  Booleans

I've noticed this in some of the patches... Boolean is a proper name, so
it should be capitalized everywhere.

> obviously take a value of true or false.  Because we have to define the
> semantics of 1-bit signed and unsigned integers, we define uint1_t to
> take values of 0 and 1 and int1_t to take values of 0 and -1.  1-bit
> arithmetic is then well-defined in the usual way, just with fewer bits.
> The definition of int1_t and uint1_t doesn't usually matter but we do
> need something for purposes of constant folding.
> ---
>  src/compiler/nir/nir.c| 15 +--
>  src/compiler/nir/nir.h| 21 +++-
>  src/compiler/nir/nir_builder.h| 12 -
>  src/compiler/nir/nir_constant_expressions.py  | 25 ---
>  src/compiler/nir/nir_instr_set.c  | 23 ++---
>  .../nir/nir_lower_load_const_to_scalar.c  |  3 +++
>  src/compiler/nir/nir_opt_constant_folding.c   |  3 +++
>  src/compiler/nir/nir_opt_large_constants.c|  5 
>  src/compiler/nir/nir_print.c  |  3 +++
>  src/compiler/nir/nir_search.c |  3 ++-
>  src/compiler/nir/nir_validate.c   |  2 +-
>  src/compiler/nir_types.cpp|  2 +-
>  src/compiler/spirv/spirv_to_nir.c |  9 +++
>  13 files changed, 101 insertions(+), 25 deletions(-)
> 
> diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
> index 0be40d257f5..8e83edb3644 100644
> --- a/src/compiler/nir/nir.c
> +++ b/src/compiler/nir/nir.c
> @@ -638,6 +638,7 @@ const_value_int(int64_t i, unsigned bit_size)
>  {
> nir_const_value v;
> switch (bit_size) {
> +   case 1:  v.b[0]   = i & 1;  break;
> case 8:  v.i8[0]  = i;  break;
> case 16: v.i16[0] = i;  break;
> case 32: v.i32[0] = i;  break;
> @@ -1206,6 +1207,8 @@ nir_src_comp_as_int(nir_src src, unsigned comp)
>  
> assert(comp < load->def.num_components);
> switch (load->def.bit_size) {
> +   /* int1_t uses 0/-1 convention */
> +   case 1:  return -(int)load->value.b[comp];
> case 8:  return load->value.i8[comp];
> case 16: return load->value.i16[comp];
> case 32: return load->value.i32[comp];
> @@ -1223,6 +1226,7 @@ nir_src_comp_as_uint(nir_src src, unsigned comp)
>  
> assert(comp < load->def.num_components);
> switch (load->def.bit_size) {
> +   case 1:  return load->value.b[comp];
> case 8:  return load->value.u8[comp];
> case 16: return load->value.u16[comp];
> case 32: return load->value.u32[comp];
> @@ -1235,15 +1239,12 @@ nir_src_comp_as_uint(nir_src src, unsigned comp)
>  bool
>  nir_src_comp_as_bool(nir_src src, unsigned comp)
>  {
> -   assert(nir_src_is_const(src));
> -   nir_load_const_instr *load = 
> nir_instr_as_load_const(src.ssa->parent_instr);
> +   int64_t i = nir_src_comp_as_int(src, comp);
>  
> -   assert(comp < load->def.num_components);
> -   assert(load->def.bit_size == 32);
> -   assert(load->value.u32[comp] == NIR_TRUE ||
> -  load->value.u32[comp] == NIR_FALSE);
> +   /* Booleans of any size use 0/-1 convention */
> +   assert(i == 0 || i == -1);
>  
> -   return load->value.u32[comp];
> +   return i;
>  }
>  
>  double
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index 47c7f400b2d..b19138f9e61 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -117,6 +117,7 @@ typedef enum {
>  } nir_rounding_mode;
>  
>  typedef union {
> +   bool b[NIR_MAX_VEC_COMPONENTS];
> float f32[NIR_MAX_VEC_COMPONENTS];
> double f64[NIR_MAX_VEC_COMPONENTS];
> int8_t i8[NIR_MAX_VEC_COMPONENTS];
> @@ -778,17 +779,25 @@ typedef struct {
> unsigned write_mask : NIR_MAX_VEC_COMPONENTS; /* ignored if dest.is_ssa 
> is true */
>  } nir_alu_dest;
>  
> +/** NIR sized and unsized types
> + *
> + * The values in this enum are carefully chosen so that the sized type is
> + * just the unsized type OR the number of bits.
> + */
>  typedef enum {
> nir_type_invalid = 0, /* Not a valid type */
> -   nir_type_float,
> -   nir_type_int,
> -   nir_type_uint,
> -   nir_type_bool,
> +   nir_type_int =   2,
> +   nir_type_uint =  4,
> +   nir_type_bool =  6,
> +   nir_type_float = 128,
> +   nir_type_bool1 = 1  | nir_type_bool,
> nir_type_bool32 =32 | nir_type_bool,
> +   nir_type_int1 =  1  | nir_type_int,
> nir_type_int8 =  8  | nir_type_int,
> nir_type_int16 = 16 | nir_type_int,
> nir_type_int32 = 32 | nir_type_int,
> nir_type_int64 = 64 | nir_type_int,
> +   nir_type_uint1 = 1  | nir_type_uint,
> nir_type_uint8 = 8  | nir_type_uint,
> nir_type_uint16 =16 | nir_type_uint,
> nir_type_uint32 =32 | nir_type_uint,
> @@ -798,8 +807,8 @@ typedef enum {
> nir_type_float64 =   64 | nir_type_float,
>  } nir_alu_type;
>  
> -#define 

Re: [Mesa-dev] [RFC 31/31] nir: Add a bool to float32 lowering pass

2018-10-22 Thread Alyssa Rosenzweig
For what it's worth, Midgard has real integers (including int32
support), using hardware-level D3D10 boolean conventions. I'm trying to
wrap my head around how this interacts with 5d85a0a.

I'm tempted to think the standard lower_bool_to_int32 pass would work,
with an emulated b2f instruction in the backend IR level, rather than
burdening NIR with those details.

I'll have to look at the patch set closer to understand the impact. 

---

> +nir_lower_bool_to_int32_impl(nir_function_impl *impl)
> +nir_lower_bool_to_int32(nir_shader *shader)
> +  if (function->impl && nir_lower_bool_to_int32_impl(function->impl))

I'm guessing these were intended to read float32?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/31] nir/validate: Print when the validation failed

2018-10-22 Thread Eric Anholt
Jason Ekstrand  writes:

> diff --git a/src/mesa/drivers/dri/i965/brw_program.c 
> b/src/mesa/drivers/dri/i965/brw_program.c
> index f5ebd3c3b05..78050cda359 100644
> --- a/src/mesa/drivers/dri/i965/brw_program.c
> +++ b/src/mesa/drivers/dri/i965/brw_program.c
> @@ -91,14 +91,14 @@ brw_create_nir(struct brw_context *brw,
>  
>nir_remove_dead_variables(nir, nir_var_shader_in | nir_var_shader_out);
>nir_lower_returns(nir);
> -  nir_validate_shader(nir);
> +  nir_validate_shader(nir, NULL);
>NIR_PASS_V(nir, nir_lower_io_to_temporaries,
>   nir_shader_get_entrypoint(nir), true, false);
> } else {
>nir = prog_to_nir(prog, options);
>NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
> }
> -   nir_validate_shader(nir);
> +   nir_validate_shader(nir, NULL);

It seems like you ought to have valid where args here.  Other than that,
patch 1-5 are:

Reviewed-by: Eric Anholt 


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 31/31] nir: Add a bool to float32 lowering pass

2018-10-22 Thread Ilia Mirkin
On Mon, Oct 22, 2018 at 6:16 PM Jason Ekstrand  wrote:
>
> This should be useful for drivers that don't support real integers.
>
> Cc: Alyssa Rosenzweig 
> ---
>  src/compiler/Makefile.sources  |   1 +
>  src/compiler/nir/meson.build   |   1 +
>  src/compiler/nir/nir_lower_bool_to_float.c | 181 +
>  3 files changed, 183 insertions(+)
>  create mode 100644 src/compiler/nir/nir_lower_bool_to_float.c
> +   nir_foreach_block(block, impl) {
> +  nir_foreach_instr_safe(instr, block) {
> + switch (instr->type) {
> + case nir_instr_type_alu:
> +progress |= lower_alu_instr(, nir_instr_as_alu(instr));
> +break;
> +
> + case nir_instr_type_load_const: {
> +nir_load_const_instr *load = nir_instr_as_load_const(instr);
> +if (load->def.bit_size == 1) {
> +   nir_const_value value = load->value;
> +   for (unsigned i = 0; i < load->def.num_components; i++)
> +  load->value.u32[i] = value.b[i] ? NIR_TRUE : NIR_FALSE;
> +   load->def.bit_size = 32;
> +   progress = true;
> +}
> +break;
> + }

Should this instead rewrite the load_const to a 1.0f / 0.0f?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 30/31] nir/algebraic: Add some optimizations for D3D-style booleans

2018-10-22 Thread Bas Nieuwenhuizen
On Tue, Oct 23, 2018 at 12:16 AM Jason Ekstrand  wrote:
>
> D3D booleans use a 32-bit 0/-1 representation.  Because this previously
> matched NIR exactly, we didn't have to really optimize for it.  Now that
> we have 1-bit booleans, we need some specific optimizations to chew
> through the D3D12-style booleans.
> ---
>  src/compiler/nir/nir_opt_algebraic.py | 13 +
>  1 file changed, 13 insertions(+)
>
> diff --git a/src/compiler/nir/nir_opt_algebraic.py 
> b/src/compiler/nir/nir_opt_algebraic.py
> index f0861c4411d..4d778e4b308 100644
> --- a/src/compiler/nir/nir_opt_algebraic.py
> +++ b/src/compiler/nir/nir_opt_algebraic.py
> @@ -239,6 +239,7 @@ optimizations = [
> (('fne', ('b2f', a), 0.0), a),
> (('ieq', ('b2i', a), 0),   ('inot', a)),
> (('ine', ('b2i', a), 0),   a),
> +   (('ine', ('ineg', ('b2i', a)), 0), a),
>
> (('fne', ('u2f32', a), 0.0), ('ine', a, 0)),
> (('feq', ('u2f32', a), 0.0), ('ieq', a, 0)),
> @@ -528,6 +529,18 @@ optimizations = [
> (('bcsel', a, b, b), b),
> (('fcsel', a, b, b), b),
>
> +   # D3D Boolean eumulation

Small typo, emulation.

Otherwise this patch is

Reviewed-by: Bas Nieuwenhuizen 

> +   (('bcsel', a, -1, 0), ('ineg', ('b2i', a))),
> +   (('bcsel', a, 0, -1), ('ineg', ('b2i', ('inot', a,
> +   (('iand', ('ineg', ('b2i', a)), ('ineg', ('b2i', b))),
> +('ineg', ('b2i', ('iand', a, b,
> +   (('ior', ('ineg', ('b2i', a)), ('ineg', ('b2i', b))),
> +('ineg', ('b2i', ('ior', a, b,
> +   (('ieq', ('ineg', ('b2i', a)), 0), ('inot', a)),
> +   (('ieq', ('ineg', ('b2i', a)), -1), a),
> +   (('ine', ('ineg', ('b2i', a)), 0), a),
> +   (('ine', ('ineg', ('b2i', a)), -1), ('inot', a)),
> +
> # Conversions
> (('i2b', ('b2i', a)), a),
> (('f2i32', ('ftrunc', a)), ('f2i32', a)),
> --
> 2.19.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 108508] Graphic glitches with stream output support on OLAND AMD GPU GCN 1.0

2018-10-22 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=108508

--- Comment #6 from Ahmed Elsayed  ---
Created attachment 142145
  --> https://bugs.freedesktop.org/attachment.cgi?id=142145=edit
Mafia III

It gets better and better :D

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/31] nir: Use a 1-bit data type for booleans

2018-10-22 Thread Ian Romanick
Patches 1 through 13 are

Reviewed-by: Ian Romanick 

The rest are going to take a bit more time and deep thought.

On 10/22/2018 03:13 PM, Jason Ekstrand wrote:
> This is something that Connor and I have talked about quite a bit over the
> last couple of months.  The core idea is to replace NIR's current 32-bit
> 0/-1 D3D10-style booleans with a 1-bit data type.  All in all, I think it
> worked out pretty well though I really don't like the proliferation of
> 32-bit comparison opcodes we now have kicking around for i965.
> 
> Why?  No hardware really has a 1-bit type, right?  Well, sort of...  AMD
> actually uses 64-bit scalars for booleans with one bit per invocation.
> However, most hardware such as Intel uses some other larger value for
> booleans.  The real benefit of 1-bit booleans and requiring a lowering pass
> is that you can do somewhat custom lowering (like AMD wants) and your
> lowering pass can always tell in an instant if a value is a boolean based
> on the bit size.  As can be seen in the last patch, this makes it really
> easy to implement a bool -> float lowering pass for hardware that doesn't
> have real integers where NIR's current booleans are actually rather
> painful.
> 
> On Intel, the situation is a bit more complicated.  It's tempting to say
> that we have 32-bit D3D10 booleans.  However, they're not really D3D10
> booleans on gen4-5 because the top 31 bits are undefined garbage and, while
> iand, ior, ixor, and inot operations work, you have to iand with 1 at the
> last minute to clear off all that garbage.  Also, on all generations, a
> comparison of two N-bit values results in an N-bit boolean, not a 32-bit
> bool.  This has caused the Igalia folks no end of trouble as they've been
> working on native 8 and 16-bit support.  If, instead, we have a 1-bit bool
> with a lowering pass and we can lower to whatever we want, then we could
> lower to a set of comparison opcodes that return the same bit-size as they
> compare and it would match GEN hardware much better.
> 
> But what about performance?  Aren't there all sorts of neat tricks we can
> do with D3D10 booleans like b & 1.0f for b2f?  As it turns out, not really;
> that's about the only one.  There is some small advantage when optimizing
> shaders that come from D3D if your native representation of booleans
> matches that of D3D.  However, penultimate patch in this series adds a few
> small optimizations that get us to actually better than we were before.
> With the entire series, shader-db on Kaby Lak looks like this:
> 
> total instructions in shared programs: 15084098 -> 14988578 (-0.63%)
> instructions in affected programs: 1321114 -> 1225594 (-7.23%)
> helped: 2340
> HURT: 23
> 
> total cycles in shared programs: 369790134 -> 359798399 (-2.70%)
> cycles in affected programs: 134085452 -> 124093717 (-7.45%)
> helped: 2149
> HURT: 720
> 
> total loops in shared programs: 4393 -> 4393 (0.00%)
> loops in affected programs: 0 -> 0
> helped: 0
> HURT: 0
> 
> total spills in shared programs: 10158 -> 10051 (-1.05%)
> spills in affected programs: 1429 -> 1322 (-7.49%)
> helped: 8
> HURT: 15
> 
> total fills in shared programs: 22105 -> 21720 (-1.74%)
> fills in affected programs: 2853 -> 2468 (-13.49%)
> helped: 12
> HURT: 15
> 
> How about ease of use?  Are they a pain to deal with?  Yes, adding support
> for 1-bit types was a bit awkward in a few places but most of it was
> dealing with all the places where we have 32-bit booleans baked into
> assumptions.  Getting rid of that baking in solves the problem and also
> makes the whole IR more future-proof.
> 
> All in all, I'd say I'm pretty happy with it.  However, I'd like other
> people (particularly the AMD folks) to play with it a bit and verify that
> it solves their problems as well.  Also, I added a lowering pass and tried
> to turn it on in everyone's driver but may not have put it in the right
> spot.  Please double-check my work.  For those wishing to take a look, you
> can also find the entire series on my gitlab here:
> 
> https://gitlab.freedesktop.org/jekstrand/mesa/commits/review/nir-1-bit-bool
> 
> Please review!
> 
> --Jason
> 
> Cc: Connor Abbott 
> Cc: Timothy Arceri 
> Cc: Eric Anholt 
> Cc: Rob Clark 
> Cc: Karol Herbst 
> Cc: Bas Nieuwenhuizen 
> Cc: Alyssa Rosenzweig 
> 
> 
> Jason Ekstrand (31):
>   nir/validate: Print when the validation failed
>   nir/constant_folding: Add an unreachable to a switch
>   nir/constant_folding: Use nir_src_as_bool for discard_if
>   nir/builder: Add a nir_imm_true/false helpers
>   nir/builder: Handle 16-bit floats in nir_imm_floatN_t
>   nir/search: Use nir_builder
>   nir/opt_if: Rework condition propagation
>   nir/system_values: Use the bit size from the load_deref
>   glsl/nir: Use i2b instead of ine for fixing UBO/SSBO booleans
>   nir/prog: Use nir_bany in kill handling
>   spirv: Use the right bit-size for spec constant ops
>   spirv: 

Re: [Mesa-dev] [PATCH 15/31] nir: Rename boolean-related opcodes to include 32 in the name

2018-10-22 Thread Ian Romanick
On 10/22/2018 03:13 PM, Jason Ekstrand wrote:
> ---
>  src/compiler/nir/nir_lower_alu_to_scalar.c |  8 ++--
>  src/compiler/nir/nir_opcodes.py| 46 +++---
>  src/compiler/nir/nir_opcodes_c.py  |  8 ++--
>  3 files changed, 31 insertions(+), 31 deletions(-)
> 
> diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c 
> b/src/compiler/nir/nir_lower_alu_to_scalar.c
> index 0be3aba9456..e424dff25c4 100644
> --- a/src/compiler/nir/nir_lower_alu_to_scalar.c
> +++ b/src/compiler/nir/nir_lower_alu_to_scalar.c
> @@ -197,10 +197,10 @@ lower_alu_instr_scalar(nir_alu_instr *instr, 
> nir_builder *b)
>return false;
>  
>LOWER_REDUCTION(nir_op_fdot, nir_op_fmul, nir_op_fadd);
> -  LOWER_REDUCTION(nir_op_ball_fequal, nir_op_feq, nir_op_iand);
> -  LOWER_REDUCTION(nir_op_ball_iequal, nir_op_ieq, nir_op_iand);
> -  LOWER_REDUCTION(nir_op_bany_fnequal, nir_op_fne, nir_op_ior);
> -  LOWER_REDUCTION(nir_op_bany_inequal, nir_op_ine, nir_op_ior);
> +  LOWER_REDUCTION(nir_op_b32all_fequal, nir_op_feq32, nir_op_iand);
> +  LOWER_REDUCTION(nir_op_b32all_iequal, nir_op_ieq32, nir_op_iand);
> +  LOWER_REDUCTION(nir_op_b32any_fnequal, nir_op_fne32, nir_op_ior);
> +  LOWER_REDUCTION(nir_op_b32any_inequal, nir_op_ine32, nir_op_ior);
>LOWER_REDUCTION(nir_op_fall_equal, nir_op_seq, nir_op_fand);
>LOWER_REDUCTION(nir_op_fany_nequal, nir_op_sne, nir_op_for);
>  
> diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
> index 4ef4ecc6f22..d349f74ed2a 100644
> --- a/src/compiler/nir/nir_opcodes.py
> +++ b/src/compiler/nir/nir_opcodes.py
> @@ -89,7 +89,7 @@ class Opcode(object):
>  # helper variables for strings
>  tfloat = "float"
>  tint = "int"
> -tbool = "bool32"
> +tbool32 = "bool32"
>  tuint = "uint"
>  tuint16 = "uint16"
>  tfloat32 = "float32"
> @@ -192,10 +192,10 @@ for src_t in [tint, tuint, tfloat]:
>  
>  # We'll hand-code the to/from bool conversion opcodes.  Because bool doesn't
>  # have multiple bit-sizes, we can always infer the size from the other type.
> -unop_convert("f2b", tbool, tfloat, "src0 != 0.0")
> -unop_convert("i2b", tbool, tint, "src0 != 0")
> -unop_convert("b2f", tfloat, tbool, "src0 ? 1.0 : 0.0")
> -unop_convert("b2i", tint, tbool, "src0 ? 1 : 0")
> +unop_convert("f2b32", tbool32, tfloat, "src0 != 0.0")
> +unop_convert("i2b32", tbool32, tint, "src0 != 0")
> +unop_convert("b322f", tfloat, tbool32, "src0 ? 1.0 : 0.0")
> +unop_convert("b322i", tint, tbool32, "src0 ? 1 : 0")

Yeee-uck.  b332i is hard to type and hard for my brain to parse.  None
of the other *2* opcodes have the size on the left side of the 2.  The
all infer the size from the source operand.

I'd really like the sized Boolean types to work like the other sized
types.  I doubt we'll have have a using for a uint1 type, but I've been
thinking about adding a bool8 or bool16 type for some time.  There are
quite a few cases where we carry Boolean values for a long, long time in
some of the very large shaders.  Those same shaders also often face
register pressure.  Being able to store those long lived values in a
smaller type could be very helpful.

>  
>  
>  # Unary floating-point rounding operations.
> @@ -405,8 +405,8 @@ def binop_convert(name, out_type, in_type, alg_props, 
> const_expr):
>  def binop(name, ty, alg_props, const_expr):
> binop_convert(name, ty, ty, alg_props, const_expr)
>  
> -def binop_compare(name, ty, alg_props, const_expr):
> -   binop_convert(name, tbool, ty, alg_props, const_expr)
> +def binop_compare32(name, ty, alg_props, const_expr):
> +   binop_convert(name, tbool32, ty, alg_props, const_expr)
>  
>  def binop_horiz(name, out_size, out_type, src1_size, src1_type, src2_size,
>  src2_type, const_expr):
> @@ -488,26 +488,26 @@ binop("frem", tfloat, "", "src0 - src1 * truncf(src0 / 
> src1)")
>  
>  # these integer-aware comparisons return a boolean (0 or ~0)
>  
> -binop_compare("flt", tfloat, "", "src0 < src1")
> -binop_compare("fge", tfloat, "", "src0 >= src1")
> -binop_compare("feq", tfloat, commutative, "src0 == src1")
> -binop_compare("fne", tfloat, commutative, "src0 != src1")
> -binop_compare("ilt", tint, "", "src0 < src1")
> -binop_compare("ige", tint, "", "src0 >= src1")
> -binop_compare("ieq", tint, commutative, "src0 == src1")
> -binop_compare("ine", tint, commutative, "src0 != src1")
> -binop_compare("ult", tuint, "", "src0 < src1")
> -binop_compare("uge", tuint, "", "src0 >= src1")
> +binop_compare32("flt32", tfloat, "", "src0 < src1")
> +binop_compare32("fge32", tfloat, "", "src0 >= src1")
> +binop_compare32("feq32", tfloat, commutative, "src0 == src1")
> +binop_compare32("fne32", tfloat, commutative, "src0 != src1")
> +binop_compare32("ilt32", tint, "", "src0 < src1")
> +binop_compare32("ige32", tint, "", "src0 >= src1")
> +binop_compare32("ieq32", tint, commutative, "src0 == src1")
> +binop_compare32("ine32", tint, commutative, "src0 != src1")
> 

Re: [Mesa-dev] [PATCH 06/31] nir/search: Use nir_builder

2018-10-22 Thread Jason Ekstrand
On Mon, Oct 22, 2018 at 5:50 PM Ian Romanick  wrote:

> I think a better description is "nir/search: Use nir_imm_* helpers".
> Using nir_builder is just a prerequisite to doing that.
>

Good call.  The new commit message is:

nir/search: Use the nir_imm_* helpers from nir_builder

This requires that we rework the interface a bit to use nir_builder but
that's a nice little modernization anyway.



> On 10/22/2018 03:13 PM, Jason Ekstrand wrote:
> > ---
> >  src/compiler/nir/nir_algebraic.py |  14 ++--
> >  src/compiler/nir/nir_search.c | 111 --
> >  src/compiler/nir/nir_search.h |   9 ++-
> >  3 files changed, 43 insertions(+), 91 deletions(-)
> >
> > diff --git a/src/compiler/nir/nir_algebraic.py
> b/src/compiler/nir/nir_algebraic.py
> > index 4134d496030..d2374d3216a 100644
> > --- a/src/compiler/nir/nir_algebraic.py
> > +++ b/src/compiler/nir/nir_algebraic.py
> > @@ -563,6 +563,7 @@ class SearchAndReplace(object):
> >
> >  _algebraic_pass_template = mako.template.Template("""
> >  #include "nir.h"
> > +#include "nir_builder.h"
> >  #include "nir_search.h"
> >  #include "nir_search_helpers.h"
> >
> > @@ -591,8 +592,8 @@ static const struct transform
> ${pass_name}_${opcode}_xforms[] = {
> >  % endfor
> >
> >  static bool
> > -${pass_name}_block(nir_block *block, const bool *condition_flags,
> > -   void *mem_ctx)
> > +${pass_name}_block(nir_builder *build, nir_block *block,
> > +   const bool *condition_flags)
> >  {
> > bool progress = false;
> >
> > @@ -610,8 +611,7 @@ ${pass_name}_block(nir_block *block, const bool
> *condition_flags,
> >   for (unsigned i = 0; i <
> ARRAY_SIZE(${pass_name}_${opcode}_xforms); i++) {
> >  const struct transform *xform =
> &${pass_name}_${opcode}_xforms[i];
> >  if (condition_flags[xform->condition_offset] &&
> > -nir_replace_instr(alu, xform->search, xform->replace,
> > -  mem_ctx)) {
> > +nir_replace_instr(build, alu, xform->search,
> xform->replace)) {
> > progress = true;
> > break;
> >  }
> > @@ -629,11 +629,13 @@ ${pass_name}_block(nir_block *block, const bool
> *condition_flags,
> >  static bool
> >  ${pass_name}_impl(nir_function_impl *impl, const bool *condition_flags)
> >  {
> > -   void *mem_ctx = ralloc_parent(impl);
> > bool progress = false;
> >
> > +   nir_builder build;
> > +   nir_builder_init(, impl);
> > +
> > nir_foreach_block_reverse(block, impl) {
> > -  progress |= ${pass_name}_block(block, condition_flags, mem_ctx);
> > +  progress |= ${pass_name}_block(, block, condition_flags);
> > }
> >
> > if (progress)
> > diff --git a/src/compiler/nir/nir_search.c
> b/src/compiler/nir/nir_search.c
> > index 1686b6bd0de..0270302fd3d 100644
> > --- a/src/compiler/nir/nir_search.c
> > +++ b/src/compiler/nir/nir_search.c
> > @@ -27,6 +27,7 @@
> >
> >  #include 
> >  #include "nir_search.h"
> > +#include "nir_builder.h"
> >  #include "util/half_float.h"
> >
> >  struct match_state {
> > @@ -408,10 +409,11 @@ bitsize_tree_filter_down(bitsize_tree *tree,
> unsigned size)
> >  }
> >
> >  static nir_alu_src
> > -construct_value(const nir_search_value *value,
> > +construct_value(nir_builder *build,
> > +const nir_search_value *value,
> >  unsigned num_components, bitsize_tree *bitsize,
> >  struct match_state *state,
> > -nir_instr *instr, void *mem_ctx)
> > +nir_instr *instr)
> >  {
> > switch (value->type) {
> > case nir_search_value_expression: {
> > @@ -420,7 +422,7 @@ construct_value(const nir_search_value *value,
> >if (nir_op_infos[expr->opcode].output_size != 0)
> >   num_components = nir_op_infos[expr->opcode].output_size;
> >
> > -  nir_alu_instr *alu = nir_alu_instr_create(mem_ctx, expr->opcode);
> > +  nir_alu_instr *alu = nir_alu_instr_create(build->shader,
> expr->opcode);
> >nir_ssa_dest_init(>instr, >dest.dest, num_components,
> >  bitsize->dest_size, NULL);
> >alu->dest.write_mask = (1 << num_components) - 1;
> > @@ -440,12 +442,12 @@ construct_value(const nir_search_value *value,
> >   if (nir_op_infos[alu->op].input_sizes[i] != 0)
> >  num_components = nir_op_infos[alu->op].input_sizes[i];
> >
> > - alu->src[i] = construct_value(expr->srcs[i],
> > + alu->src[i] = construct_value(build, expr->srcs[i],
> > num_components, bitsize->srcs[i],
> > -   state, instr, mem_ctx);
> > +   state, instr);
> >}
> >
> > -  nir_instr_insert_before(instr, >instr);
> > +  nir_builder_instr_insert(build, >instr);
> >
> >nir_alu_src val;
> >val.src = nir_src_for_ssa(>dest.dest.ssa);
> > @@ -461,8 

Re: [Mesa-dev] [PATCH 06/31] nir/search: Use nir_builder

2018-10-22 Thread Ian Romanick
I think a better description is "nir/search: Use nir_imm_* helpers".
Using nir_builder is just a prerequisite to doing that.

On 10/22/2018 03:13 PM, Jason Ekstrand wrote:
> ---
>  src/compiler/nir/nir_algebraic.py |  14 ++--
>  src/compiler/nir/nir_search.c | 111 --
>  src/compiler/nir/nir_search.h |   9 ++-
>  3 files changed, 43 insertions(+), 91 deletions(-)
> 
> diff --git a/src/compiler/nir/nir_algebraic.py 
> b/src/compiler/nir/nir_algebraic.py
> index 4134d496030..d2374d3216a 100644
> --- a/src/compiler/nir/nir_algebraic.py
> +++ b/src/compiler/nir/nir_algebraic.py
> @@ -563,6 +563,7 @@ class SearchAndReplace(object):
>  
>  _algebraic_pass_template = mako.template.Template("""
>  #include "nir.h"
> +#include "nir_builder.h"
>  #include "nir_search.h"
>  #include "nir_search_helpers.h"
>  
> @@ -591,8 +592,8 @@ static const struct transform 
> ${pass_name}_${opcode}_xforms[] = {
>  % endfor
>  
>  static bool
> -${pass_name}_block(nir_block *block, const bool *condition_flags,
> -   void *mem_ctx)
> +${pass_name}_block(nir_builder *build, nir_block *block,
> +   const bool *condition_flags)
>  {
> bool progress = false;
>  
> @@ -610,8 +611,7 @@ ${pass_name}_block(nir_block *block, const bool 
> *condition_flags,
>   for (unsigned i = 0; i < ARRAY_SIZE(${pass_name}_${opcode}_xforms); 
> i++) {
>  const struct transform *xform = 
> &${pass_name}_${opcode}_xforms[i];
>  if (condition_flags[xform->condition_offset] &&
> -nir_replace_instr(alu, xform->search, xform->replace,
> -  mem_ctx)) {
> +nir_replace_instr(build, alu, xform->search, 
> xform->replace)) {
> progress = true;
> break;
>  }
> @@ -629,11 +629,13 @@ ${pass_name}_block(nir_block *block, const bool 
> *condition_flags,
>  static bool
>  ${pass_name}_impl(nir_function_impl *impl, const bool *condition_flags)
>  {
> -   void *mem_ctx = ralloc_parent(impl);
> bool progress = false;
>  
> +   nir_builder build;
> +   nir_builder_init(, impl);
> +
> nir_foreach_block_reverse(block, impl) {
> -  progress |= ${pass_name}_block(block, condition_flags, mem_ctx);
> +  progress |= ${pass_name}_block(, block, condition_flags);
> }
>  
> if (progress)
> diff --git a/src/compiler/nir/nir_search.c b/src/compiler/nir/nir_search.c
> index 1686b6bd0de..0270302fd3d 100644
> --- a/src/compiler/nir/nir_search.c
> +++ b/src/compiler/nir/nir_search.c
> @@ -27,6 +27,7 @@
>  
>  #include 
>  #include "nir_search.h"
> +#include "nir_builder.h"
>  #include "util/half_float.h"
>  
>  struct match_state {
> @@ -408,10 +409,11 @@ bitsize_tree_filter_down(bitsize_tree *tree, unsigned 
> size)
>  }
>  
>  static nir_alu_src
> -construct_value(const nir_search_value *value,
> +construct_value(nir_builder *build,
> +const nir_search_value *value,
>  unsigned num_components, bitsize_tree *bitsize,
>  struct match_state *state,
> -nir_instr *instr, void *mem_ctx)
> +nir_instr *instr)
>  {
> switch (value->type) {
> case nir_search_value_expression: {
> @@ -420,7 +422,7 @@ construct_value(const nir_search_value *value,
>if (nir_op_infos[expr->opcode].output_size != 0)
>   num_components = nir_op_infos[expr->opcode].output_size;
>  
> -  nir_alu_instr *alu = nir_alu_instr_create(mem_ctx, expr->opcode);
> +  nir_alu_instr *alu = nir_alu_instr_create(build->shader, expr->opcode);
>nir_ssa_dest_init(>instr, >dest.dest, num_components,
>  bitsize->dest_size, NULL);
>alu->dest.write_mask = (1 << num_components) - 1;
> @@ -440,12 +442,12 @@ construct_value(const nir_search_value *value,
>   if (nir_op_infos[alu->op].input_sizes[i] != 0)
>  num_components = nir_op_infos[alu->op].input_sizes[i];
>  
> - alu->src[i] = construct_value(expr->srcs[i],
> + alu->src[i] = construct_value(build, expr->srcs[i],
> num_components, bitsize->srcs[i],
> -   state, instr, mem_ctx);
> +   state, instr);
>}
>  
> -  nir_instr_insert_before(instr, >instr);
> +  nir_builder_instr_insert(build, >instr);
>  
>nir_alu_src val;
>val.src = nir_src_for_ssa(>dest.dest.ssa);
> @@ -461,8 +463,8 @@ construct_value(const nir_search_value *value,
>assert(state->variables_seen & (1 << var->variable));
>  
>nir_alu_src val = { NIR_SRC_INIT };
> -  nir_alu_src_copy(, >variables[var->variable], mem_ctx);
> -
> +  nir_alu_src_copy(, >variables[var->variable],
> +   (void *)build->shader);
>assert(!var->is_constant);
>  
>return val;
> @@ -470,79 +472,27 @@ construct_value(const nir_search_value 

Re: [Mesa-dev] [PATCH] intel/compiler: Print floating point values upto precision 8

2018-10-22 Thread Matt Turner
On Mon, Oct 22, 2018 at 2:14 PM Sagar Ghuge  wrote:
>
>
>
> On 10/22/18 10:34 AM, Matt Turner wrote:
> > On Fri, Oct 5, 2018 at 11:15 AM Sagar Ghuge  wrote:
> >>
> >> avoid misinterpretation of encoded immediate values in instruction and
> >> disassembled output.
> >>
> >> Signed-off-by: Sagar Ghuge 
> >> ---
> >> While encoding the immediate floating point values in instruction we use
> >> values upto precision 8, but while disassembling, we print precision to
> >> 6 places, which round up the value and gives wrong interpretation for
> >> encoded immediate constant. Printing it upto precision 8 will help in
> >> reassembling it again.
> >
> > Let's put that in the commit message.
> >
> >>  src/intel/compiler/brw_disasm.c | 2 +-
> >>  1 file changed, 1 insertion(+), 1 deletion(-)
> >>
> >> diff --git a/src/intel/compiler/brw_disasm.c 
> >> b/src/intel/compiler/brw_disasm.c
> >> index 322f4544df..7cbbc080b3 100644
> >> --- a/src/intel/compiler/brw_disasm.c
> >> +++ b/src/intel/compiler/brw_disasm.c
> >> @@ -1293,7 +1293,7 @@ imm(FILE *file, const struct gen_device_info 
> >> *devinfo, enum brw_reg_type type,
> >>format(file, "0x%08xV", brw_inst_imm_ud(devinfo, inst));
> >>break;
> >> case BRW_REGISTER_TYPE_F:
> >> -  format(file, "%-gF", brw_inst_imm_f(devinfo, inst));
> >> +  format(file, "%.8fF", brw_inst_imm_f(devinfo, inst));
> >
> > I'm not sure 8 digits is sufficient to get an exact representation
> > that the assembler can "round-trip". This page [1] indicates that 9
> > digits are necessary for binary->decimal->binary round-trips.
> >
> I was also not sure about it, [1] article is nice.
>
> > NIR takes another approach:
> >
> > vec1 32 ssa_0 = load_const (0x3f80 /* 1.00 */)
> >
> > What do you think about printing the binary representation and the
> > floating-point value? That way humans can easily read one number and
> > the assembler can easily read the other :)
> >
> I think we can just print F and DF to 9 and 17 precision respectively to avoid
> output alignment issue.

Ken pointed out to me that this wouldn't allow us to round-trip
different variations of NaN :(

So I think we have to take the NIR approach.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 31/31] nir: Add a bool to float32 lowering pass

2018-10-22 Thread Jason Ekstrand
This should be useful for drivers that don't support real integers.

Cc: Alyssa Rosenzweig 
---
 src/compiler/Makefile.sources  |   1 +
 src/compiler/nir/meson.build   |   1 +
 src/compiler/nir/nir_lower_bool_to_float.c | 181 +
 3 files changed, 183 insertions(+)
 create mode 100644 src/compiler/nir/nir_lower_bool_to_float.c

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index 8f65f974ab8..2ff12ff43cb 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -230,6 +230,7 @@ NIR_FILES = \
nir/nir_lower_atomics_to_ssbo.c \
nir/nir_lower_bitmap.c \
nir/nir_lower_bit_size.c \
+   nir/nir_lower_bool_to_float.c \
nir/nir_lower_bool_to_int32.c \
nir/nir_lower_clamp_color_outputs.c \
nir/nir_lower_clip.c \
diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index 5809551c9d4..f715668a03b 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -113,6 +113,7 @@ files_libnir = files(
   'nir_lower_alpha_test.c',
   'nir_lower_atomics_to_ssbo.c',
   'nir_lower_bitmap.c',
+  'nir_lower_bool_to_float.c',
   'nir_lower_bool_to_int32.c',
   'nir_lower_clamp_color_outputs.c',
   'nir_lower_clip.c',
diff --git a/src/compiler/nir/nir_lower_bool_to_float.c 
b/src/compiler/nir/nir_lower_bool_to_float.c
new file mode 100644
index 000..7aa5efb5a2f
--- /dev/null
+++ b/src/compiler/nir/nir_lower_bool_to_float.c
@@ -0,0 +1,181 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+static bool
+assert_ssa_def_is_not_1bit(nir_ssa_def *def, UNUSED void *unused)
+{
+   assert(def->bit_size > 1);
+   return true;
+}
+
+static bool
+rewrite_1bit_ssa_def_to_32bit(nir_ssa_def *def, void *_progress)
+{
+   bool *progress = _progress;
+   if (def->bit_size == 1) {
+  def->bit_size = 32;
+  *progress = true;
+   }
+   return true;
+}
+
+static bool
+lower_alu_instr(nir_builder *b, nir_alu_instr *alu)
+{
+   const nir_op_info *op_info = _op_infos[alu->op];
+
+   b->cursor = nir_before_instr(>instr);
+
+   /* Replacement SSA value */
+   nir_ssa_def *rep = NULL;
+   switch (alu->op) {
+   case nir_op_b2f: alu->op = nir_op_fmov; break;
+   case nir_op_b2i: alu->op = nir_op_fmov; break;
+   case nir_op_f2b:
+   case nir_op_i2b:
+  rep = nir_sne(b, nir_ssa_for_alu_src(b, alu, 0),
+   nir_imm_float(b, 0));
+  break;
+
+   case nir_op_flt: alu->op = nir_op_slt; break;
+   case nir_op_fge: alu->op = nir_op_sge; break;
+   case nir_op_feq: alu->op = nir_op_seq; break;
+   case nir_op_fne: alu->op = nir_op_sne; break;
+   case nir_op_ilt: alu->op = nir_op_slt; break;
+   case nir_op_ige: alu->op = nir_op_sge; break;
+   case nir_op_ieq: alu->op = nir_op_seq; break;
+   case nir_op_ine: alu->op = nir_op_sne; break;
+   case nir_op_ult: alu->op = nir_op_slt; break;
+   case nir_op_uge: alu->op = nir_op_sge; break;
+
+   case nir_op_ball_fequal2:  alu->op = nir_op_fall_equal2; break;
+   case nir_op_ball_fequal3:  alu->op = nir_op_fall_equal3; break;
+   case nir_op_ball_fequal4:  alu->op = nir_op_fall_equal4; break;
+   case nir_op_bany_fnequal2: alu->op = nir_op_fany_nequal2; break;
+   case nir_op_bany_fnequal3: alu->op = nir_op_fany_nequal3; break;
+   case nir_op_bany_fnequal4: alu->op = nir_op_fany_nequal4; break;
+   case nir_op_ball_iequal2:  alu->op = nir_op_fall_equal2; break;
+   case nir_op_ball_iequal3:  alu->op = nir_op_fall_equal3; break;
+   case nir_op_ball_iequal4:  alu->op = nir_op_fall_equal4; break;
+   case nir_op_bany_inequal2: alu->op = nir_op_fany_nequal2; break;
+   case nir_op_bany_inequal3: alu->op = nir_op_fany_nequal3; break;
+   case nir_op_bany_inequal4: alu->op = nir_op_fany_nequal4; break;
+
+   case nir_op_bcsel: alu->op = 

[Mesa-dev] [PATCH 30/31] nir/algebraic: Add some optimizations for D3D-style booleans

2018-10-22 Thread Jason Ekstrand
D3D booleans use a 32-bit 0/-1 representation.  Because this previously
matched NIR exactly, we didn't have to really optimize for it.  Now that
we have 1-bit booleans, we need some specific optimizations to chew
through the D3D12-style booleans.
---
 src/compiler/nir/nir_opt_algebraic.py | 13 +
 1 file changed, 13 insertions(+)

diff --git a/src/compiler/nir/nir_opt_algebraic.py 
b/src/compiler/nir/nir_opt_algebraic.py
index f0861c4411d..4d778e4b308 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -239,6 +239,7 @@ optimizations = [
(('fne', ('b2f', a), 0.0), a),
(('ieq', ('b2i', a), 0),   ('inot', a)),
(('ine', ('b2i', a), 0),   a),
+   (('ine', ('ineg', ('b2i', a)), 0), a),
 
(('fne', ('u2f32', a), 0.0), ('ine', a, 0)),
(('feq', ('u2f32', a), 0.0), ('ieq', a, 0)),
@@ -528,6 +529,18 @@ optimizations = [
(('bcsel', a, b, b), b),
(('fcsel', a, b, b), b),
 
+   # D3D Boolean eumulation
+   (('bcsel', a, -1, 0), ('ineg', ('b2i', a))),
+   (('bcsel', a, 0, -1), ('ineg', ('b2i', ('inot', a,
+   (('iand', ('ineg', ('b2i', a)), ('ineg', ('b2i', b))),
+('ineg', ('b2i', ('iand', a, b,
+   (('ior', ('ineg', ('b2i', a)), ('ineg', ('b2i', b))),
+('ineg', ('b2i', ('ior', a, b,
+   (('ieq', ('ineg', ('b2i', a)), 0), ('inot', a)),
+   (('ieq', ('ineg', ('b2i', a)), -1), a),
+   (('ine', ('ineg', ('b2i', a)), 0), a),
+   (('ine', ('ineg', ('b2i', a)), -1), ('inot', a)),
+
# Conversions
(('i2b', ('b2i', a)), a),
(('f2i32', ('ftrunc', a)), ('f2i32', a)),
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 29/31] nir/algebraic: Add back lower_b2f and a b2f optimization

2018-10-22 Thread Jason Ekstrand
---
 src/compiler/nir/nir_opt_algebraic.py | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/compiler/nir/nir_opt_algebraic.py 
b/src/compiler/nir/nir_opt_algebraic.py
index 6c767501a51..f0861c4411d 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -436,6 +436,7 @@ optimizations = [
(('imul', ('b2i', a), ('b2i', b)), ('b2i', ('iand', a, b))),
(('fmul', ('b2f', a), ('b2f', b)), ('b2f', ('iand', a, b))),
(('fsat', ('fadd', ('b2f', a), ('b2f', b))), ('b2f', ('ior', a, b))),
+   (('iand', ('ineg', ('b2i', a)), '1.0@32'), ('b2f', a), 
'!options->lower_b2f'),
# True/False are ~0 and 0 in NIR.  b2i of True is 1, and -1 is ~0 (True).
(('flt', ('fneg', ('b2f', a)), 0), a), # Generated by TGSI KILL_IF.
(('flt', ('fsub', 0.0, ('b2f', a)), 0), a), # Generated by TGSI KILL_IF.
@@ -914,6 +915,9 @@ late_optimizations = [
# we do these late so that we don't get in the way of creating ffmas
(('fmin', ('fadd(is_used_once)', '#c', a), ('fadd(is_used_once)', '#c', 
b)), ('fadd', c, ('fmin', a, b))),
(('fmax', ('fadd(is_used_once)', '#c', a), ('fadd(is_used_once)', '#c', 
b)), ('fadd', c, ('fmax', a, b))),
+
+   # Lowered for backends without a dedicated b2f instruction
+   (('b2f@32', a), ('bcsel', a, 1.0, 0.0), 'options->lower_b2f'),
 ]
 
 print(nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render())
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 28/31] nir/algebraic: Optimize 1-bit booleans

2018-10-22 Thread Jason Ekstrand
---
 src/compiler/nir/nir_algebraic.py | 43 +++
 src/compiler/nir/nir_opt_algebraic.py |  1 -
 2 files changed, 4 insertions(+), 40 deletions(-)

diff --git a/src/compiler/nir/nir_algebraic.py 
b/src/compiler/nir/nir_algebraic.py
index 9d187ca36d7..b880aa0dc66 100644
--- a/src/compiler/nir/nir_algebraic.py
+++ b/src/compiler/nir/nir_algebraic.py
@@ -146,8 +146,8 @@ class Constant(Value):
  self.bit_size = 0
 
   if isinstance(self.value, bool):
- assert self.bit_size == 0 or self.bit_size == 32
- self.bit_size = 32
+ assert self.bit_size == 0 or self.bit_size == 1
+ self.bit_size = 1
 
def hex(self):
   if isinstance(self.value, (bool)):
@@ -194,8 +194,8 @@ class Variable(Value):
   self.bit_size = int(m.group('bits')) if m.group('bits') else 0
 
   if self.required_type == 'bool':
- assert self.bit_size == 0 or self.bit_size == 32
- self.bit_size = 32
+ assert self.bit_size == 0 or self.bit_size == 1
+ self.bit_size = 1
 
   if self.required_type is not None:
  assert self.required_type in ('float', 'bool', 'int', 'uint')
@@ -216,39 +216,6 @@ class Variable(Value):
 _opcode_re = re.compile(r"(?P~)?(?P\w+)(?:@(?P\d+))?"
 r"(?P\([^\)]+\))?")
 
-opcode_remap = {
-   'b2i' : 'b322i',
-   'b2f' : 'b322f',
-   'i2b' : 'i2b32',
-   'f2b' : 'f2b32',
-
-   'flt' : 'flt32',
-   'fge' : 'fge32',
-   'feq' : 'feq32',
-   'fne' : 'fne32',
-   'ilt' : 'ilt32',
-   'ige' : 'ige32',
-   'ieq' : 'ieq32',
-   'ine' : 'ine32',
-   'ult' : 'ult32',
-   'uge' : 'uge32',
-
-   'ball_iequal2' : 'b32all_iequal2',
-   'ball_iequal3' : 'b32all_iequal3',
-   'ball_iequal4' : 'b32all_iequal4',
-   'bany_inequal2' : 'b32any_inequal2',
-   'bany_inequal3' : 'b32any_inequal3',
-   'bany_inequal4' : 'b32any_inequal4',
-   'ball_fequal2' : 'b32all_fequal2',
-   'ball_fequal3' : 'b32all_fequal3',
-   'ball_fequal4' : 'b32all_fequal4',
-   'bany_fnequal2' : 'b32any_fnequal2',
-   'bany_fnequal3' : 'b32any_fnequal3',
-   'bany_fnequal4' : 'b32any_fnequal4',
-
-   'bcsel' : 'b32csel',
-}
-
 class Expression(Value):
def __init__(self, expr, name_base, varset):
   Value.__init__(self, expr, name_base, "expression")
@@ -258,8 +225,6 @@ class Expression(Value):
   assert m and m.group('opcode') is not None
 
   self.opcode = m.group('opcode')
-  if self.opcode in opcode_remap:
- self.opcode = opcode_remap[self.opcode]
   self.bit_size = int(m.group('bits')) if m.group('bits') else 0
   self.inexact = m.group('inexact') is not None
   self.cond = m.group('cond')
diff --git a/src/compiler/nir/nir_opt_algebraic.py 
b/src/compiler/nir/nir_opt_algebraic.py
index 5a4e78e8e0e..6c767501a51 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -529,7 +529,6 @@ optimizations = [
 
# Conversions
(('i2b', ('b2i', a)), a),
-   (('i2b', 'a@bool'), a),
(('f2i32', ('ftrunc', a)), ('f2i32', a)),
(('f2u32', ('ftrunc', a)), ('f2u32', a)),
(('i2b', ('ineg', a)), ('i2b', a)),
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 25/31] FIXUP: Revert "Use 32-bit opcodes in the NIR producers and optimizations"

2018-10-22 Thread Jason Ekstrand
---
 src/compiler/nir/nir.h | 24 -
 src/compiler/nir/nir_loop_analyze.c| 28 +-
 src/compiler/nir/nir_opt_if.c  |  2 +-
 src/compiler/nir/nir_opt_peephole_select.c |  2 +-
 src/compiler/nir/nir_opt_undef.c   |  2 +-
 src/compiler/spirv/vtn_alu.c   | 62 +++---
 6 files changed, 60 insertions(+), 60 deletions(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 51775e5e18c..4f4a940cbbb 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1513,18 +1513,18 @@ static inline bool
 nir_alu_instr_is_comparison(const nir_alu_instr *instr)
 {
switch (instr->op) {
-   case nir_op_flt32:
-   case nir_op_fge32:
-   case nir_op_feq32:
-   case nir_op_fne32:
-   case nir_op_ilt32:
-   case nir_op_ult32:
-   case nir_op_ige32:
-   case nir_op_uge32:
-   case nir_op_ieq32:
-   case nir_op_ine32:
-   case nir_op_i2b32:
-   case nir_op_f2b32:
+   case nir_op_flt:
+   case nir_op_fge:
+   case nir_op_feq:
+   case nir_op_fne:
+   case nir_op_ilt:
+   case nir_op_ult:
+   case nir_op_ige:
+   case nir_op_uge:
+   case nir_op_ieq:
+   case nir_op_ine:
+   case nir_op_i2b:
+   case nir_op_f2b:
case nir_op_inot:
case nir_op_fnot:
   return true;
diff --git a/src/compiler/nir/nir_loop_analyze.c 
b/src/compiler/nir/nir_loop_analyze.c
index 37409525bbb..9c3fd2f286f 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -357,26 +357,26 @@ get_iteration(nir_op cond_op, nir_const_value *initial, 
nir_const_value *step,
int32_t iter;
 
switch (cond_op) {
-   case nir_op_ige32:
-   case nir_op_ilt32:
-   case nir_op_ieq32:
-   case nir_op_ine32: {
+   case nir_op_ige:
+   case nir_op_ilt:
+   case nir_op_ieq:
+   case nir_op_ine: {
   int32_t initial_val = initial->i32[0];
   int32_t span = limit->i32[0] - initial_val;
   iter = span / step->i32[0];
   break;
}
-   case nir_op_uge32:
-   case nir_op_ult32: {
+   case nir_op_uge:
+   case nir_op_ult: {
   uint32_t initial_val = initial->u32[0];
   uint32_t span = limit->u32[0] - initial_val;
   iter = span / step->u32[0];
   break;
}
-   case nir_op_fge32:
-   case nir_op_flt32:
-   case nir_op_feq32:
-   case nir_op_fne32: {
+   case nir_op_fge:
+   case nir_op_flt:
+   case nir_op_feq:
+   case nir_op_fne: {
   float initial_val = initial->f32[0];
   float span = limit->f32[0] - initial_val;
   iter = span / step->f32[0];
@@ -547,10 +547,10 @@ find_trip_count(loop_info_state *state)
   bool limit_rhs = true;
 
   switch (alu->op) {
-  case nir_op_fge32:  case nir_op_ige32:  case nir_op_uge32:
-  case nir_op_flt32:  case nir_op_ilt32:  case nir_op_ult32:
-  case nir_op_feq32:  case nir_op_ieq32:
-  case nir_op_fne32:  case nir_op_ine32:
+  case nir_op_fge:  case nir_op_ige:  case nir_op_uge:
+  case nir_op_flt:  case nir_op_ilt:  case nir_op_ult:
+  case nir_op_feq:  case nir_op_ieq:
+  case nir_op_fne:  case nir_op_ine:
 
  /* We assume that the limit is the "right" operand */
  basic_ind = get_loop_var(alu->src[0].src.ssa, state);
diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
index cdad06c1c48..60368a0259e 100644
--- a/src/compiler/nir/nir_opt_if.c
+++ b/src/compiler/nir/nir_opt_if.c
@@ -478,7 +478,7 @@ can_propagate_through_alu(nir_src *src)
(nir_instr_as_alu(src->parent_instr)->op == nir_op_ior ||
 nir_instr_as_alu(src->parent_instr)->op == nir_op_iand ||
 nir_instr_as_alu(src->parent_instr)->op == nir_op_inot ||
-nir_instr_as_alu(src->parent_instr)->op == nir_op_b322i))
+nir_instr_as_alu(src->parent_instr)->op == nir_op_b2i))
   return true;
 
return false;
diff --git a/src/compiler/nir/nir_opt_peephole_select.c 
b/src/compiler/nir/nir_opt_peephole_select.c
index 6308c8cab12..ad9d0abec03 100644
--- a/src/compiler/nir/nir_opt_peephole_select.c
+++ b/src/compiler/nir/nir_opt_peephole_select.c
@@ -205,7 +205,7 @@ nir_opt_peephole_select_block(nir_block *block, nir_shader 
*shader,
  break;
 
   nir_phi_instr *phi = nir_instr_as_phi(instr);
-  nir_alu_instr *sel = nir_alu_instr_create(shader, nir_op_b32csel);
+  nir_alu_instr *sel = nir_alu_instr_create(shader, nir_op_bcsel);
   nir_src_copy(>src[0].src, _stmt->condition, sel);
   /* Splat the condition to all channels */
   memset(sel->src[0].swizzle, 0, sizeof sel->src[0].swizzle);
diff --git a/src/compiler/nir/nir_opt_undef.c b/src/compiler/nir/nir_opt_undef.c
index 52c1d257e9f..c26158dab7e 100644
--- a/src/compiler/nir/nir_opt_undef.c
+++ b/src/compiler/nir/nir_opt_undef.c
@@ -38,7 +38,7 @@
 static bool
 opt_undef_csel(nir_alu_instr *instr)
 {
-   if (instr->op != nir_op_b32csel && instr->op != nir_op_fcsel)
+   if (instr->op != nir_op_bcsel && instr->op != nir_op_fcsel)
   return 

[Mesa-dev] [PATCH 26/31] FIXUP: Revert "nir/builder: Generate 32-bit bool opcodes transparently"

2018-10-22 Thread Jason Ekstrand
---
 src/compiler/nir/nir_builder_opcodes_h.py | 44 +--
 1 file changed, 1 insertion(+), 43 deletions(-)

diff --git a/src/compiler/nir/nir_builder_opcodes_h.py 
b/src/compiler/nir/nir_builder_opcodes_h.py
index a9de0e4ea47..e600093e9f6 100644
--- a/src/compiler/nir/nir_builder_opcodes_h.py
+++ b/src/compiler/nir/nir_builder_opcodes_h.py
@@ -27,41 +27,6 @@ template = """\
 #define _NIR_BUILDER_OPCODES_
 
 <%
-opcode_remap = {
-   'b2i' : 'b322i',
-   'b2f' : 'b322f',
-   'i2b' : 'i2b32',
-   'f2b' : 'f2b32',
-
-   'flt' : 'flt32',
-   'fge' : 'fge32',
-   'feq' : 'feq32',
-   'fne' : 'fne32',
-   'ilt' : 'ilt32',
-   'ige' : 'ige32',
-   'ieq' : 'ieq32',
-   'ine' : 'ine32',
-   'ult' : 'ult32',
-   'uge' : 'uge32',
-
-   'ball_iequal2' : 'b32all_iequal2',
-   'ball_iequal3' : 'b32all_iequal3',
-   'ball_iequal4' : 'b32all_iequal4',
-   'bany_inequal2' : 'b32any_inequal2',
-   'bany_inequal3' : 'b32any_inequal3',
-   'bany_inequal4' : 'b32any_inequal4',
-   'ball_fequal2' : 'b32all_fequal2',
-   'ball_fequal3' : 'b32all_fequal3',
-   'ball_fequal4' : 'b32all_fequal4',
-   'bany_fnequal2' : 'b32any_fnequal2',
-   'bany_fnequal3' : 'b32any_fnequal3',
-   'bany_fnequal4' : 'b32any_fnequal4',
-
-   'bcsel' : 'b32csel',
-}
-
-opcode_remap32 = { op32 : op for op, op32 in opcode_remap.items() }
-
 def src_decl_list(num_srcs):
return ', '.join('nir_ssa_def *src' + str(i) for i in range(num_srcs))
 
@@ -70,15 +35,8 @@ def src_list(num_srcs):
 %>
 
 % for name, opcode in sorted(opcodes.items()):
-  % if name in opcode_remap:
-<% continue %>
-  % elif name in opcode_remap32:
-<% builder_name = opcode_remap32[name] %>
-  % else:
-<% builder_name = name %>
-  % endif
 static inline nir_ssa_def *
-nir_${builder_name}(nir_builder *build, ${src_decl_list(opcode.num_inputs)})
+nir_${name}(nir_builder *build, ${src_decl_list(opcode.num_inputs)})
 {
return nir_build_alu(build, nir_op_${name}, ${src_list(opcode.num_inputs)});
 }
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 27/31] FIXUP: nir/builder: Generate 1-bit booleans in nir_build_imm_bool

2018-10-22 Thread Jason Ekstrand
---
 src/compiler/nir/nir_builder.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h
index eaba5cfe448..2f2fc49c448 100644
--- a/src/compiler/nir/nir_builder.h
+++ b/src/compiler/nir/nir_builder.h
@@ -212,9 +212,9 @@ nir_imm_bool(nir_builder *build, bool x)
nir_const_value v;
 
memset(, 0, sizeof(v));
-   v.u32[0] = x ? NIR_TRUE : NIR_FALSE;
+   v.b[0] = x;
 
-   return nir_build_imm(build, 1, 32, v);
+   return nir_build_imm(build, 1, 1, v);
 }
 
 static inline nir_ssa_def *
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 20/31] nir/constant_expressions: Rework boolean handling

2018-10-22 Thread Jason Ekstrand
This commit contains three related changes.  First, we define boolN_t
for N = 8, 16, and 64 and move the definition of boolN_vec to the loop
with the other vec definitions.  Second, there's no reason why we need
the != 0 on the source because that happens implicitly when it's
converted to bool.  Third, for destinations, we use a signed integer
type and just do -(int)bool_val which will give us the 0/-1 behavior we
want and neatly scales to all bit widths.
---
 src/compiler/nir/nir_constant_expressions.py | 34 
 1 file changed, 13 insertions(+), 21 deletions(-)

diff --git a/src/compiler/nir/nir_constant_expressions.py 
b/src/compiler/nir/nir_constant_expressions.py
index 118af9f7818..8f28571ee4d 100644
--- a/src/compiler/nir/nir_constant_expressions.py
+++ b/src/compiler/nir/nir_constant_expressions.py
@@ -39,14 +39,14 @@ def op_bit_sizes(op):
 return sorted(list(sizes)) if sizes is not None else None
 
 def get_const_field(type_):
-if type_ == "bool32":
-return "u32"
-elif type_ == "float16":
+if type_ == "float16":
 return "u16"
 else:
 m = type_split_re.match(type_)
 if not m:
 raise Exception(str(type_))
+if m.group('type') == 'bool':
+return 'i' + m.group('bits')
 return m.group('type')[0] + m.group('bits')
 
 template = """\
@@ -257,8 +257,11 @@ unpack_half_1x16(uint16_t u)
 typedef float float16_t;
 typedef float float32_t;
 typedef double float64_t;
+typedef bool bool8_t;
+typedef bool bool16_t;
 typedef bool bool32_t;
-% for type in ["float", "int", "uint"]:
+typedef bool bool64_t;
+% for type in ["float", "int", "uint", "bool"]:
 % for width in type_sizes(type):
 struct ${type}${width}_vec {
${type}${width}_t x;
@@ -269,13 +272,6 @@ struct ${type}${width}_vec {
 % endfor
 % endfor
 
-struct bool32_vec {
-bool x;
-bool y;
-bool z;
-bool w;
-};
-
 <%def name="evaluate_op(op, bit_size)">
<%
output_type = type_add_size(op.output_type, bit_size)
@@ -295,9 +291,7 @@ struct bool32_vec {
 
   const struct ${input_types[j]}_vec src${j} = {
   % for k in range(op.input_sizes[j]):
- % if input_types[j] == "bool32":
-_src[${j}].u32[${k}] != 0,
- % elif input_types[j] == "float16":
+ % if input_types[j] == "float16":
 _mesa_half_to_float(_src[${j}].u16[${k}]),
  % else:
 _src[${j}].${get_const_field(input_types[j])}[${k}],
@@ -322,8 +316,6 @@ struct bool32_vec {
 % elif "src" + str(j) not in op.const_expr:
## Avoid unused variable warnings
<% continue %>
-% elif input_types[j] == "bool32":
-   const bool src${j} = _src[${j}].u32[_i] != 0;
 % elif input_types[j] == "float16":
const float src${j} =
   _mesa_half_to_float(_src[${j}].u16[_i]);
@@ -346,9 +338,9 @@ struct bool32_vec {
 
  ## Store the current component of the actual destination to the
  ## value of dst.
- % if output_type == "bool32":
-## Sanitize the C value to a proper NIR bool
-_dst_val.u32[_i] = dst ? NIR_TRUE : NIR_FALSE;
+ % if output_type.startswith("bool"):
+## Sanitize the C value to a proper NIR 0/-1 bool
+_dst_val.${get_const_field(output_type)}[_i] = -(int)dst;
  % elif output_type == "float16":
 _dst_val.u16[_i] = _mesa_float_to_half(dst);
  % else:
@@ -376,8 +368,8 @@ struct bool32_vec {
   ## the actual destination.
   % for k in range(op.output_size):
  % if output_type == "bool32":
-## Sanitize the C value to a proper NIR bool
-_dst_val.u32[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : NIR_FALSE;
+## Sanitize the C value to a proper NIR 0/-1 bool
+_dst_val.${get_const_field(output_type)}[${k}] = 
-(int)dst.${"xyzw"[k]};
  % elif output_type == "float16":
 _dst_val.u16[${k}] = _mesa_float_to_half(dst.${"xyzw"[k]});
  % else:
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 14/31] nir/algebraic: Disable b2f lowering and two optimizations

2018-10-22 Thread Jason Ekstrand
These all assume the 0/~0 representation of booleans.  We'll turn them
back on before too long.
---
 src/compiler/nir/nir_opt_algebraic.py | 5 -
 1 file changed, 5 deletions(-)

diff --git a/src/compiler/nir/nir_opt_algebraic.py 
b/src/compiler/nir/nir_opt_algebraic.py
index 8b24daddfdc..5a4e78e8e0e 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -436,9 +436,7 @@ optimizations = [
(('imul', ('b2i', a), ('b2i', b)), ('b2i', ('iand', a, b))),
(('fmul', ('b2f', a), ('b2f', b)), ('b2f', ('iand', a, b))),
(('fsat', ('fadd', ('b2f', a), ('b2f', b))), ('b2f', ('ior', a, b))),
-   (('iand', 'a@bool', 1.0), ('b2f', a), '!options->lower_b2f'),
# True/False are ~0 and 0 in NIR.  b2i of True is 1, and -1 is ~0 (True).
-   (('ineg', ('b2i@32', a)), a),
(('flt', ('fneg', ('b2f', a)), 0), a), # Generated by TGSI KILL_IF.
(('flt', ('fsub', 0.0, ('b2f', a)), 0), a), # Generated by TGSI KILL_IF.
# Comparison with the same args.  Note that these are not done for
@@ -917,9 +915,6 @@ late_optimizations = [
# we do these late so that we don't get in the way of creating ffmas
(('fmin', ('fadd(is_used_once)', '#c', a), ('fadd(is_used_once)', '#c', 
b)), ('fadd', c, ('fmin', a, b))),
(('fmax', ('fadd(is_used_once)', '#c', a), ('fadd(is_used_once)', '#c', 
b)), ('fadd', c, ('fmax', a, b))),
-
-   # Lowered for backends without a dedicated b2f instruction
-   (('b2f@32', a), ('iand', a, 1.0), 'options->lower_b2f'),
 ]
 
 print(nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render())
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 18/31] FIXUP: Use 32-bit opcodes in the NIR producers and optimizations

2018-10-22 Thread Jason Ekstrand
Generated with a little hand-editing and the following sed commands:

sed -i 's/nir_op_ball_fequal/nir_op_b32all_fequal/g' **/*.c
sed -i 's/nir_op_bany_fnequal/nir_op_b32any_fnequal/g' **/*.c
sed -i 's/nir_op_ball_iequal/nir_op_b32all_iequal/g' **/*.c
sed -i 's/nir_op_bany_inequal/nir_op_b32any_inequal/g' **/*.c
sed -i 's/nir_op_\([fiu]lt\)/nir_op_\132/g' **/*.c
sed -i 's/nir_op_\([fiu]ge\)/nir_op_\132/g' **/*.c
sed -i 's/nir_op_\([fiu]ne\)/nir_op_\132/g' **/*.c
sed -i 's/nir_op_\([fiu]eq\)/nir_op_\132/g' **/*.c
sed -i 's/nir_op_\([fi]\)ne32g/nir_op_\1neg/g' **/*.c
sed -i 's/nir_op_bcsel/nir_op_b32csel/g' **/*.c
---
 src/compiler/nir/nir.h | 24 -
 src/compiler/nir/nir_loop_analyze.c| 28 +-
 src/compiler/nir/nir_opt_if.c  |  2 +-
 src/compiler/nir/nir_opt_peephole_select.c |  2 +-
 src/compiler/nir/nir_opt_undef.c   |  2 +-
 src/compiler/spirv/vtn_alu.c   | 62 +++---
 6 files changed, 60 insertions(+), 60 deletions(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 93d0fb5271c..47c7f400b2d 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1504,18 +1504,18 @@ static inline bool
 nir_alu_instr_is_comparison(const nir_alu_instr *instr)
 {
switch (instr->op) {
-   case nir_op_flt:
-   case nir_op_fge:
-   case nir_op_feq:
-   case nir_op_fne:
-   case nir_op_ilt:
-   case nir_op_ult:
-   case nir_op_ige:
-   case nir_op_uge:
-   case nir_op_ieq:
-   case nir_op_ine:
-   case nir_op_i2b:
-   case nir_op_f2b:
+   case nir_op_flt32:
+   case nir_op_fge32:
+   case nir_op_feq32:
+   case nir_op_fne32:
+   case nir_op_ilt32:
+   case nir_op_ult32:
+   case nir_op_ige32:
+   case nir_op_uge32:
+   case nir_op_ieq32:
+   case nir_op_ine32:
+   case nir_op_i2b32:
+   case nir_op_f2b32:
case nir_op_inot:
case nir_op_fnot:
   return true;
diff --git a/src/compiler/nir/nir_loop_analyze.c 
b/src/compiler/nir/nir_loop_analyze.c
index 9c3fd2f286f..37409525bbb 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -357,26 +357,26 @@ get_iteration(nir_op cond_op, nir_const_value *initial, 
nir_const_value *step,
int32_t iter;
 
switch (cond_op) {
-   case nir_op_ige:
-   case nir_op_ilt:
-   case nir_op_ieq:
-   case nir_op_ine: {
+   case nir_op_ige32:
+   case nir_op_ilt32:
+   case nir_op_ieq32:
+   case nir_op_ine32: {
   int32_t initial_val = initial->i32[0];
   int32_t span = limit->i32[0] - initial_val;
   iter = span / step->i32[0];
   break;
}
-   case nir_op_uge:
-   case nir_op_ult: {
+   case nir_op_uge32:
+   case nir_op_ult32: {
   uint32_t initial_val = initial->u32[0];
   uint32_t span = limit->u32[0] - initial_val;
   iter = span / step->u32[0];
   break;
}
-   case nir_op_fge:
-   case nir_op_flt:
-   case nir_op_feq:
-   case nir_op_fne: {
+   case nir_op_fge32:
+   case nir_op_flt32:
+   case nir_op_feq32:
+   case nir_op_fne32: {
   float initial_val = initial->f32[0];
   float span = limit->f32[0] - initial_val;
   iter = span / step->f32[0];
@@ -547,10 +547,10 @@ find_trip_count(loop_info_state *state)
   bool limit_rhs = true;
 
   switch (alu->op) {
-  case nir_op_fge:  case nir_op_ige:  case nir_op_uge:
-  case nir_op_flt:  case nir_op_ilt:  case nir_op_ult:
-  case nir_op_feq:  case nir_op_ieq:
-  case nir_op_fne:  case nir_op_ine:
+  case nir_op_fge32:  case nir_op_ige32:  case nir_op_uge32:
+  case nir_op_flt32:  case nir_op_ilt32:  case nir_op_ult32:
+  case nir_op_feq32:  case nir_op_ieq32:
+  case nir_op_fne32:  case nir_op_ine32:
 
  /* We assume that the limit is the "right" operand */
  basic_ind = get_loop_var(alu->src[0].src.ssa, state);
diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
index 60368a0259e..cdad06c1c48 100644
--- a/src/compiler/nir/nir_opt_if.c
+++ b/src/compiler/nir/nir_opt_if.c
@@ -478,7 +478,7 @@ can_propagate_through_alu(nir_src *src)
(nir_instr_as_alu(src->parent_instr)->op == nir_op_ior ||
 nir_instr_as_alu(src->parent_instr)->op == nir_op_iand ||
 nir_instr_as_alu(src->parent_instr)->op == nir_op_inot ||
-nir_instr_as_alu(src->parent_instr)->op == nir_op_b2i))
+nir_instr_as_alu(src->parent_instr)->op == nir_op_b322i))
   return true;
 
return false;
diff --git a/src/compiler/nir/nir_opt_peephole_select.c 
b/src/compiler/nir/nir_opt_peephole_select.c
index ad9d0abec03..6308c8cab12 100644
--- a/src/compiler/nir/nir_opt_peephole_select.c
+++ b/src/compiler/nir/nir_opt_peephole_select.c
@@ -205,7 +205,7 @@ nir_opt_peephole_select_block(nir_block *block, nir_shader 
*shader,
  break;
 
   nir_phi_instr *phi = nir_instr_as_phi(instr);
-  nir_alu_instr *sel = nir_alu_instr_create(shader, nir_op_bcsel);
+  

[Mesa-dev] [PATCH 21/31] nir: Add support for 1-bit data types

2018-10-22 Thread Jason Ekstrand
This commit adds support for 1-bit booleans and integers.  Booleans
obviously take a value of true or false.  Because we have to define the
semantics of 1-bit signed and unsigned integers, we define uint1_t to
take values of 0 and 1 and int1_t to take values of 0 and -1.  1-bit
arithmetic is then well-defined in the usual way, just with fewer bits.
The definition of int1_t and uint1_t doesn't usually matter but we do
need something for purposes of constant folding.
---
 src/compiler/nir/nir.c| 15 +--
 src/compiler/nir/nir.h| 21 +++-
 src/compiler/nir/nir_builder.h| 12 -
 src/compiler/nir/nir_constant_expressions.py  | 25 ---
 src/compiler/nir/nir_instr_set.c  | 23 ++---
 .../nir/nir_lower_load_const_to_scalar.c  |  3 +++
 src/compiler/nir/nir_opt_constant_folding.c   |  3 +++
 src/compiler/nir/nir_opt_large_constants.c|  5 
 src/compiler/nir/nir_print.c  |  3 +++
 src/compiler/nir/nir_search.c |  3 ++-
 src/compiler/nir/nir_validate.c   |  2 +-
 src/compiler/nir_types.cpp|  2 +-
 src/compiler/spirv/spirv_to_nir.c |  9 +++
 13 files changed, 101 insertions(+), 25 deletions(-)

diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
index 0be40d257f5..8e83edb3644 100644
--- a/src/compiler/nir/nir.c
+++ b/src/compiler/nir/nir.c
@@ -638,6 +638,7 @@ const_value_int(int64_t i, unsigned bit_size)
 {
nir_const_value v;
switch (bit_size) {
+   case 1:  v.b[0]   = i & 1;  break;
case 8:  v.i8[0]  = i;  break;
case 16: v.i16[0] = i;  break;
case 32: v.i32[0] = i;  break;
@@ -1206,6 +1207,8 @@ nir_src_comp_as_int(nir_src src, unsigned comp)
 
assert(comp < load->def.num_components);
switch (load->def.bit_size) {
+   /* int1_t uses 0/-1 convention */
+   case 1:  return -(int)load->value.b[comp];
case 8:  return load->value.i8[comp];
case 16: return load->value.i16[comp];
case 32: return load->value.i32[comp];
@@ -1223,6 +1226,7 @@ nir_src_comp_as_uint(nir_src src, unsigned comp)
 
assert(comp < load->def.num_components);
switch (load->def.bit_size) {
+   case 1:  return load->value.b[comp];
case 8:  return load->value.u8[comp];
case 16: return load->value.u16[comp];
case 32: return load->value.u32[comp];
@@ -1235,15 +1239,12 @@ nir_src_comp_as_uint(nir_src src, unsigned comp)
 bool
 nir_src_comp_as_bool(nir_src src, unsigned comp)
 {
-   assert(nir_src_is_const(src));
-   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);
+   int64_t i = nir_src_comp_as_int(src, comp);
 
-   assert(comp < load->def.num_components);
-   assert(load->def.bit_size == 32);
-   assert(load->value.u32[comp] == NIR_TRUE ||
-  load->value.u32[comp] == NIR_FALSE);
+   /* Booleans of any size use 0/-1 convention */
+   assert(i == 0 || i == -1);
 
-   return load->value.u32[comp];
+   return i;
 }
 
 double
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 47c7f400b2d..b19138f9e61 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -117,6 +117,7 @@ typedef enum {
 } nir_rounding_mode;
 
 typedef union {
+   bool b[NIR_MAX_VEC_COMPONENTS];
float f32[NIR_MAX_VEC_COMPONENTS];
double f64[NIR_MAX_VEC_COMPONENTS];
int8_t i8[NIR_MAX_VEC_COMPONENTS];
@@ -778,17 +779,25 @@ typedef struct {
unsigned write_mask : NIR_MAX_VEC_COMPONENTS; /* ignored if dest.is_ssa is 
true */
 } nir_alu_dest;
 
+/** NIR sized and unsized types
+ *
+ * The values in this enum are carefully chosen so that the sized type is
+ * just the unsized type OR the number of bits.
+ */
 typedef enum {
nir_type_invalid = 0, /* Not a valid type */
-   nir_type_float,
-   nir_type_int,
-   nir_type_uint,
-   nir_type_bool,
+   nir_type_int =   2,
+   nir_type_uint =  4,
+   nir_type_bool =  6,
+   nir_type_float = 128,
+   nir_type_bool1 = 1  | nir_type_bool,
nir_type_bool32 =32 | nir_type_bool,
+   nir_type_int1 =  1  | nir_type_int,
nir_type_int8 =  8  | nir_type_int,
nir_type_int16 = 16 | nir_type_int,
nir_type_int32 = 32 | nir_type_int,
nir_type_int64 = 64 | nir_type_int,
+   nir_type_uint1 = 1  | nir_type_uint,
nir_type_uint8 = 8  | nir_type_uint,
nir_type_uint16 =16 | nir_type_uint,
nir_type_uint32 =32 | nir_type_uint,
@@ -798,8 +807,8 @@ typedef enum {
nir_type_float64 =   64 | nir_type_float,
 } nir_alu_type;
 
-#define NIR_ALU_TYPE_SIZE_MASK 0xfff8
-#define NIR_ALU_TYPE_BASE_TYPE_MASK 0x0007
+#define NIR_ALU_TYPE_SIZE_MASK 0x79
+#define NIR_ALU_TYPE_BASE_TYPE_MASK 0x86
 
 static inline unsigned
 nir_alu_type_get_type_size(nir_alu_type type)
diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h
index 3271a480520..eaba5cfe448 100644
--- a/src/compiler/nir/nir_builder.h
+++ 

[Mesa-dev] [PATCH 23/31] nir: Add a bool to int32 lowering pass

2018-10-22 Thread Jason Ekstrand
We also enable it in all of the NIR drivers.

Cc: Timothy Arceri 
Cc: Eric Anholt 
Cc: Rob Clark 
Cc: Bas Nieuwenhuizen 

---
 src/amd/vulkan/radv_shader.c  |   2 +
 src/broadcom/compiler/vir.c   |   2 +
 src/compiler/Makefile.sources |   1 +
 src/compiler/nir/meson.build  |   1 +
 src/compiler/nir/nir.h|   1 +
 src/compiler/nir/nir_lower_bool_to_int32.c| 162 ++
 .../drivers/freedreno/ir3/ir3_compiler_nir.c  |   1 +
 src/gallium/drivers/radeonsi/si_shader_nir.c  |   2 +
 src/gallium/drivers/vc4/vc4_program.c |   2 +
 src/intel/compiler/brw_nir.c  |   2 +
 10 files changed, 176 insertions(+)
 create mode 100644 src/compiler/nir/nir_lower_bool_to_int32.c

diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index edeaefbc1a2..81f72d5b674 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -350,6 +350,8 @@ radv_shader_compile_to_nir(struct radv_device *device,
ac_lower_indirect_derefs(nir, 
device->physical_device->rad_info.chip_class);
radv_optimize_nir(nir, flags & 
VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT, false);
 
+   NIR_PASS_V(nir, nir_lower_bool_to_int32);
+
return nir;
 }
 
diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c
index 6b55b0e03bc..129afa4134a 100644
--- a/src/broadcom/compiler/vir.c
+++ b/src/broadcom/compiler/vir.c
@@ -729,6 +729,7 @@ uint64_t *v3d_compile_vs(const struct v3d_compiler 
*compiler,
 
 v3d_lower_nir_late(c);
 v3d_optimize_nir(c->s);
+NIR_PASS_V(c->s, nir_lower_bool_to_int32);
 NIR_PASS_V(c->s, nir_convert_from_ssa, true);
 
 v3d_nir_to_vir(c);
@@ -872,6 +873,7 @@ uint64_t *v3d_compile_fs(const struct v3d_compiler 
*compiler,
 
 v3d_lower_nir_late(c);
 v3d_optimize_nir(c->s);
+NIR_PASS_V(c->s, nir_lower_bool_to_int32);
 NIR_PASS_V(c->s, nir_convert_from_ssa, true);
 
 v3d_nir_to_vir(c);
diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index b65bb9b80b9..8f65f974ab8 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -230,6 +230,7 @@ NIR_FILES = \
nir/nir_lower_atomics_to_ssbo.c \
nir/nir_lower_bitmap.c \
nir/nir_lower_bit_size.c \
+   nir/nir_lower_bool_to_int32.c \
nir/nir_lower_clamp_color_outputs.c \
nir/nir_lower_clip.c \
nir/nir_lower_clip_cull_distance_arrays.c \
diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index d8f65640004..5809551c9d4 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -113,6 +113,7 @@ files_libnir = files(
   'nir_lower_alpha_test.c',
   'nir_lower_atomics_to_ssbo.c',
   'nir_lower_bitmap.c',
+  'nir_lower_bool_to_int32.c',
   'nir_lower_clamp_color_outputs.c',
   'nir_lower_clip.c',
   'nir_lower_clip_cull_distance_arrays.c',
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index b19138f9e61..7518ab5b94f 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2842,6 +2842,7 @@ void nir_lower_alpha_test(nir_shader *shader, enum 
compare_func func,
   bool alpha_to_one);
 bool nir_lower_alu(nir_shader *shader);
 bool nir_lower_alu_to_scalar(nir_shader *shader);
+bool nir_lower_bool_to_int32(nir_shader *shader);
 bool nir_lower_load_const_to_scalar(nir_shader *shader);
 bool nir_lower_read_invocation_to_scalar(nir_shader *shader);
 bool nir_lower_phis_to_scalar(nir_shader *shader);
diff --git a/src/compiler/nir/nir_lower_bool_to_int32.c 
b/src/compiler/nir/nir_lower_bool_to_int32.c
new file mode 100644
index 000..8204f20d5b0
--- /dev/null
+++ b/src/compiler/nir/nir_lower_bool_to_int32.c
@@ -0,0 +1,162 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 

[Mesa-dev] [PATCH 15/31] nir: Rename boolean-related opcodes to include 32 in the name

2018-10-22 Thread Jason Ekstrand
---
 src/compiler/nir/nir_lower_alu_to_scalar.c |  8 ++--
 src/compiler/nir/nir_opcodes.py| 46 +++---
 src/compiler/nir/nir_opcodes_c.py  |  8 ++--
 3 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c 
b/src/compiler/nir/nir_lower_alu_to_scalar.c
index 0be3aba9456..e424dff25c4 100644
--- a/src/compiler/nir/nir_lower_alu_to_scalar.c
+++ b/src/compiler/nir/nir_lower_alu_to_scalar.c
@@ -197,10 +197,10 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder 
*b)
   return false;
 
   LOWER_REDUCTION(nir_op_fdot, nir_op_fmul, nir_op_fadd);
-  LOWER_REDUCTION(nir_op_ball_fequal, nir_op_feq, nir_op_iand);
-  LOWER_REDUCTION(nir_op_ball_iequal, nir_op_ieq, nir_op_iand);
-  LOWER_REDUCTION(nir_op_bany_fnequal, nir_op_fne, nir_op_ior);
-  LOWER_REDUCTION(nir_op_bany_inequal, nir_op_ine, nir_op_ior);
+  LOWER_REDUCTION(nir_op_b32all_fequal, nir_op_feq32, nir_op_iand);
+  LOWER_REDUCTION(nir_op_b32all_iequal, nir_op_ieq32, nir_op_iand);
+  LOWER_REDUCTION(nir_op_b32any_fnequal, nir_op_fne32, nir_op_ior);
+  LOWER_REDUCTION(nir_op_b32any_inequal, nir_op_ine32, nir_op_ior);
   LOWER_REDUCTION(nir_op_fall_equal, nir_op_seq, nir_op_fand);
   LOWER_REDUCTION(nir_op_fany_nequal, nir_op_sne, nir_op_for);
 
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index 4ef4ecc6f22..d349f74ed2a 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -89,7 +89,7 @@ class Opcode(object):
 # helper variables for strings
 tfloat = "float"
 tint = "int"
-tbool = "bool32"
+tbool32 = "bool32"
 tuint = "uint"
 tuint16 = "uint16"
 tfloat32 = "float32"
@@ -192,10 +192,10 @@ for src_t in [tint, tuint, tfloat]:
 
 # We'll hand-code the to/from bool conversion opcodes.  Because bool doesn't
 # have multiple bit-sizes, we can always infer the size from the other type.
-unop_convert("f2b", tbool, tfloat, "src0 != 0.0")
-unop_convert("i2b", tbool, tint, "src0 != 0")
-unop_convert("b2f", tfloat, tbool, "src0 ? 1.0 : 0.0")
-unop_convert("b2i", tint, tbool, "src0 ? 1 : 0")
+unop_convert("f2b32", tbool32, tfloat, "src0 != 0.0")
+unop_convert("i2b32", tbool32, tint, "src0 != 0")
+unop_convert("b322f", tfloat, tbool32, "src0 ? 1.0 : 0.0")
+unop_convert("b322i", tint, tbool32, "src0 ? 1 : 0")
 
 
 # Unary floating-point rounding operations.
@@ -405,8 +405,8 @@ def binop_convert(name, out_type, in_type, alg_props, 
const_expr):
 def binop(name, ty, alg_props, const_expr):
binop_convert(name, ty, ty, alg_props, const_expr)
 
-def binop_compare(name, ty, alg_props, const_expr):
-   binop_convert(name, tbool, ty, alg_props, const_expr)
+def binop_compare32(name, ty, alg_props, const_expr):
+   binop_convert(name, tbool32, ty, alg_props, const_expr)
 
 def binop_horiz(name, out_size, out_type, src1_size, src1_type, src2_size,
 src2_type, const_expr):
@@ -488,26 +488,26 @@ binop("frem", tfloat, "", "src0 - src1 * truncf(src0 / 
src1)")
 
 # these integer-aware comparisons return a boolean (0 or ~0)
 
-binop_compare("flt", tfloat, "", "src0 < src1")
-binop_compare("fge", tfloat, "", "src0 >= src1")
-binop_compare("feq", tfloat, commutative, "src0 == src1")
-binop_compare("fne", tfloat, commutative, "src0 != src1")
-binop_compare("ilt", tint, "", "src0 < src1")
-binop_compare("ige", tint, "", "src0 >= src1")
-binop_compare("ieq", tint, commutative, "src0 == src1")
-binop_compare("ine", tint, commutative, "src0 != src1")
-binop_compare("ult", tuint, "", "src0 < src1")
-binop_compare("uge", tuint, "", "src0 >= src1")
+binop_compare32("flt32", tfloat, "", "src0 < src1")
+binop_compare32("fge32", tfloat, "", "src0 >= src1")
+binop_compare32("feq32", tfloat, commutative, "src0 == src1")
+binop_compare32("fne32", tfloat, commutative, "src0 != src1")
+binop_compare32("ilt32", tint, "", "src0 < src1")
+binop_compare32("ige32", tint, "", "src0 >= src1")
+binop_compare32("ieq32", tint, commutative, "src0 == src1")
+binop_compare32("ine32", tint, commutative, "src0 != src1")
+binop_compare32("ult32", tuint, "", "src0 < src1")
+binop_compare32("uge32", tuint, "", "src0 >= src1")
 
 # integer-aware GLSL-style comparisons that compare floats and ints
 
-binop_reduce("ball_fequal",  1, tbool, tfloat, "{src0} == {src1}",
+binop_reduce("b32all_fequal",  1, tbool32, tfloat, "{src0} == {src1}",
  "{src0} && {src1}", "{src}")
-binop_reduce("bany_fnequal", 1, tbool, tfloat, "{src0} != {src1}",
+binop_reduce("b32any_fnequal", 1, tbool32, tfloat, "{src0} != {src1}",
  "{src0} || {src1}", "{src}")
-binop_reduce("ball_iequal",  1, tbool, tint, "{src0} == {src1}",
+binop_reduce("b32all_iequal",  1, tbool32, tint, "{src0} == {src1}",
  "{src0} && {src1}", "{src}")
-binop_reduce("bany_inequal", 1, tbool, tint, "{src0} != {src1}",
+binop_reduce("b32any_inequal", 1, tbool32, tint, "{src0} != {src1}",
  "{src0} || {src1}", 

[Mesa-dev] [PATCH 19/31] FIXUP: Use 32-bit opcodes in the NIR back-ends

2018-10-22 Thread Jason Ekstrand
Generated with a little hand-editing and the following sed commands:

sed -i 's/nir_op_ball_fequal/nir_op_b32all_fequal/g' **/*.c
sed -i 's/nir_op_bany_fnequal/nir_op_b32any_fnequal/g' **/*.c
sed -i 's/nir_op_ball_iequal/nir_op_b32all_iequal/g' **/*.c
sed -i 's/nir_op_bany_inequal/nir_op_b32any_inequal/g' **/*.c
sed -i 's/nir_op_\([fiu]lt\)/nir_op_\132/g' **/*.c
sed -i 's/nir_op_\([fiu]ge\)/nir_op_\132/g' **/*.c
sed -i 's/nir_op_\([fiu]ne\)/nir_op_\132/g' **/*.c
sed -i 's/nir_op_\([fiu]eq\)/nir_op_\132/g' **/*.c
sed -i 's/nir_op_\([fi]\)ne32g/nir_op_\1neg/g' **/*.c
sed -i 's/nir_op_bcsel/nir_op_b32csel/g' **/*.c
---
 src/amd/common/ac_nir_to_llvm.c   |  30 ++---
 src/broadcom/compiler/nir_to_vir.c|  44 +++
 src/gallium/auxiliary/nir/tgsi_to_nir.c   |  20 +--
 .../drivers/freedreno/ir3/ir3_compiler_nir.c  |  30 ++---
 src/gallium/drivers/vc4/vc4_program.c |  48 +++
 src/intel/compiler/brw_fs_nir.cpp |  80 ++--
 .../brw_nir_analyze_boolean_resolves.c|  24 ++--
 src/intel/compiler/brw_vec4_nir.cpp   | 122 +-
 8 files changed, 199 insertions(+), 199 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index ee75e2890dd..deb40e610da 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -677,34 +677,34 @@ static void visit_alu(struct ac_nir_context *ctx, const 
nir_alu_instr *instr)
 LLVMTypeOf(src[0]), ""),
   "");
break;
-   case nir_op_ilt:
+   case nir_op_ilt32:
result = emit_int_cmp(>ac, LLVMIntSLT, src[0], src[1]);
break;
-   case nir_op_ine:
+   case nir_op_ine32:
result = emit_int_cmp(>ac, LLVMIntNE, src[0], src[1]);
break;
-   case nir_op_ieq:
+   case nir_op_ieq32:
result = emit_int_cmp(>ac, LLVMIntEQ, src[0], src[1]);
break;
-   case nir_op_ige:
+   case nir_op_ige32:
result = emit_int_cmp(>ac, LLVMIntSGE, src[0], src[1]);
break;
-   case nir_op_ult:
+   case nir_op_ult32:
result = emit_int_cmp(>ac, LLVMIntULT, src[0], src[1]);
break;
-   case nir_op_uge:
+   case nir_op_uge32:
result = emit_int_cmp(>ac, LLVMIntUGE, src[0], src[1]);
break;
-   case nir_op_feq:
+   case nir_op_feq32:
result = emit_float_cmp(>ac, LLVMRealOEQ, src[0], src[1]);
break;
-   case nir_op_fne:
+   case nir_op_fne32:
result = emit_float_cmp(>ac, LLVMRealUNE, src[0], src[1]);
break;
-   case nir_op_flt:
+   case nir_op_flt32:
result = emit_float_cmp(>ac, LLVMRealOLT, src[0], src[1]);
break;
-   case nir_op_fge:
+   case nir_op_fge32:
result = emit_float_cmp(>ac, LLVMRealOGE, src[0], src[1]);
break;
case nir_op_fabs:
@@ -906,7 +906,7 @@ static void visit_alu(struct ac_nir_context *ctx, const 
nir_alu_instr *instr)
else
result = LLVMBuildTrunc(ctx->ac.builder, src[0], 
def_type, "");
break;
-   case nir_op_bcsel:
+   case nir_op_b32csel:
result = emit_bcsel(>ac, src[0], src[1], src[2]);
break;
case nir_op_find_lsb:
@@ -931,16 +931,16 @@ static void visit_alu(struct ac_nir_context *ctx, const 
nir_alu_instr *instr)
src[1] = ac_to_integer(>ac, src[1]);
result = emit_uint_carry(>ac, 
"llvm.usub.with.overflow.i32", src[0], src[1]);
break;
-   case nir_op_b2f:
+   case nir_op_b322f:
result = emit_b2f(>ac, src[0]);
break;
-   case nir_op_f2b:
+   case nir_op_f2b32:
result = emit_f2b(>ac, src[0]);
break;
-   case nir_op_b2i:
+   case nir_op_b322i:
result = emit_b2i(>ac, src[0], 
instr->dest.dest.ssa.bit_size);
break;
-   case nir_op_i2b:
+   case nir_op_i2b32:
src[0] = ac_to_integer(>ac, src[0]);
result = emit_i2b(>ac, src[0]);
break;
diff --git a/src/broadcom/compiler/nir_to_vir.c 
b/src/broadcom/compiler/nir_to_vir.c
index 714d76f58ba..eb7367388fc 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -506,45 +506,45 @@ ntq_emit_comparison(struct v3d_compile *c, struct qreg 
*dest,
 bool cond_invert = false;
 
 switch (compare_instr->op) {
-case nir_op_feq:
+case nir_op_feq32:
 case nir_op_seq:
 vir_PF(c, vir_FCMP(c, src0, src1), V3D_QPU_PF_PUSHZ);
 break;
-case nir_op_ieq:
+case 

[Mesa-dev] [PATCH 17/31] FIXUP: nir/algebraic: Remap boolean opcodes to the 32-bit variant

2018-10-22 Thread Jason Ekstrand
---
 src/compiler/nir/nir_algebraic.py | 35 +++
 1 file changed, 35 insertions(+)

diff --git a/src/compiler/nir/nir_algebraic.py 
b/src/compiler/nir/nir_algebraic.py
index d2374d3216a..9d187ca36d7 100644
--- a/src/compiler/nir/nir_algebraic.py
+++ b/src/compiler/nir/nir_algebraic.py
@@ -216,6 +216,39 @@ class Variable(Value):
 _opcode_re = re.compile(r"(?P~)?(?P\w+)(?:@(?P\d+))?"
 r"(?P\([^\)]+\))?")
 
+opcode_remap = {
+   'b2i' : 'b322i',
+   'b2f' : 'b322f',
+   'i2b' : 'i2b32',
+   'f2b' : 'f2b32',
+
+   'flt' : 'flt32',
+   'fge' : 'fge32',
+   'feq' : 'feq32',
+   'fne' : 'fne32',
+   'ilt' : 'ilt32',
+   'ige' : 'ige32',
+   'ieq' : 'ieq32',
+   'ine' : 'ine32',
+   'ult' : 'ult32',
+   'uge' : 'uge32',
+
+   'ball_iequal2' : 'b32all_iequal2',
+   'ball_iequal3' : 'b32all_iequal3',
+   'ball_iequal4' : 'b32all_iequal4',
+   'bany_inequal2' : 'b32any_inequal2',
+   'bany_inequal3' : 'b32any_inequal3',
+   'bany_inequal4' : 'b32any_inequal4',
+   'ball_fequal2' : 'b32all_fequal2',
+   'ball_fequal3' : 'b32all_fequal3',
+   'ball_fequal4' : 'b32all_fequal4',
+   'bany_fnequal2' : 'b32any_fnequal2',
+   'bany_fnequal3' : 'b32any_fnequal3',
+   'bany_fnequal4' : 'b32any_fnequal4',
+
+   'bcsel' : 'b32csel',
+}
+
 class Expression(Value):
def __init__(self, expr, name_base, varset):
   Value.__init__(self, expr, name_base, "expression")
@@ -225,6 +258,8 @@ class Expression(Value):
   assert m and m.group('opcode') is not None
 
   self.opcode = m.group('opcode')
+  if self.opcode in opcode_remap:
+ self.opcode = opcode_remap[self.opcode]
   self.bit_size = int(m.group('bits')) if m.group('bits') else 0
   self.inexact = m.group('inexact') is not None
   self.cond = m.group('cond')
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 24/31] glsl,spirv: Generate 1-bit booleans

2018-10-22 Thread Jason Ekstrand
---
 src/compiler/glsl/glsl_to_nir.cpp | 4 ++--
 src/compiler/nir/nir.h| 2 +-
 src/compiler/nir/nir_opcodes_c.py | 8 
 src/compiler/nir_types.h  | 4 +++-
 src/compiler/spirv/spirv_to_nir.c | 2 +-
 5 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
b/src/compiler/glsl/glsl_to_nir.cpp
index 0479f8fcfe4..8e106d097aa 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -263,7 +263,7 @@ constant_copy(ir_constant *ir, void *mem_ctx)
   assert(cols == 1);
 
   for (unsigned r = 0; r < rows; r++)
- ret->values[0].u32[r] = ir->value.b[r] ? NIR_TRUE : NIR_FALSE;
+ ret->values[0].b[r] = ir->value.b[r];
 
   break;
 
@@ -1178,7 +1178,7 @@ nir_visitor::visit(ir_call *ir)
   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_all:
   case nir_intrinsic_vote_ieq: {
- nir_ssa_dest_init(>instr, >dest, 1, 32, NULL);
+ nir_ssa_dest_init(>instr, >dest, 1, 1, NULL);
  instr->num_components = 1;
 
  ir_rvalue *value = (ir_rvalue *) ir->actual_parameters.get_head();
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 7518ab5b94f..51775e5e18c 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -827,7 +827,7 @@ nir_get_nir_type_for_glsl_base_type(enum glsl_base_type 
base_type)
 {
switch (base_type) {
case GLSL_TYPE_BOOL:
-  return nir_type_bool32;
+  return nir_type_bool1;
   break;
case GLSL_TYPE_UINT:
   return nir_type_uint32;
diff --git a/src/compiler/nir/nir_opcodes_c.py 
b/src/compiler/nir/nir_opcodes_c.py
index fe3fcd4c677..8bfcda6d719 100644
--- a/src/compiler/nir/nir_opcodes_c.py
+++ b/src/compiler/nir/nir_opcodes_c.py
@@ -92,9 +92,9 @@ nir_type_conversion_op(nir_alu_type src, nir_alu_type dst, 
nir_rounding_mode rnd
 %   endfor
 case nir_type_bool:
 %  if src_t == 'float':
-  return nir_op_f2b32;
+  return nir_op_f2b;
 %  else:
-  return nir_op_i2b32;
+  return nir_op_i2b;
 %  endif
 default:
unreachable("Invalid nir alu base type");
@@ -104,9 +104,9 @@ nir_type_conversion_op(nir_alu_type src, nir_alu_type dst, 
nir_rounding_mode rnd
  switch (dst_base) {
 case nir_type_int:
 case nir_type_uint:
-   return nir_op_b322i;
+   return nir_op_b2i;
 case nir_type_float:
-   return nir_op_b322f;
+   return nir_op_b2f;
 default:
unreachable("Invalid nir alu base type");
  }
diff --git a/src/compiler/nir_types.h b/src/compiler/nir_types.h
index 77454fa9fab..60560b81242 100644
--- a/src/compiler/nir_types.h
+++ b/src/compiler/nir_types.h
@@ -95,9 +95,11 @@ static inline unsigned
 glsl_get_bit_size(const struct glsl_type *type)
 {
switch (glsl_get_base_type(type)) {
+   case GLSL_TYPE_BOOL:
+  return 1;
+
case GLSL_TYPE_INT:
case GLSL_TYPE_UINT:
-   case GLSL_TYPE_BOOL:
case GLSL_TYPE_FLOAT: /* TODO handle mediump */
case GLSL_TYPE_SUBROUTINE:
   return 32;
diff --git a/src/compiler/spirv/spirv_to_nir.c 
b/src/compiler/spirv/spirv_to_nir.c
index d0df9ea4718..b0a61d05406 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -1467,7 +1467,7 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
   opcode == SpvOpSpecConstantFalse)
  int_val = get_specialization(b, val, int_val);
 
-  val->constant->values[0].u32[0] = int_val ? NIR_TRUE : NIR_FALSE;
+  val->constant->values[0].b[0] = int_val != 0;
   break;
}
 
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 16/31] FIXUP: nir/builder: Generate 32-bit bool opcodes transparently

2018-10-22 Thread Jason Ekstrand
---
 src/compiler/nir/nir_builder_opcodes_h.py | 44 ++-
 1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/src/compiler/nir/nir_builder_opcodes_h.py 
b/src/compiler/nir/nir_builder_opcodes_h.py
index e600093e9f6..a9de0e4ea47 100644
--- a/src/compiler/nir/nir_builder_opcodes_h.py
+++ b/src/compiler/nir/nir_builder_opcodes_h.py
@@ -27,6 +27,41 @@ template = """\
 #define _NIR_BUILDER_OPCODES_
 
 <%
+opcode_remap = {
+   'b2i' : 'b322i',
+   'b2f' : 'b322f',
+   'i2b' : 'i2b32',
+   'f2b' : 'f2b32',
+
+   'flt' : 'flt32',
+   'fge' : 'fge32',
+   'feq' : 'feq32',
+   'fne' : 'fne32',
+   'ilt' : 'ilt32',
+   'ige' : 'ige32',
+   'ieq' : 'ieq32',
+   'ine' : 'ine32',
+   'ult' : 'ult32',
+   'uge' : 'uge32',
+
+   'ball_iequal2' : 'b32all_iequal2',
+   'ball_iequal3' : 'b32all_iequal3',
+   'ball_iequal4' : 'b32all_iequal4',
+   'bany_inequal2' : 'b32any_inequal2',
+   'bany_inequal3' : 'b32any_inequal3',
+   'bany_inequal4' : 'b32any_inequal4',
+   'ball_fequal2' : 'b32all_fequal2',
+   'ball_fequal3' : 'b32all_fequal3',
+   'ball_fequal4' : 'b32all_fequal4',
+   'bany_fnequal2' : 'b32any_fnequal2',
+   'bany_fnequal3' : 'b32any_fnequal3',
+   'bany_fnequal4' : 'b32any_fnequal4',
+
+   'bcsel' : 'b32csel',
+}
+
+opcode_remap32 = { op32 : op for op, op32 in opcode_remap.items() }
+
 def src_decl_list(num_srcs):
return ', '.join('nir_ssa_def *src' + str(i) for i in range(num_srcs))
 
@@ -35,8 +70,15 @@ def src_list(num_srcs):
 %>
 
 % for name, opcode in sorted(opcodes.items()):
+  % if name in opcode_remap:
+<% continue %>
+  % elif name in opcode_remap32:
+<% builder_name = opcode_remap32[name] %>
+  % else:
+<% builder_name = name %>
+  % endif
 static inline nir_ssa_def *
-nir_${name}(nir_builder *build, ${src_decl_list(opcode.num_inputs)})
+nir_${builder_name}(nir_builder *build, ${src_decl_list(opcode.num_inputs)})
 {
return nir_build_alu(build, nir_op_${name}, ${src_list(opcode.num_inputs)});
 }
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 22/31] nir: Add 1-bit boolean opcodes

2018-10-22 Thread Jason Ekstrand
---
 src/compiler/nir/nir_lower_alu_to_scalar.c |  4 +++
 src/compiler/nir/nir_opcodes.py| 29 ++
 2 files changed, 33 insertions(+)

diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c 
b/src/compiler/nir/nir_lower_alu_to_scalar.c
index e424dff25c4..4f97472a87d 100644
--- a/src/compiler/nir/nir_lower_alu_to_scalar.c
+++ b/src/compiler/nir/nir_lower_alu_to_scalar.c
@@ -197,6 +197,10 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder 
*b)
   return false;
 
   LOWER_REDUCTION(nir_op_fdot, nir_op_fmul, nir_op_fadd);
+  LOWER_REDUCTION(nir_op_ball_fequal, nir_op_feq, nir_op_iand);
+  LOWER_REDUCTION(nir_op_ball_iequal, nir_op_ieq, nir_op_iand);
+  LOWER_REDUCTION(nir_op_bany_fnequal, nir_op_fne, nir_op_ior);
+  LOWER_REDUCTION(nir_op_bany_inequal, nir_op_ine, nir_op_ior);
   LOWER_REDUCTION(nir_op_b32all_fequal, nir_op_feq32, nir_op_iand);
   LOWER_REDUCTION(nir_op_b32all_iequal, nir_op_ieq32, nir_op_iand);
   LOWER_REDUCTION(nir_op_b32any_fnequal, nir_op_fne32, nir_op_ior);
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index d349f74ed2a..aee505667b8 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -89,6 +89,7 @@ class Opcode(object):
 # helper variables for strings
 tfloat = "float"
 tint = "int"
+tbool = "bool1"
 tbool32 = "bool32"
 tuint = "uint"
 tuint16 = "uint16"
@@ -192,6 +193,10 @@ for src_t in [tint, tuint, tfloat]:
 
 # We'll hand-code the to/from bool conversion opcodes.  Because bool doesn't
 # have multiple bit-sizes, we can always infer the size from the other type.
+unop_convert("f2b", tbool, tfloat, "src0 != 0.0")
+unop_convert("i2b", tbool, tint, "src0 != 0")
+unop_convert("b2f", tfloat, tbool, "src0 ? 1.0 : 0.0")
+unop_convert("b2i", tint, tbool, "src0 ? 1 : 0")
 unop_convert("f2b32", tbool32, tfloat, "src0 != 0.0")
 unop_convert("i2b32", tbool32, tint, "src0 != 0")
 unop_convert("b322f", tfloat, tbool32, "src0 ? 1.0 : 0.0")
@@ -405,6 +410,9 @@ def binop_convert(name, out_type, in_type, alg_props, 
const_expr):
 def binop(name, ty, alg_props, const_expr):
binop_convert(name, ty, ty, alg_props, const_expr)
 
+def binop_compare(name, ty, alg_props, const_expr):
+   binop_convert(name, tbool, ty, alg_props, const_expr)
+
 def binop_compare32(name, ty, alg_props, const_expr):
binop_convert(name, tbool32, ty, alg_props, const_expr)
 
@@ -488,6 +496,16 @@ binop("frem", tfloat, "", "src0 - src1 * truncf(src0 / 
src1)")
 
 # these integer-aware comparisons return a boolean (0 or ~0)
 
+binop_compare("flt", tfloat, "", "src0 < src1")
+binop_compare("fge", tfloat, "", "src0 >= src1")
+binop_compare("feq", tfloat, commutative, "src0 == src1")
+binop_compare("fne", tfloat, commutative, "src0 != src1")
+binop_compare("ilt", tint, "", "src0 < src1")
+binop_compare("ige", tint, "", "src0 >= src1")
+binop_compare("ieq", tint, commutative, "src0 == src1")
+binop_compare("ine", tint, commutative, "src0 != src1")
+binop_compare("ult", tuint, "", "src0 < src1")
+binop_compare("uge", tuint, "", "src0 >= src1")
 binop_compare32("flt32", tfloat, "", "src0 < src1")
 binop_compare32("fge32", tfloat, "", "src0 >= src1")
 binop_compare32("feq32", tfloat, commutative, "src0 == src1")
@@ -501,6 +519,15 @@ binop_compare32("uge32", tuint, "", "src0 >= src1")
 
 # integer-aware GLSL-style comparisons that compare floats and ints
 
+binop_reduce("ball_fequal",  1, tbool, tfloat, "{src0} == {src1}",
+ "{src0} && {src1}", "{src}")
+binop_reduce("bany_fnequal", 1, tbool, tfloat, "{src0} != {src1}",
+ "{src0} || {src1}", "{src}")
+binop_reduce("ball_iequal",  1, tbool, tint, "{src0} == {src1}",
+ "{src0} && {src1}", "{src}")
+binop_reduce("bany_inequal", 1, tbool, tint, "{src0} != {src1}",
+ "{src0} || {src1}", "{src}")
+
 binop_reduce("b32all_fequal",  1, tbool32, tfloat, "{src0} == {src1}",
  "{src0} && {src1}", "{src}")
 binop_reduce("b32any_fnequal", 1, tbool32, tfloat, "{src0} != {src1}",
@@ -694,6 +721,8 @@ triop("fmed3", tfloat, "fmaxf(fminf(fmaxf(src0, src1), 
src2), fminf(src0, src1))
 triop("imed3", tint, "MAX2(MIN2(MAX2(src0, src1), src2), MIN2(src0, src1))")
 triop("umed3", tuint, "MAX2(MIN2(MAX2(src0, src1), src2), MIN2(src0, src1))")
 
+opcode("bcsel", 0, tuint, [0, 0, 0],
+  [tbool, tuint, tuint], "", "src0 ? src1 : src2")
 opcode("b32csel", 0, tuint, [0, 0, 0],
   [tbool32, tuint, tuint], "", "src0 ? src1 : src2")
 
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 13/31] intel/nir: Use the OPT macro for more passes

2018-10-22 Thread Jason Ekstrand
---
 src/intel/compiler/brw_nir.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index 1cd56861578..cf5a4a96d67 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -674,7 +674,7 @@ brw_preprocess_nir(const struct brw_compiler *compiler, 
nir_shader *nir)
/* Lower int64 instructions before nir_optimize so that loop unrolling
 * sees their actual cost.
 */
-   nir_lower_int64(nir, nir_lower_imul64 |
+   OPT(nir_lower_int64, nir_lower_imul64 |
 nir_lower_isign64 |
 nir_lower_divmod64);
 
@@ -687,7 +687,7 @@ brw_preprocess_nir(const struct brw_compiler *compiler, 
nir_shader *nir)
   OPT(nir_opt_large_constants, NULL, 32);
}
 
-   nir_lower_bit_size(nir, lower_bit_size_callback, NULL);
+   OPT(nir_lower_bit_size, lower_bit_size_callback, NULL);
 
if (is_scalar) {
   OPT(nir_lower_load_const_to_scalar);
@@ -712,7 +712,7 @@ brw_preprocess_nir(const struct brw_compiler *compiler, 
nir_shader *nir)
 
nir_variable_mode indirect_mask =
   brw_nir_no_indirect_mask(compiler, nir->info.stage);
-   nir_lower_indirect_derefs(nir, indirect_mask);
+   OPT(nir_lower_indirect_derefs, indirect_mask);
 
/* Get rid of split copies */
nir = brw_nir_optimize(nir, compiler, is_scalar, false);
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 12/31] spirv: Initialize subgroup destinations with the destination type

2018-10-22 Thread Jason Ekstrand
Instead of initializing them manually, just use the type that we already
have sitting there.
---
 src/compiler/spirv/vtn_subgroup.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/compiler/spirv/vtn_subgroup.c 
b/src/compiler/spirv/vtn_subgroup.c
index ecec3aa62d0..419130515ec 100644
--- a/src/compiler/spirv/vtn_subgroup.c
+++ b/src/compiler/spirv/vtn_subgroup.c
@@ -81,7 +81,8 @@ vtn_handle_subgroup(struct vtn_builder *b, SpvOp opcode,
   "OpGroupNonUniformElect must return a Bool");
   nir_intrinsic_instr *elect =
  nir_intrinsic_instr_create(b->nb.shader, nir_intrinsic_elect);
-  nir_ssa_dest_init(>instr, >dest, 1, 32, NULL);
+  nir_ssa_dest_init_for_type(>instr, >dest,
+ val->type->type, NULL);
   nir_builder_instr_insert(>nb, >instr);
   val->ssa->def = >dest.ssa;
   break;
@@ -112,7 +113,8 @@ vtn_handle_subgroup(struct vtn_builder *b, SpvOp opcode,
   intrin->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[4])->def);
   intrin->src[1] = nir_src_for_ssa(nir_load_subgroup_invocation(>nb));
 
-  nir_ssa_dest_init(>instr, >dest, 1, 32, NULL);
+  nir_ssa_dest_init_for_type(>instr, >dest,
+ val->type->type, NULL);
   nir_builder_instr_insert(>nb, >instr);
 
   val->ssa->def = >dest.ssa;
@@ -166,7 +168,8 @@ vtn_handle_subgroup(struct vtn_builder *b, SpvOp opcode,
   if (src1)
  intrin->src[1] = nir_src_for_ssa(src1);
 
-  nir_ssa_dest_init(>instr, >dest, 1, 32, NULL);
+  nir_ssa_dest_init_for_type(>instr, >dest,
+ val->type->type, NULL);
   nir_builder_instr_insert(>nb, >instr);
 
   val->ssa->def = >dest.ssa;
@@ -225,7 +228,8 @@ vtn_handle_subgroup(struct vtn_builder *b, SpvOp opcode,
  nir_intrinsic_instr_create(b->nb.shader, op);
   intrin->num_components = src0->num_components;
   intrin->src[0] = nir_src_for_ssa(src0);
-  nir_ssa_dest_init(>instr, >dest, 1, 32, NULL);
+  nir_ssa_dest_init_for_type(>instr, >dest,
+ val->type->type, NULL);
   nir_builder_instr_insert(>nb, >instr);
 
   val->ssa->def = >dest.ssa;
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 11/31] spirv: Use the right bit-size for spec constant ops

2018-10-22 Thread Jason Ekstrand
Previously, we would always pull the bit size from the destination which
is wrong for opcodes like nir_ilt where the sources are variable-sized
but the destination is a fixed size.  We were getting lucky before
because nir_op_ilt returns a 32-bit value and basically everyone who
uses spec constants uses 32-bit ones.

Cc: mesa-sta...@lists.freedesktop.org
---
 src/compiler/spirv/spirv_to_nir.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/compiler/spirv/spirv_to_nir.c 
b/src/compiler/spirv/spirv_to_nir.c
index 5bac3dc0e17..96ff09c3659 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -1798,11 +1798,17 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
  nir_const_value src[4];
 
  for (unsigned i = 0; i < count - 4; i++) {
-nir_constant *c =
-   vtn_value(b, w[4 + i], vtn_value_type_constant)->constant;
+struct vtn_value *src_val =
+   vtn_value(b, w[4 + i], vtn_value_type_constant);
+
+/* If this is an unsized source, pull the bit size from the
+ * source; otherwise, we'll use the bit size from the destination.
+ */
+if (!nir_alu_type_get_type_size(nir_op_infos[op].input_types[i]))
+   bit_size = glsl_get_bit_size(src_val->type->type);
 
 unsigned j = swap ? 1 - i : i;
-src[j] = c->values[0];
+src[j] = src_val->constant->values[0];
  }
 
  val->constant->values[0] =
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/31] nir/prog: Use nir_bany in kill handling

2018-10-22 Thread Jason Ekstrand
We have a helper that does exactly what the bany_inequal was doing.  It
emits the same code but is a bit higher level and is designed to operate
on a bvec4.
---
 src/mesa/program/prog_to_nir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c
index 1f0607542e8..47103306ad4 100644
--- a/src/mesa/program/prog_to_nir.c
+++ b/src/mesa/program/prog_to_nir.c
@@ -475,7 +475,7 @@ static void
 ptn_kil(nir_builder *b, nir_ssa_def **src)
 {
nir_ssa_def *cmp = b->shader->options->native_integers ?
-  nir_bany_inequal4(b, nir_flt(b, src[0], nir_imm_float(b, 0.0)), 
nir_imm_int(b, 0)) :
+  nir_bany(b, nir_flt(b, src[0], nir_imm_float(b, 0.0))) :
   nir_fany_nequal4(b, nir_slt(b, src[0], nir_imm_float(b, 0.0)), 
nir_imm_float(b, 0.0));
 
nir_intrinsic_instr *discard =
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 08/31] nir/system_values: Use the bit size from the load_deref

2018-10-22 Thread Jason Ekstrand
This isn't a great solution for bit-sizes but we don't have a
particularly convenient way to get a bit size from the system value enum
and this keeps the lowering pass from changing it.
---
 src/compiler/nir/nir_lower_system_values.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/compiler/nir/nir_lower_system_values.c 
b/src/compiler/nir/nir_lower_system_values.c
index 2820dcd1b3c..bde7eb11801 100644
--- a/src/compiler/nir/nir_lower_system_values.c
+++ b/src/compiler/nir/nir_lower_system_values.c
@@ -203,6 +203,7 @@ convert_block(nir_block *block, nir_builder *b)
  nir_intrinsic_op sysval_op =
 nir_intrinsic_from_system_value(var->data.location);
  sysval = nir_load_system_value(b, sysval_op, 0);
+ sysval->bit_size = load_deref->dest.ssa.bit_size;
   }
 
   nir_ssa_def_rewrite_uses(_deref->dest.ssa, nir_src_for_ssa(sysval));
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/31] nir/search: Use nir_builder

2018-10-22 Thread Jason Ekstrand
---
 src/compiler/nir/nir_algebraic.py |  14 ++--
 src/compiler/nir/nir_search.c | 111 --
 src/compiler/nir/nir_search.h |   9 ++-
 3 files changed, 43 insertions(+), 91 deletions(-)

diff --git a/src/compiler/nir/nir_algebraic.py 
b/src/compiler/nir/nir_algebraic.py
index 4134d496030..d2374d3216a 100644
--- a/src/compiler/nir/nir_algebraic.py
+++ b/src/compiler/nir/nir_algebraic.py
@@ -563,6 +563,7 @@ class SearchAndReplace(object):
 
 _algebraic_pass_template = mako.template.Template("""
 #include "nir.h"
+#include "nir_builder.h"
 #include "nir_search.h"
 #include "nir_search_helpers.h"
 
@@ -591,8 +592,8 @@ static const struct transform 
${pass_name}_${opcode}_xforms[] = {
 % endfor
 
 static bool
-${pass_name}_block(nir_block *block, const bool *condition_flags,
-   void *mem_ctx)
+${pass_name}_block(nir_builder *build, nir_block *block,
+   const bool *condition_flags)
 {
bool progress = false;
 
@@ -610,8 +611,7 @@ ${pass_name}_block(nir_block *block, const bool 
*condition_flags,
  for (unsigned i = 0; i < ARRAY_SIZE(${pass_name}_${opcode}_xforms); 
i++) {
 const struct transform *xform = &${pass_name}_${opcode}_xforms[i];
 if (condition_flags[xform->condition_offset] &&
-nir_replace_instr(alu, xform->search, xform->replace,
-  mem_ctx)) {
+nir_replace_instr(build, alu, xform->search, xform->replace)) {
progress = true;
break;
 }
@@ -629,11 +629,13 @@ ${pass_name}_block(nir_block *block, const bool 
*condition_flags,
 static bool
 ${pass_name}_impl(nir_function_impl *impl, const bool *condition_flags)
 {
-   void *mem_ctx = ralloc_parent(impl);
bool progress = false;
 
+   nir_builder build;
+   nir_builder_init(, impl);
+
nir_foreach_block_reverse(block, impl) {
-  progress |= ${pass_name}_block(block, condition_flags, mem_ctx);
+  progress |= ${pass_name}_block(, block, condition_flags);
}
 
if (progress)
diff --git a/src/compiler/nir/nir_search.c b/src/compiler/nir/nir_search.c
index 1686b6bd0de..0270302fd3d 100644
--- a/src/compiler/nir/nir_search.c
+++ b/src/compiler/nir/nir_search.c
@@ -27,6 +27,7 @@
 
 #include 
 #include "nir_search.h"
+#include "nir_builder.h"
 #include "util/half_float.h"
 
 struct match_state {
@@ -408,10 +409,11 @@ bitsize_tree_filter_down(bitsize_tree *tree, unsigned 
size)
 }
 
 static nir_alu_src
-construct_value(const nir_search_value *value,
+construct_value(nir_builder *build,
+const nir_search_value *value,
 unsigned num_components, bitsize_tree *bitsize,
 struct match_state *state,
-nir_instr *instr, void *mem_ctx)
+nir_instr *instr)
 {
switch (value->type) {
case nir_search_value_expression: {
@@ -420,7 +422,7 @@ construct_value(const nir_search_value *value,
   if (nir_op_infos[expr->opcode].output_size != 0)
  num_components = nir_op_infos[expr->opcode].output_size;
 
-  nir_alu_instr *alu = nir_alu_instr_create(mem_ctx, expr->opcode);
+  nir_alu_instr *alu = nir_alu_instr_create(build->shader, expr->opcode);
   nir_ssa_dest_init(>instr, >dest.dest, num_components,
 bitsize->dest_size, NULL);
   alu->dest.write_mask = (1 << num_components) - 1;
@@ -440,12 +442,12 @@ construct_value(const nir_search_value *value,
  if (nir_op_infos[alu->op].input_sizes[i] != 0)
 num_components = nir_op_infos[alu->op].input_sizes[i];
 
- alu->src[i] = construct_value(expr->srcs[i],
+ alu->src[i] = construct_value(build, expr->srcs[i],
num_components, bitsize->srcs[i],
-   state, instr, mem_ctx);
+   state, instr);
   }
 
-  nir_instr_insert_before(instr, >instr);
+  nir_builder_instr_insert(build, >instr);
 
   nir_alu_src val;
   val.src = nir_src_for_ssa(>dest.dest.ssa);
@@ -461,8 +463,8 @@ construct_value(const nir_search_value *value,
   assert(state->variables_seen & (1 << var->variable));
 
   nir_alu_src val = { NIR_SRC_INIT };
-  nir_alu_src_copy(, >variables[var->variable], mem_ctx);
-
+  nir_alu_src_copy(, >variables[var->variable],
+   (void *)build->shader);
   assert(!var->is_constant);
 
   return val;
@@ -470,79 +472,27 @@ construct_value(const nir_search_value *value,
 
case nir_search_value_constant: {
   const nir_search_constant *c = nir_search_value_as_constant(value);
-  nir_load_const_instr *load =
- nir_load_const_instr_create(mem_ctx, 1, bitsize->dest_size);
 
+  nir_ssa_def *cval;
   switch (c->type) {
   case nir_type_float:
- load->def.name = ralloc_asprintf(load, "%f", c->data.d);
- switch (bitsize->dest_size) {
- 

[Mesa-dev] [PATCH 09/31] glsl/nir: Use i2b instead of ine for fixing UBO/SSBO booleans

2018-10-22 Thread Jason Ekstrand
They do the same thing in the end but i2b is a bit simpler.  Also, let's
clean up the mess of code for SSBO handling with one line of builder.
---
 src/compiler/glsl/glsl_to_nir.cpp | 24 +---
 1 file changed, 5 insertions(+), 19 deletions(-)

diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
b/src/compiler/glsl/glsl_to_nir.cpp
index dc6ffa3599d..0479f8fcfe4 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -833,7 +833,7 @@ nir_visitor::visit(ir_call *ir)
   }
 
   nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op);
-  nir_dest *dest = >dest;
+  nir_ssa_def *ret = >dest.ssa;
 
   switch (op) {
   case nir_intrinsic_atomic_counter_read_deref:
@@ -1037,22 +1037,8 @@ nir_visitor::visit(ir_call *ir)
   * consider a true boolean to be ~0. Fix this up with a != 0
   * comparison.
   */
- if (type->is_boolean()) {
-nir_alu_instr *load_ssbo_compare =
-   nir_alu_instr_create(shader, nir_op_ine);
-load_ssbo_compare->src[0].src.is_ssa = true;
-load_ssbo_compare->src[0].src.ssa = >dest.ssa;
-load_ssbo_compare->src[1].src =
-   nir_src_for_ssa(nir_imm_int(, 0));
-for (unsigned i = 0; i < type->vector_elements; i++)
-   load_ssbo_compare->src[1].swizzle[i] = 0;
-nir_ssa_dest_init(_ssbo_compare->instr,
-  _ssbo_compare->dest.dest,
-  type->vector_elements, bit_size, NULL);
-load_ssbo_compare->dest.write_mask = (1 << type->vector_elements) 
- 1;
-nir_builder_instr_insert(, _ssbo_compare->instr);
-dest = _ssbo_compare->dest.dest;
- }
+ if (type->is_boolean())
+ret = nir_i2b(, >dest.ssa);
  break;
   }
   case nir_intrinsic_ssbo_atomic_add:
@@ -1243,7 +1229,7 @@ nir_visitor::visit(ir_call *ir)
   }
 
   if (ir->return_deref)
- nir_store_deref(, evaluate_deref(ir->return_deref), >ssa, ~0);
+ nir_store_deref(, evaluate_deref(ir->return_deref), ret, ~0);
 
   return;
}
@@ -1403,7 +1389,7 @@ nir_visitor::visit(ir_expression *ir)
*/
 
   if (ir->type->is_boolean())
- this->result = nir_ine(, >dest.ssa, nir_imm_int(, 0));
+ this->result = nir_i2b(, >dest.ssa);
 
   return;
}
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/31] nir/builder: Handle 16-bit floats in nir_imm_floatN_t

2018-10-22 Thread Jason Ekstrand
---
 src/compiler/nir/nir_builder.h | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h
index 5ea0a5a2637..3271a480520 100644
--- a/src/compiler/nir/nir_builder.h
+++ b/src/compiler/nir/nir_builder.h
@@ -25,6 +25,7 @@
 #define NIR_BUILDER_H
 
 #include "nir_control_flow.h"
+#include "util/half_float.h"
 
 struct exec_list;
 
@@ -228,6 +229,17 @@ nir_imm_false(nir_builder *build)
return nir_imm_bool(build, false);
 }
 
+static inline nir_ssa_def *
+nir_imm_float16(nir_builder *build, float x)
+{
+   nir_const_value v;
+
+   memset(, 0, sizeof(v));
+   v.u16[0] = _mesa_float_to_half(x);
+
+   return nir_build_imm(build, 1, 16, v);
+}
+
 static inline nir_ssa_def *
 nir_imm_float(nir_builder *build, float x)
 {
@@ -254,6 +266,8 @@ static inline nir_ssa_def *
 nir_imm_floatN_t(nir_builder *build, double x, unsigned bit_size)
 {
switch (bit_size) {
+   case 16:
+  return nir_imm_float16(build, x);
case 32:
   return nir_imm_float(build, x);
case 64:
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/31] nir/validate: Print when the validation failed

2018-10-22 Thread Jason Ekstrand
---
 src/amd/vulkan/radv_shader.c  |  4 +--
 src/compiler/nir/nir.h| 12 +++
 src/compiler/nir/nir_validate.c   | 14 ++---
 src/compiler/nir/tests/vars_tests.cpp | 38 +++
 src/intel/compiler/brw_nir.c  |  8 ++---
 src/intel/vulkan/anv_pipeline.c   |  2 +-
 src/mesa/drivers/dri/i965/brw_program.c   |  4 +--
 src/mesa/main/glspirv.c   |  2 +-
 src/mesa/state_tracker/st_glsl_to_nir.cpp |  2 +-
 9 files changed, 46 insertions(+), 40 deletions(-)

diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 15c9de1e020..edeaefbc1a2 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -189,7 +189,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
 * and just use the NIR shader */
nir = module->nir;
nir->options = _options;
-   nir_validate_shader(nir);
+   nir_validate_shader(nir, "in internal shader");
 
assert(exec_list_length(>functions) == 1);
struct exec_node *node = exec_list_get_head(>functions);
@@ -251,7 +251,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
   _options, _options);
nir = entry_point->shader;
assert(nir->info.stage == stage);
-   nir_validate_shader(nir);
+   nir_validate_shader(nir, "after spirv_to_nir");
 
free(spec_entries);
 
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 0ba19cbb25d..93d0fb5271c 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2664,7 +2664,7 @@ nir_variable *nir_variable_clone(const nir_variable *c, 
nir_shader *shader);
 nir_shader *nir_shader_serialize_deserialize(void *mem_ctx, nir_shader *s);
 
 #ifndef NDEBUG
-void nir_validate_shader(nir_shader *shader);
+void nir_validate_shader(nir_shader *shader, const char *when);
 void nir_metadata_set_validation_flag(nir_shader *shader);
 void nir_metadata_check_validation_flag(nir_shader *shader);
 
@@ -2698,7 +2698,7 @@ should_print_nir(void)
return should_print;
 }
 #else
-static inline void nir_validate_shader(nir_shader *shader) { (void) shader; }
+static inline void nir_validate_shader(nir_shader *shader, const char *when) { 
(void) shader; (void)when; }
 static inline void nir_metadata_set_validation_flag(nir_shader *shader) { 
(void) shader; }
 static inline void nir_metadata_check_validation_flag(nir_shader *shader) { 
(void) shader; }
 static inline bool should_clone_nir(void) { return false; }
@@ -2706,9 +2706,9 @@ static inline bool should_serialize_deserialize_nir(void) 
{ return false; }
 static inline bool should_print_nir(void) { return false; }
 #endif /* NDEBUG */
 
-#define _PASS(nir, do_pass) do { \
+#define _PASS(pass, nir, do_pass) do {   \
do_pass   \
-   nir_validate_shader(nir); \
+   nir_validate_shader(nir, "after " #pass); \
if (should_clone_nir()) { \
   nir_shader *clone = nir_shader_clone(ralloc_parent(nir), nir); \
   ralloc_free(nir);  \
@@ -2720,7 +2720,7 @@ static inline bool should_print_nir(void) { return false; 
}
} \
 } while (0)
 
-#define NIR_PASS(progress, nir, pass, ...) _PASS(nir,\
+#define NIR_PASS(progress, nir, pass, ...) _PASS(pass, nir,  \
nir_metadata_set_validation_flag(nir);\
if (should_print_nir())   \
   printf("%s\n", #pass); \
@@ -2732,7 +2732,7 @@ static inline bool should_print_nir(void) { return false; 
}
} \
 )
 
-#define NIR_PASS_V(nir, pass, ...) _PASS(nir,\
+#define NIR_PASS_V(nir, pass, ...) _PASS(pass, nir,  \
if (should_print_nir())   \
   printf("%s\n", #pass); \
pass(nir, ##__VA_ARGS__); \
diff --git a/src/compiler/nir/nir_validate.c b/src/compiler/nir/nir_validate.c
index 1852224b520..60d64f9c7a1 100644
--- a/src/compiler/nir/nir_validate.c
+++ b/src/compiler/nir/nir_validate.c
@@ -1146,11 +1146,17 @@ destroy_validate_state(validate_state *state)
 }
 
 static void
-dump_errors(validate_state *state)
+dump_errors(validate_state *state, const char *when)
 {
struct hash_table *errors = state->errors;
 
-   fprintf(stderr, "%d errors:\n", _mesa_hash_table_num_entries(errors));
+   if (when) {
+  

[Mesa-dev] [PATCH 07/31] nir/opt_if: Rework condition propagation

2018-10-22 Thread Jason Ekstrand
Instead of doing our own constant folding, we just emit instructions and
let constant folding happen.  This is substantially simpler and lets us
use the nir_imm_bool helper instead of dealing with the const_value's
ourselves.
---
 src/compiler/nir/nir_opt_if.c | 91 ---
 1 file changed, 30 insertions(+), 61 deletions(-)

diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
index 0c94aa170b5..60368a0259e 100644
--- a/src/compiler/nir/nir_opt_if.c
+++ b/src/compiler/nir/nir_opt_if.c
@@ -377,31 +377,15 @@ opt_if_loop_terminator(nir_if *nif)
return true;
 }
 
-static void
-replace_if_condition_use_with_const(nir_builder *b, nir_src *use,
-nir_const_value nir_boolean,
-bool if_condition)
-{
-   /* Create const */
-   nir_ssa_def *const_def = nir_build_imm(b, 1, 32, nir_boolean);
-
-   /* Rewrite use to use const */
-   nir_src new_src = nir_src_for_ssa(const_def);
-   if (if_condition)
-  nir_if_rewrite_condition(use->parent_if, new_src);
-   else
-  nir_instr_rewrite_src(use->parent_instr, use, new_src);
-}
-
 static bool
-evaluate_if_condition(nir_if *nif, nir_cursor cursor, uint32_t *value)
+evaluate_if_condition(nir_if *nif, nir_cursor cursor, bool *value)
 {
nir_block *use_block = nir_cursor_current_block(cursor);
if (nir_block_dominates(nir_if_first_then_block(nif), use_block)) {
-  *value = NIR_TRUE;
+  *value = true;
   return true;
} else if (nir_block_dominates(nir_if_first_else_block(nif), use_block)) {
-  *value = NIR_FALSE;
+  *value = false;
   return true;
} else {
   return false;
@@ -460,52 +444,31 @@ propagate_condition_eval(nir_builder *b, nir_if *nif, 
nir_src *use_src,
  nir_src *alu_use, nir_alu_instr *alu,
  bool is_if_condition)
 {
-   bool progress = false;
+   bool bool_value;
+   if (!evaluate_if_condition(nif, b->cursor, _value))
+  return false;
 
-   nir_const_value bool_value;
b->cursor = nir_before_src(alu_use, is_if_condition);
-   if (nir_op_infos[alu->op].num_inputs == 1) {
-  assert(alu->op == nir_op_inot || alu->op == nir_op_b2i);
-
-  if (evaluate_if_condition(nif, b->cursor, _value.u32[0])) {
- assert(nir_src_bit_size(alu->src[0].src) == 32);
-
- nir_const_value result =
-nir_eval_const_opcode(alu->op, 1, 32, _value);
 
- replace_if_condition_use_with_const(b, alu_use, result,
- is_if_condition);
- progress = true;
+   nir_ssa_def *def[2] = { };
+   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
+  if (alu->src[i].src.ssa == use_src->ssa) {
+ def[i] = nir_imm_bool(b, bool_value);
+  } else {
+ def[i] = alu->src[i].src.ssa;
   }
-   } else {
-  assert(alu->op == nir_op_ior || alu->op == nir_op_iand);
-
-  if (evaluate_if_condition(nif, b->cursor, _value.u32[0])) {
- nir_ssa_def *def[2];
- for (unsigned i = 0; i < 2; i++) {
-if (alu->src[i].src.ssa == use_src->ssa) {
-   def[i] = nir_build_imm(b, 1, 32, bool_value);
-} else {
-   def[i] = alu->src[i].src.ssa;
-}
- }
-
- nir_ssa_def *nalu =
-nir_build_alu(b, alu->op, def[0], def[1], NULL, NULL);
-
- /* Rewrite use to use new alu instruction */
- nir_src new_src = nir_src_for_ssa(nalu);
+   }
+   nir_ssa_def *nalu = nir_build_alu(b, alu->op, def[0], def[1], NULL, NULL);
 
- if (is_if_condition)
-nir_if_rewrite_condition(alu_use->parent_if, new_src);
- else
-nir_instr_rewrite_src(alu_use->parent_instr, alu_use, new_src);
+   /* Rewrite use to use new alu instruction */
+   nir_src new_src = nir_src_for_ssa(nalu);
 
- progress = true;
-  }
-   }
+   if (is_if_condition)
+  nir_if_rewrite_condition(alu_use->parent_if, new_src);
+   else
+  nir_instr_rewrite_src(alu_use->parent_instr, alu_use, new_src);
 
-   return progress;
+   return true;
 }
 
 static bool
@@ -527,11 +490,17 @@ evaluate_condition_use(nir_builder *b, nir_if *nif, 
nir_src *use_src,
 {
bool progress = false;
 
-   nir_const_value value;
b->cursor = nir_before_src(use_src, is_if_condition);
 
-   if (evaluate_if_condition(nif, b->cursor, [0])) {
-  replace_if_condition_use_with_const(b, use_src, value, is_if_condition);
+   bool bool_value;
+   if (evaluate_if_condition(nif, b->cursor, _value)) {
+  /* Rewrite use to use const */
+  nir_src imm_src = nir_src_for_ssa(nir_imm_bool(b, bool_value));
+  if (is_if_condition)
+ nir_if_rewrite_condition(use_src->parent_if, imm_src);
+  else
+ nir_instr_rewrite_src(use_src->parent_instr, use_src, imm_src);
+
   progress = true;
}
 
-- 
2.19.1

___
mesa-dev mailing list

[Mesa-dev] [PATCH 02/31] nir/constant_folding: Add an unreachable to a switch

2018-10-22 Thread Jason Ekstrand
---
 src/compiler/nir/nir_opt_constant_folding.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/compiler/nir/nir_opt_constant_folding.c 
b/src/compiler/nir/nir_opt_constant_folding.c
index e2920e6b3fd..05b47d4c0fe 100644
--- a/src/compiler/nir/nir_opt_constant_folding.c
+++ b/src/compiler/nir/nir_opt_constant_folding.c
@@ -89,6 +89,8 @@ constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
  case 8:
 src[i].u8[j] = load_const->value.u8[instr->src[i].swizzle[j]];
 break;
+ default:
+unreachable("Invalid bit size");
  }
   }
 
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/31] nir/builder: Add a nir_imm_true/false helpers

2018-10-22 Thread Jason Ekstrand
---
 src/compiler/nir/nir_builder.h| 25 ++-
 src/compiler/nir/nir_lower_int64.c|  2 +-
 src/compiler/nir/nir_lower_returns.c  |  4 +--
 src/compiler/nir/nir_lower_subgroups.c|  2 +-
 src/compiler/nir/nir_opt_intrinsics.c |  2 +-
 src/compiler/spirv/vtn_alu.c  |  4 +--
 src/compiler/spirv/vtn_cfg.c  | 10 
 .../compiler/brw_nir_lower_image_load_store.c |  2 +-
 8 files changed, 37 insertions(+), 14 deletions(-)

diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h
index 398fa68c251..5ea0a5a2637 100644
--- a/src/compiler/nir/nir_builder.h
+++ b/src/compiler/nir/nir_builder.h
@@ -205,6 +205,29 @@ nir_build_imm(nir_builder *build, unsigned num_components,
return _const->def;
 }
 
+static inline nir_ssa_def *
+nir_imm_bool(nir_builder *build, bool x)
+{
+   nir_const_value v;
+
+   memset(, 0, sizeof(v));
+   v.u32[0] = x ? NIR_TRUE : NIR_FALSE;
+
+   return nir_build_imm(build, 1, 32, v);
+}
+
+static inline nir_ssa_def *
+nir_imm_true(nir_builder *build)
+{
+   return nir_imm_bool(build, true);
+}
+
+static inline nir_ssa_def *
+nir_imm_false(nir_builder *build)
+{
+   return nir_imm_bool(build, false);
+}
+
 static inline nir_ssa_def *
 nir_imm_float(nir_builder *build, float x)
 {
@@ -489,7 +512,7 @@ nir_bany_inequal(nir_builder *b, nir_ssa_def *src0, 
nir_ssa_def *src1)
 static inline nir_ssa_def *
 nir_bany(nir_builder *b, nir_ssa_def *src)
 {
-   return nir_bany_inequal(b, src, nir_imm_int(b, 0));
+   return nir_bany_inequal(b, src, nir_imm_false(b));
 }
 
 static inline nir_ssa_def *
diff --git a/src/compiler/nir/nir_lower_int64.c 
b/src/compiler/nir/nir_lower_int64.c
index 0d7f165b406..50acc858605 100644
--- a/src/compiler/nir/nir_lower_int64.c
+++ b/src/compiler/nir/nir_lower_int64.c
@@ -86,7 +86,7 @@ lower_udiv64_mod64(nir_builder *b, nir_ssa_def *n, 
nir_ssa_def *d,
* this is always true within the if statement.
*/
   if (n->num_components == 1)
- need_high_div = nir_imm_int(b, NIR_TRUE);
+ need_high_div = nir_imm_true(b);
 
   nir_ssa_def *log2_d_lo = nir_ufind_msb(b, d_lo);
 
diff --git a/src/compiler/nir/nir_lower_returns.c 
b/src/compiler/nir/nir_lower_returns.c
index 9c4881112e3..292671ea8cb 100644
--- a/src/compiler/nir/nir_lower_returns.c
+++ b/src/compiler/nir/nir_lower_returns.c
@@ -198,11 +198,11 @@ lower_returns_in_block(nir_block *block, struct 
lower_returns_state *state)
 
   /* Initialize the variable to 0 */
   b->cursor = nir_before_cf_list(>impl->body);
-  nir_store_var(b, state->return_flag, nir_imm_int(b, NIR_FALSE), 1);
+  nir_store_var(b, state->return_flag, nir_imm_false(b), 1);
}
 
b->cursor = nir_after_block(block);
-   nir_store_var(b, state->return_flag, nir_imm_int(b, NIR_TRUE), 1);
+   nir_store_var(b, state->return_flag, nir_imm_true(b), 1);
 
if (state->loop) {
   /* We're in a loop;  we need to break out of it. */
diff --git a/src/compiler/nir/nir_lower_subgroups.c 
b/src/compiler/nir/nir_lower_subgroups.c
index ee5e8bd644b..70d736b040f 100644
--- a/src/compiler/nir/nir_lower_subgroups.c
+++ b/src/compiler/nir/nir_lower_subgroups.c
@@ -300,7 +300,7 @@ lower_subgroups_intrin(nir_builder *b, nir_intrinsic_instr 
*intrin,
case nir_intrinsic_vote_feq:
case nir_intrinsic_vote_ieq:
   if (options->lower_vote_trivial)
- return nir_imm_int(b, NIR_TRUE);
+ return nir_imm_true(b);
 
   if (options->lower_vote_eq_to_ballot)
  return lower_vote_eq_to_ballot(b, intrin, options);
diff --git a/src/compiler/nir/nir_opt_intrinsics.c 
b/src/compiler/nir/nir_opt_intrinsics.c
index 0bc2f0b3c82..7b054faa204 100644
--- a/src/compiler/nir/nir_opt_intrinsics.c
+++ b/src/compiler/nir/nir_opt_intrinsics.c
@@ -53,7 +53,7 @@ opt_intrinsics_impl(nir_function_impl *impl)
  case nir_intrinsic_vote_feq:
  case nir_intrinsic_vote_ieq:
 if (nir_src_is_const(intrin->src[0]))
-   replacement = nir_imm_int(, NIR_TRUE);
+   replacement = nir_imm_true();
 break;
  default:
 break;
diff --git a/src/compiler/spirv/vtn_alu.c b/src/compiler/spirv/vtn_alu.c
index 38ccd62ae17..6860e7dc090 100644
--- a/src/compiler/spirv/vtn_alu.c
+++ b/src/compiler/spirv/vtn_alu.c
@@ -494,7 +494,7 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
  default: vtn_fail("invalid number of components");
  }
  val->ssa->def = nir_build_alu(>nb, op, src[0],
-   nir_imm_int(>nb, NIR_FALSE),
+   nir_imm_false(>nb),
NULL, NULL);
   }
   break;
@@ -511,7 +511,7 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
  default: vtn_fail("invalid number of components");
  }
  val->ssa->def = nir_build_alu(>nb, op, src[0],
- 

[Mesa-dev] [PATCH 03/31] nir/constant_folding: Use nir_src_as_bool for discard_if

2018-10-22 Thread Jason Ekstrand
Missed one while converting to the nir_src_as_* helpers.
---
 src/compiler/nir/nir_opt_constant_folding.c | 13 +++--
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/compiler/nir/nir_opt_constant_folding.c 
b/src/compiler/nir/nir_opt_constant_folding.c
index 05b47d4c0fe..5929a60aee8 100644
--- a/src/compiler/nir/nir_opt_constant_folding.c
+++ b/src/compiler/nir/nir_opt_constant_folding.c
@@ -131,12 +131,9 @@ constant_fold_intrinsic_instr(nir_intrinsic_instr *instr)
 {
bool progress = false;
 
-   if (instr->intrinsic == nir_intrinsic_discard_if) {
-  nir_const_value *src_val = nir_src_as_const_value(instr->src[0]);
-  if (src_val && src_val->u32[0] == NIR_FALSE) {
- nir_instr_remove(>instr);
- progress = true;
-  } else if (src_val && src_val->u32[0] == NIR_TRUE) {
+   if (instr->intrinsic == nir_intrinsic_discard_if &&
+   nir_src_is_const(instr->src[0])) {
+  if (nir_src_as_bool(instr->src[0])) {
  /* This method of getting a nir_shader * from a nir_instr is
   * admittedly gross, but given the rarity of hitting this case I think
   * it's preferable to plumbing an otherwise unused nir_shader *
@@ -151,6 +148,10 @@ constant_fold_intrinsic_instr(nir_intrinsic_instr *instr)
  nir_instr_insert_before(>instr, >instr);
  nir_instr_remove(>instr);
  progress = true;
+  } else {
+ /* We're not discarding, just delete the instruction */
+ nir_instr_remove(>instr);
+ progress = true;
   }
}
 
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 00/31] nir: Use a 1-bit data type for booleans

2018-10-22 Thread Jason Ekstrand
This is something that Connor and I have talked about quite a bit over the
last couple of months.  The core idea is to replace NIR's current 32-bit
0/-1 D3D10-style booleans with a 1-bit data type.  All in all, I think it
worked out pretty well though I really don't like the proliferation of
32-bit comparison opcodes we now have kicking around for i965.

Why?  No hardware really has a 1-bit type, right?  Well, sort of...  AMD
actually uses 64-bit scalars for booleans with one bit per invocation.
However, most hardware such as Intel uses some other larger value for
booleans.  The real benefit of 1-bit booleans and requiring a lowering pass
is that you can do somewhat custom lowering (like AMD wants) and your
lowering pass can always tell in an instant if a value is a boolean based
on the bit size.  As can be seen in the last patch, this makes it really
easy to implement a bool -> float lowering pass for hardware that doesn't
have real integers where NIR's current booleans are actually rather
painful.

On Intel, the situation is a bit more complicated.  It's tempting to say
that we have 32-bit D3D10 booleans.  However, they're not really D3D10
booleans on gen4-5 because the top 31 bits are undefined garbage and, while
iand, ior, ixor, and inot operations work, you have to iand with 1 at the
last minute to clear off all that garbage.  Also, on all generations, a
comparison of two N-bit values results in an N-bit boolean, not a 32-bit
bool.  This has caused the Igalia folks no end of trouble as they've been
working on native 8 and 16-bit support.  If, instead, we have a 1-bit bool
with a lowering pass and we can lower to whatever we want, then we could
lower to a set of comparison opcodes that return the same bit-size as they
compare and it would match GEN hardware much better.

But what about performance?  Aren't there all sorts of neat tricks we can
do with D3D10 booleans like b & 1.0f for b2f?  As it turns out, not really;
that's about the only one.  There is some small advantage when optimizing
shaders that come from D3D if your native representation of booleans
matches that of D3D.  However, penultimate patch in this series adds a few
small optimizations that get us to actually better than we were before.
With the entire series, shader-db on Kaby Lak looks like this:

total instructions in shared programs: 15084098 -> 14988578 (-0.63%)
instructions in affected programs: 1321114 -> 1225594 (-7.23%)
helped: 2340
HURT: 23

total cycles in shared programs: 369790134 -> 359798399 (-2.70%)
cycles in affected programs: 134085452 -> 124093717 (-7.45%)
helped: 2149
HURT: 720

total loops in shared programs: 4393 -> 4393 (0.00%)
loops in affected programs: 0 -> 0
helped: 0
HURT: 0

total spills in shared programs: 10158 -> 10051 (-1.05%)
spills in affected programs: 1429 -> 1322 (-7.49%)
helped: 8
HURT: 15

total fills in shared programs: 22105 -> 21720 (-1.74%)
fills in affected programs: 2853 -> 2468 (-13.49%)
helped: 12
HURT: 15

How about ease of use?  Are they a pain to deal with?  Yes, adding support
for 1-bit types was a bit awkward in a few places but most of it was
dealing with all the places where we have 32-bit booleans baked into
assumptions.  Getting rid of that baking in solves the problem and also
makes the whole IR more future-proof.

All in all, I'd say I'm pretty happy with it.  However, I'd like other
people (particularly the AMD folks) to play with it a bit and verify that
it solves their problems as well.  Also, I added a lowering pass and tried
to turn it on in everyone's driver but may not have put it in the right
spot.  Please double-check my work.  For those wishing to take a look, you
can also find the entire series on my gitlab here:

https://gitlab.freedesktop.org/jekstrand/mesa/commits/review/nir-1-bit-bool

Please review!

--Jason

Cc: Connor Abbott 
Cc: Timothy Arceri 
Cc: Eric Anholt 
Cc: Rob Clark 
Cc: Karol Herbst 
Cc: Bas Nieuwenhuizen 
Cc: Alyssa Rosenzweig 


Jason Ekstrand (31):
  nir/validate: Print when the validation failed
  nir/constant_folding: Add an unreachable to a switch
  nir/constant_folding: Use nir_src_as_bool for discard_if
  nir/builder: Add a nir_imm_true/false helpers
  nir/builder: Handle 16-bit floats in nir_imm_floatN_t
  nir/search: Use nir_builder
  nir/opt_if: Rework condition propagation
  nir/system_values: Use the bit size from the load_deref
  glsl/nir: Use i2b instead of ine for fixing UBO/SSBO booleans
  nir/prog: Use nir_bany in kill handling
  spirv: Use the right bit-size for spec constant ops
  spirv: Initialize subgroup destinations with the destination type
  intel/nir: Use the OPT macro for more passes
  nir/algebraic: Disable b2f lowering and two optimizations
  nir: Rename boolean-related opcodes to include 32 in the name
  FIXUP: nir/builder: Generate 32-bit bool opcodes transparently
  FIXUP: nir/algebraic: Remap boolean opcodes to the 32-bit variant
  

Re: [Mesa-dev] [PATCH] intel/compiler: Print floating point values upto precision 8

2018-10-22 Thread Sagar Ghuge


On 10/22/18 10:34 AM, Matt Turner wrote:
> On Fri, Oct 5, 2018 at 11:15 AM Sagar Ghuge  wrote:
>>
>> avoid misinterpretation of encoded immediate values in instruction and
>> disassembled output.
>>
>> Signed-off-by: Sagar Ghuge 
>> ---
>> While encoding the immediate floating point values in instruction we use
>> values upto precision 8, but while disassembling, we print precision to
>> 6 places, which round up the value and gives wrong interpretation for
>> encoded immediate constant. Printing it upto precision 8 will help in
>> reassembling it again.
> 
> Let's put that in the commit message.
> 
>>  src/intel/compiler/brw_disasm.c | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/src/intel/compiler/brw_disasm.c 
>> b/src/intel/compiler/brw_disasm.c
>> index 322f4544df..7cbbc080b3 100644
>> --- a/src/intel/compiler/brw_disasm.c
>> +++ b/src/intel/compiler/brw_disasm.c
>> @@ -1293,7 +1293,7 @@ imm(FILE *file, const struct gen_device_info *devinfo, 
>> enum brw_reg_type type,
>>format(file, "0x%08xV", brw_inst_imm_ud(devinfo, inst));
>>break;
>> case BRW_REGISTER_TYPE_F:
>> -  format(file, "%-gF", brw_inst_imm_f(devinfo, inst));
>> +  format(file, "%.8fF", brw_inst_imm_f(devinfo, inst));
> 
> I'm not sure 8 digits is sufficient to get an exact representation
> that the assembler can "round-trip". This page [1] indicates that 9
> digits are necessary for binary->decimal->binary round-trips.
> 
I was also not sure about it, [1] article is nice. 
 
> NIR takes another approach:
> 
> vec1 32 ssa_0 = load_const (0x3f80 /* 1.00 */)
> 
> What do you think about printing the binary representation and the
> floating-point value? That way humans can easily read one number and
> the assembler can easily read the other :)
> 
I think we can just print F and DF to 9 and 17 precision respectively to avoid
output alignment issue.

> Also, I think the DF case immediately after this should be handled as well.
> 
Yes,  I was planning to handle it when I shift to 64 bit datatypes. But I can 
club both in single
patch. 
> [1] 
> https://www.exploringbinary.com/number-of-digits-required-for-round-trip-conversions/
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] i965/miptree: Use cpu tiling/detiling when mapping

2018-10-22 Thread Matt Turner
On Mon, Sep 24, 2018 at 4:20 AM Tapani Pälli  wrote:
>
> From: Scott D Phillips 
>
> Rename the (un)map_gtt functions to (un)map_map (map by
> returning a map) and add new functions (un)map_tiled_memcpy that
> return a shadow buffer populated with the intel_tiled_memcpy
> functions.
>
> Tiling/detiling with the cpu will be the only way to handle Yf/Ys
> tiling, when support is added for those formats.
>
> v2: Compute extents properly in the x|y-rounded-down case (Chris Wilson)
>
> v3: Add units to parameter names of tile_extents (Nanley Chery)
> Use _mesa_align_malloc for the shadow copy (Nanley)
> Continue using gtt maps on gen4 (Nanley)
>
> v4: Use streaming_load_memcpy when detiling
>
> v5: (edited by Ken) Move map_tiled_memcpy above map_movntdqa, so it
> takes precedence.  Add intel_miptree_access_raw, needed after
> rebasing on commit b499b85b0f2cc0c82b7c9af91502c2814fdc8e67.
>
> v6: refactor to changes done for sse41 separation (Tapani)
>
> Reviewed-by: Chris Wilson  (v5)
> Reviewed-by: Kenneth Graunke  (v5)
>
> Signed-off-by: Tapani Pälli 
> ---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 110 +-
>  1 file changed, 106 insertions(+), 4 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index 36681352ba7..4c2cee8ebba 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -31,6 +31,8 @@
>  #include "intel_image.h"
>  #include "intel_mipmap_tree.h"
>  #include "intel_tex.h"
> +#include "intel_tiled_memcpy.h"
> +#include "intel_tiled_memcpy_sse41.h"
>  #include "intel_blit.h"
>  #include "intel_fbo.h"
>
> @@ -2998,7 +3000,7 @@ intel_miptree_unmap_raw(struct intel_mipmap_tree *mt)
>  }
>
>  static void
> -intel_miptree_unmap_gtt(struct brw_context *brw,
> +intel_miptree_unmap_map(struct brw_context *brw,
>  struct intel_mipmap_tree *mt,
>  struct intel_miptree_map *map,
>  unsigned int level, unsigned int slice)
> @@ -3007,7 +3009,7 @@ intel_miptree_unmap_gtt(struct brw_context *brw,
>  }
>
>  static void
> -intel_miptree_map_gtt(struct brw_context *brw,
> +intel_miptree_map_map(struct brw_context *brw,
>   struct intel_mipmap_tree *mt,
>   struct intel_miptree_map *map,
>   unsigned int level, unsigned int slice)
> @@ -3055,7 +3057,7 @@ intel_miptree_map_gtt(struct brw_context *brw,
> mt, _mesa_get_format_name(mt->format),
> x, y, map->ptr, map->stride);
>
> -   map->unmap = intel_miptree_unmap_gtt;
> +   map->unmap = intel_miptree_unmap_map;
>  }
>
>  static void
> @@ -3087,6 +3089,101 @@ intel_miptree_unmap_blit(struct brw_context *brw,
> intel_miptree_release(>linear_mt);
>  }
>
> +/* Compute extent parameters for use with tiled_memcpy functions.
> + * xs are in units of bytes and ys are in units of strides.
> + */
> +static inline void
> +tile_extents(struct intel_mipmap_tree *mt, struct intel_miptree_map *map,
> + unsigned int level, unsigned int slice, unsigned int *x1_B,
> + unsigned int *x2_B, unsigned int *y1_el, unsigned int *y2_el)
> +{
> +   unsigned int block_width, block_height;
> +   unsigned int x0_el, y0_el;
> +
> +   _mesa_get_format_block_size(mt->format, _width, _height);
> +
> +   assert(map->x % block_width == 0);
> +   assert(map->y % block_height == 0);
> +
> +   intel_miptree_get_image_offset(mt, level, slice, _el, _el);
> +   *x1_B = (map->x / block_width + x0_el) * mt->cpp;
> +   *y1_el = map->y / block_height + y0_el;
> +   *x2_B = (DIV_ROUND_UP(map->x + map->w, block_width) + x0_el) * mt->cpp;
> +   *y2_el = DIV_ROUND_UP(map->y + map->h, block_height) + y0_el;
> +}
> +
> +static void
> +intel_miptree_unmap_tiled_memcpy(struct brw_context *brw,
> + struct intel_mipmap_tree *mt,
> + struct intel_miptree_map *map,
> + unsigned int level,
> + unsigned int slice)
> +{
> +   if (map->mode & GL_MAP_WRITE_BIT) {
> +  unsigned int x1, x2, y1, y2;
> +  tile_extents(mt, map, level, slice, , , , );
> +
> +  char *dst = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
> +  dst += mt->offset;
> +
> +  linear_to_tiled(x1, x2, y1, y2, dst, map->ptr, mt->surf.row_pitch,
> +  map->stride, brw->has_swizzling, mt->surf.tiling,
> +  INTEL_COPY_MEMCPY);
> +
> +  intel_miptree_unmap_raw(mt);
> +   }
> +   _mesa_align_free(map->buffer);
> +   map->buffer = map->ptr = NULL;
> +}
> +
> +static void
> +intel_miptree_map_tiled_memcpy(struct brw_context *brw,
> +   struct intel_mipmap_tree *mt,
> +   struct intel_miptree_map *map,
> +   unsigned int level, unsigned int slice)
> 

Re: [Mesa-dev] [PATCH] intel: Fix decoding for partial STATE_BASE_ADDRESS updates.

2018-10-22 Thread Kenneth Graunke
On Monday, October 22, 2018 2:57:10 AM PDT Lionel Landwerlin wrote:
> Could you maybe update src/intel/tools/aubinator_viewer_decoder.cpp 
> (function handle_state_base_address) ?
> 
> Either way :
> 
> Reviewed-by: Lionel Landwerlin 

Oops, good call, thanks!

Pushed a v2 that updates both.


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] intel/eu: Don't apply chansel when repctrl is set

2018-10-22 Thread Sagar Ghuge


On 10/22/18 10:14 AM, Matt Turner wrote:
> On Tue, Oct 16, 2018 at 4:57 PM Sagar Ghuge  wrote:
>>
> 
> Let's describe a little of why we're doing this and how we found it.
> If I recall correctly, we set a NOP (XYZW) swizzle on 3-src
> instructions, except we set an  swizzle on LRP. Is that correct?
> 

Yes. I will resend patch with full description. 

>> Signed-off-by: Sagar Ghuge 
>> ---
>>  src/intel/compiler/brw_eu_emit.c | 36 
>>  1 file changed, 27 insertions(+), 9 deletions(-)
>>
>> diff --git a/src/intel/compiler/brw_eu_emit.c 
>> b/src/intel/compiler/brw_eu_emit.c
>> index 0cbc682ebc..a6b45fcb1a 100644
>> --- a/src/intel/compiler/brw_eu_emit.c
>> +++ b/src/intel/compiler/brw_eu_emit.c
>> @@ -765,31 +765,49 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, 
>> struct brw_reg dest,
>>brw_inst_set_3src_a16_dst_writemask(devinfo, inst, dest.writemask);
>>
>>assert(src0.file == BRW_GENERAL_REGISTER_FILE);
>> -  brw_inst_set_3src_a16_src0_swizzle(devinfo, inst, src0.swizzle);
>>brw_inst_set_3src_a16_src0_subreg_nr(devinfo, inst, 
>> get_3src_subreg_nr(src0));
>>brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr);
>>brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
>>brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate);
>> -  brw_inst_set_3src_a16_src0_rep_ctrl(devinfo, inst,
>> -  src0.vstride == 
>> BRW_VERTICAL_STRIDE_0);
>> +
>> +  /* RepCtrl field in Source or Destination Operand table in BDW Bspec
>> +   * says:
>> +   *"ChanSel does not apply when Replicate Control is set."
>> +   */
>> +  if (src0.vstride == BRW_VERTICAL_STRIDE_0)
>> + brw_inst_set_3src_a16_src0_rep_ctrl(devinfo, inst, true);
>> +  else
>> + brw_inst_set_3src_a16_src0_swizzle(devinfo, inst, src0.swizzle);
> 
> I see that the brw_vec1_reg() function uses an  swizzle. That
> indeed determines what swizzle scalar sources (i.e., those with
> rep_ctrl set) will have.
> 
> I would rather we always set the swizzle (so as to not complicate the
> code here) and instead figure out why the brw_reg's swizzle field is
> sometimes XYZW fix that.
> 
> I modified brw_disasm.c locally to always print the swizzle on 3-src
> operands and using the piglit tests
> 
> generated_tests/spec/glsl-1.10/execution/built-in-functions/fs-mix-vec2-vec2-vec2.shader_test
> (for LRP)
> tests/spec/arb_gpu_shader5/execution/built-in-functions/fs-fma.shader_test
> (for MAD)
> 
> I cannot confirm that the swizzles are different between MAD and LRP.
> What am I missing?
> 
> lrp(8)  g4<1>F  g2.4<0,1,0>.xF  g2.2<0,1,0>.xF
> g2.0<0,1,0>.xF { align16 1Q };
> mad(8)  g4<1>F  g3.0<0,1,0>.xF  g2.4<0,1,0>.xF
> g2.0<0,1,0>.xF { align16 1Q };
> 
My bad, swizzles are not different (I will investigate more with different 
test), 
but while assembling we set both the rep_ctrl 
and the swizzle bits without checking region is scalar or not, 

https://gitlab.freedesktop.org/mesa/mesa/blob/master/src/intel/compiler/brw_eu_emit.c#L768

and while disassembling we check that if the region is scalar g1<0,1,0> then we 
don't 
disassemble the swizzle bits. For src0/src1/src2 in 3src operand,

https://gitlab.freedesktop.org/mesa/mesa/blob/master/src/intel/compiler/brw_disasm.c#L1103

That's where we have different behavior for assembling and disassembling the 
instruction. 

BDW+ Bspec says RepCtrl : "This field is only present in three-source 
instructions, 
for each of the three source operands. It controls replication of the starting 
channel
to all channels in the execution size. ChanSel does not apply when Replicate 
Control is set."
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] meson: don't require libelf for r600 without LLVM

2018-10-22 Thread Dylan Baker
And pushed, thanks!

Quoting Matt Turner (2018-10-22 10:40:36)
> Thanks Dylan!
> 
> Reviewed-by: Matt Turner 


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 108508] Graphic glitches with stream output support on OLAND AMD GPU GCN 1.0

2018-10-22 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=108508

--- Comment #5 from Alex Deucher  ---
(In reply to Ahmed Elsayed from comment #4)
> I am sorry. You mean mesa-dev@lists.freedesktop.org ?

Correct.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] meson: don't require libelf for r600 without LLVM

2018-10-22 Thread Matt Turner
Thanks Dylan!

Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] intel/compiler: Print floating point values upto precision 8

2018-10-22 Thread Matt Turner
On Fri, Oct 5, 2018 at 11:15 AM Sagar Ghuge  wrote:
>
> avoid misinterpretation of encoded immediate values in instruction and
> disassembled output.
>
> Signed-off-by: Sagar Ghuge 
> ---
> While encoding the immediate floating point values in instruction we use
> values upto precision 8, but while disassembling, we print precision to
> 6 places, which round up the value and gives wrong interpretation for
> encoded immediate constant. Printing it upto precision 8 will help in
> reassembling it again.

Let's put that in the commit message.

>  src/intel/compiler/brw_disasm.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/intel/compiler/brw_disasm.c b/src/intel/compiler/brw_disasm.c
> index 322f4544df..7cbbc080b3 100644
> --- a/src/intel/compiler/brw_disasm.c
> +++ b/src/intel/compiler/brw_disasm.c
> @@ -1293,7 +1293,7 @@ imm(FILE *file, const struct gen_device_info *devinfo, 
> enum brw_reg_type type,
>format(file, "0x%08xV", brw_inst_imm_ud(devinfo, inst));
>break;
> case BRW_REGISTER_TYPE_F:
> -  format(file, "%-gF", brw_inst_imm_f(devinfo, inst));
> +  format(file, "%.8fF", brw_inst_imm_f(devinfo, inst));

I'm not sure 8 digits is sufficient to get an exact representation
that the assembler can "round-trip". This page [1] indicates that 9
digits are necessary for binary->decimal->binary round-trips.

NIR takes another approach:

vec1 32 ssa_0 = load_const (0x3f80 /* 1.00 */)

What do you think about printing the binary representation and the
floating-point value? That way humans can easily read one number and
the assembler can easily read the other :)

Also, I think the DF case immediately after this should be handled as well.

[1] 
https://www.exploringbinary.com/number-of-digits-required-for-round-trip-conversions/
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] intel/eu: Don't apply chansel when repctrl is set

2018-10-22 Thread Matt Turner
On Tue, Oct 16, 2018 at 4:57 PM Sagar Ghuge  wrote:
>

Let's describe a little of why we're doing this and how we found it.
If I recall correctly, we set a NOP (XYZW) swizzle on 3-src
instructions, except we set an  swizzle on LRP. Is that correct?

> Signed-off-by: Sagar Ghuge 
> ---
>  src/intel/compiler/brw_eu_emit.c | 36 
>  1 file changed, 27 insertions(+), 9 deletions(-)
>
> diff --git a/src/intel/compiler/brw_eu_emit.c 
> b/src/intel/compiler/brw_eu_emit.c
> index 0cbc682ebc..a6b45fcb1a 100644
> --- a/src/intel/compiler/brw_eu_emit.c
> +++ b/src/intel/compiler/brw_eu_emit.c
> @@ -765,31 +765,49 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct 
> brw_reg dest,
>brw_inst_set_3src_a16_dst_writemask(devinfo, inst, dest.writemask);
>
>assert(src0.file == BRW_GENERAL_REGISTER_FILE);
> -  brw_inst_set_3src_a16_src0_swizzle(devinfo, inst, src0.swizzle);
>brw_inst_set_3src_a16_src0_subreg_nr(devinfo, inst, 
> get_3src_subreg_nr(src0));
>brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr);
>brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
>brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate);
> -  brw_inst_set_3src_a16_src0_rep_ctrl(devinfo, inst,
> -  src0.vstride == 
> BRW_VERTICAL_STRIDE_0);
> +
> +  /* RepCtrl field in Source or Destination Operand table in BDW Bspec
> +   * says:
> +   *"ChanSel does not apply when Replicate Control is set."
> +   */
> +  if (src0.vstride == BRW_VERTICAL_STRIDE_0)
> + brw_inst_set_3src_a16_src0_rep_ctrl(devinfo, inst, true);
> +  else
> + brw_inst_set_3src_a16_src0_swizzle(devinfo, inst, src0.swizzle);

I see that the brw_vec1_reg() function uses an  swizzle. That
indeed determines what swizzle scalar sources (i.e., those with
rep_ctrl set) will have.

I would rather we always set the swizzle (so as to not complicate the
code here) and instead figure out why the brw_reg's swizzle field is
sometimes XYZW fix that.

I modified brw_disasm.c locally to always print the swizzle on 3-src
operands and using the piglit tests

generated_tests/spec/glsl-1.10/execution/built-in-functions/fs-mix-vec2-vec2-vec2.shader_test
(for LRP)
tests/spec/arb_gpu_shader5/execution/built-in-functions/fs-fma.shader_test
(for MAD)

I cannot confirm that the swizzles are different between MAD and LRP.
What am I missing?

lrp(8)  g4<1>F  g2.4<0,1,0>.xF  g2.2<0,1,0>.xF
g2.0<0,1,0>.xF { align16 1Q };
mad(8)  g4<1>F  g3.0<0,1,0>.xF  g2.4<0,1,0>.xF
g2.0<0,1,0>.xF { align16 1Q };
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] vulkan/wsi: use the drmGetDevice2() API

2018-10-22 Thread Jason Ekstrand
On Mon, Oct 22, 2018 at 12:10 PM Emil Velikov 
wrote:

> From: Emil Velikov 
>
> On older kernels, the drmGetDevice() call will wake up all the GPUs
> on the system, while fetching the PCI revision.
>
> Use the 2 version of the API and pass flags == 0, so we don't fetch the
> device PCI revision, since we don't need that information.
>

As long as we still get all the PCI address information,

Reviewed-by: Jason Ekstrand 

I had no idea one of them work up the device and the other didn't...

--Jason


> Fixes: baa38c144f6 ("vulkan/wsi: Use VK_EXT_pci_bus_info for DRM fd
> matching")
> Cc: Jason Ekstrand 
> Signed-off-by: Emil Velikov 
> ---
>  src/vulkan/wsi/wsi_common.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/vulkan/wsi/wsi_common.c b/src/vulkan/wsi/wsi_common.c
> index 51d8655a5a9..1cd5f8d62c5 100644
> --- a/src/vulkan/wsi/wsi_common.c
> +++ b/src/vulkan/wsi/wsi_common.c
> @@ -135,7 +135,7 @@ bool
>  wsi_device_matches_drm_fd(const struct wsi_device *wsi, int drm_fd)
>  {
> drmDevicePtr fd_device;
> -   int ret = drmGetDevice(drm_fd, _device);
> +   int ret = drmGetDevice2(drm_fd, 0, _device);
> if (ret)
>return false;
>
> --
> 2.19.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] vulkan/wsi: use the drmGetDevice2() API

2018-10-22 Thread Emil Velikov
From: Emil Velikov 

On older kernels, the drmGetDevice() call will wake up all the GPUs
on the system, while fetching the PCI revision.

Use the 2 version of the API and pass flags == 0, so we don't fetch the
device PCI revision, since we don't need that information.

Fixes: baa38c144f6 ("vulkan/wsi: Use VK_EXT_pci_bus_info for DRM fd matching")
Cc: Jason Ekstrand 
Signed-off-by: Emil Velikov 
---
 src/vulkan/wsi/wsi_common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/vulkan/wsi/wsi_common.c b/src/vulkan/wsi/wsi_common.c
index 51d8655a5a9..1cd5f8d62c5 100644
--- a/src/vulkan/wsi/wsi_common.c
+++ b/src/vulkan/wsi/wsi_common.c
@@ -135,7 +135,7 @@ bool
 wsi_device_matches_drm_fd(const struct wsi_device *wsi, int drm_fd)
 {
drmDevicePtr fd_device;
-   int ret = drmGetDevice(drm_fd, _device);
+   int ret = drmGetDevice2(drm_fd, 0, _device);
if (ret)
   return false;
 
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] intel/compiler: Change src1 reg type to unsigned doubleword

2018-10-22 Thread Matt Turner
On Mon, Oct 22, 2018 at 9:53 AM Sagar Ghuge  wrote:
>
> Thank you for reviewing the patch.
>
> On 10/22/18 5:02 AM, Samuel Iglesias Gonsálvez wrote:
> >
> >
> > On 20/10/18 3:25, Sagar Ghuge wrote:
> >> To have uniform behavior while disassembling send(c) instruction use
> >> register type of unsigned doubleword for src1 when message descriptor is
> >> immediate value. Bspec does not specifiy anything for src1 immediate
> >
> > s/specifiy/specify
> >
> oops, I will fix it and resend the patch. I don't have push access.

Don't worry. I'll fix it when I commit it.

Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] intel/compiler: Change src1 reg type to unsigned doubleword

2018-10-22 Thread Sagar Ghuge
Thank you for reviewing the patch. 

On 10/22/18 5:02 AM, Samuel Iglesias Gonsálvez wrote:
> 
> 
> On 20/10/18 3:25, Sagar Ghuge wrote:
>> To have uniform behavior while disassembling send(c) instruction use
>> register type of unsigned doubleword for src1 when message descriptor is
>> immediate value. Bspec does not specifiy anything for src1 immediate
> 
> s/specifiy/specify
> 
oops, I will fix it and resend the patch. I don't have push access. 

> With that fixed and assuming no problems appeared on Intel CI,
> 
Yes, I ran through CI and it looks like there are no failures. 

> Reviewed-by: Samuel Iglesias Gonsálvez 
> 
> Sam
> 
>> default type.
>>
>> Signed-off-by: Sagar Ghuge 
>> ---
>>  src/intel/compiler/brw_eu_emit.c| 2 +-
>>  src/intel/compiler/brw_fs_generator.cpp | 4 ++--
>>  2 files changed, 3 insertions(+), 3 deletions(-)
>>
>> diff --git a/src/intel/compiler/brw_eu_emit.c 
>> b/src/intel/compiler/brw_eu_emit.c
>> index 0cbc682ebc..4630b83b1a 100644
>> --- a/src/intel/compiler/brw_eu_emit.c
>> +++ b/src/intel/compiler/brw_eu_emit.c
>> @@ -371,7 +371,7 @@ brw_set_desc_ex(struct brw_codegen *p, brw_inst *inst,
>> assert(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
>>brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC);
>> brw_inst_set_src1_file_type(devinfo, inst,
>> -   BRW_IMMEDIATE_VALUE, BRW_REGISTER_TYPE_D);
>> +   BRW_IMMEDIATE_VALUE, BRW_REGISTER_TYPE_UD);
>> brw_inst_set_send_desc(devinfo, inst, desc);
>> if (devinfo->gen >= 9)
>>brw_inst_set_send_ex_desc(devinfo, inst, ex_desc);
>> diff --git a/src/intel/compiler/brw_fs_generator.cpp 
>> b/src/intel/compiler/brw_fs_generator.cpp
>> index cb402cd4e7..08dd83dded 100644
>> --- a/src/intel/compiler/brw_fs_generator.cpp
>> +++ b/src/intel/compiler/brw_fs_generator.cpp
>> @@ -630,7 +630,7 @@ fs_generator::generate_urb_write(fs_inst *inst, struct 
>> brw_reg payload)
>>  
>> brw_set_dest(p, insn, brw_null_reg());
>> brw_set_src0(p, insn, payload);
>> -   brw_set_src1(p, insn, brw_imm_d(0));
>> +   brw_set_src1(p, insn, brw_imm_ud(0u));
>>  
>> brw_inst_set_sfid(p->devinfo, insn, BRW_SFID_URB);
>> brw_inst_set_urb_opcode(p->devinfo, insn, GEN8_URB_OPCODE_SIMD8_WRITE);
>> @@ -659,7 +659,7 @@ fs_generator::generate_cs_terminate(fs_inst *inst, 
>> struct brw_reg payload)
>>  
>> brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW));
>> brw_set_src0(p, insn, retype(payload, BRW_REGISTER_TYPE_UW));
>> -   brw_set_src1(p, insn, brw_imm_d(0));
>> +   brw_set_src1(p, insn, brw_imm_ud(0u));
>>  
>> /* Terminate a compute shader by sending a message to the thread spawner.
>>  */
>>
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/9] nir: Add better helpers for handling constant sources

2018-10-22 Thread Stuart Young
Re: The list.

It doesn't always happen every month, but I've seen it multiple times now,
as far back as January 2018, though possibly earlier.

The gzip mail archive however has all the messages that are missing from
the web archive.
https://lists.freedesktop.org/archives/mesa-dev/2018-October.txt.gz

There are patches in that series missing however, as you pointed out.

That said, looking at that file I see that the last message that didn't get
processed had the following Message ID header:

Message-ID: <20181020175547.9159-6-jason.ekstr...@intel.com>

The messages are a bit out of order there, which may or may not be related.
I can't see anything else that seems out of the ordinary.

I also get DMARC issues for certain emails (they fail DMARC, meaning that
they've not been handled properly when passing through the list). I notice
this particularly for emails from Liviu Prodea liviupro...@yahoo.com but
sometimes for other ppl too.

Perhaps a later version of Mailman (eg: 2.1.29?) might resolve some/both of
these issues?


On Mon, 22 Oct 2018 at 22:31, Samuel Iglesias Gonsálvez <
sigles...@igalia.com> wrote:

> Patch 7/9 has never arrived my inbox and checking the archives [0], looks
> like
> the archive is messed up... it has only a few emails. Does somebody know
> what
> happened with the ML archive?
>
> Sam
>
> [0] https://lists.freedesktop.org/archives/mesa-dev/2018-October/date.html
>
> On Saturday, 20 October 2018 19:55:38 (CEST) Jason Ekstrand wrote:
> > Previously, the only thing we had was nir_src_as_const_value which
> returns
> > a pointer to a nir_const_value which was NULL if the source wasn't
> actually
> > a constant.  This was great except that almost none of the users
> considered
> > anything other than 32-bit values.  With 8, 16, and 64-bit values
> floating
> > around, we really shouldn't be hand-rolling it everywhere.  Most of the
> > code is currently safe if you operate under the assumption that things
> like
> > array indices are always 32 bits.  The glaring exception was some of the
> > helpers in nir_search_helpers.h where we definitely cannot be making that
> > assumption but were anyway.
> >
> > I've converted core NIR and i965 but have not written patches for vc4,
> ir3,
> > or radeon.
> >
> > Cc: Connor Abbott 
> > Cc: Timothy Arceri 
> > Cc: Eric Anholt 
> > Cc: Rob Clark 
> > Cc: Karol Herbst 
> > Cc: Bas Nieuwenhuizen 
> >
> > Jason Ekstrand (9):
> >   nir: Add some new helpers for working with const sources
> >   nir/search: Use nir_src_is_const and friends
> >   nir/search_helpers: Use nir_src_is_const and friends
> >   nir: Use nir_src_is_const and nir_src_as_* in core code
> >   intel/fs,vec4: Clean up a repeated pattern with SSBOs
> >   intel/fs: Use the new nir_src_is_const and friends
> >   intel/vec4: Use the new nir_src_is_const and friends
> >   intel/analyze_ubo_ranges: Use nir_src_is_const and friends
> >   anv: Use nir_src_is_const and friends in lowering code
> >
> >  src/compiler/glsl/gl_nir_lower_samplers.c |   7 +-
> >  src/compiler/nir/nir.c|  92 +
> >  src/compiler/nir/nir.h|  16 +
> >  src/compiler/nir/nir_deref.c  |  14 +-
> >  src/compiler/nir/nir_gather_info.c|   6 +-
> >  src/compiler/nir/nir_gs_count_vertices.c  |   7 +-
> >  src/compiler/nir/nir_lower_clip.c |   2 +-
> >  src/compiler/nir/nir_lower_indirect_derefs.c  |   4 +-
> >  src/compiler/nir/nir_lower_io.c   |   6 +-
> >  .../nir/nir_lower_io_arrays_to_elements.c |  11 +-
> >  src/compiler/nir/nir_lower_locals_to_regs.c   |   6 +-
> >  src/compiler/nir/nir_lower_two_sided_color.c  |   2 +-
> >  src/compiler/nir/nir_lower_vars_to_ssa.c  |  14 +-
> >  src/compiler/nir/nir_opt_dead_cf.c|   7 +-
> >  src/compiler/nir/nir_opt_find_array_copies.c  |  13 +-
> >  src/compiler/nir/nir_opt_intrinsics.c |   4 +-
> >  src/compiler/nir/nir_opt_large_constants.c|   2 +-
> >  src/compiler/nir/nir_search.c |  70 +---
> >  src/compiler/nir/nir_search_helpers.h |  49 ++-
> >  src/compiler/nir/nir_split_vars.c |  17 +-
> >  src/compiler/nir/tests/vars_tests.cpp |  10 +-
> >  src/intel/compiler/brw_fs.h   |   2 +
> >  src/intel/compiler/brw_fs_nir.cpp | 315 +++---
> >  .../compiler/brw_nir_analyze_ubo_ranges.c |  15 +-
> >  src/intel/compiler/brw_vec4.h |   2 +
> >  src/intel/compiler/brw_vec4_gs_nir.cpp|  12 +-
> >  src/intel/compiler/brw_vec4_nir.cpp   | 190 ---
> >  src/intel/compiler/brw_vec4_tcs.cpp   |   6 +-
> >  .../vulkan/anv_nir_apply_pipeline_layout.c|  15 +-
> >  .../vulkan/anv_nir_lower_ycbcr_textures.c |   6 +-
> >  30 files changed, 423 insertions(+), 499 deletions(-)
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> 

Re: [Mesa-dev] [PATCH] scons: Remove gles option.

2018-10-22 Thread Dylan Baker
Okay, I'll just leave it as "auto-off" on windows for now, with the toggle to
explicitly turn it on if someone has a use case for that.

Thanks,
Dylan

Quoting Jose Fonseca (2018-10-19 14:44:49)
> > Jose, does it make more sense to just make gles on windows an error in 
> >meson?
> 
> 
> I don't mind if there are options for other things, but the default for the
> unsuspicious user should be to create a single self-contained opengl32.dll, as
> that is what's generally most useful (it can be used with unmodified windows
> applications by putting on the same dir.)
> 
> 
> Jose
> 
> 
> ━━━
> From: Dylan Baker 
> Sent: Friday, October 19, 2018 18:04
> To: mesa-dev@lists.freedesktop.org; Brian Paul; Jose Fonseca; Liviu Prodea;
> Roland Scheidegger
> Subject: Re: [Mesa-dev] [PATCH] scons: Remove gles option.
>  
> That's not quite right. GLES needs shared glapi, but shared glapi doesn't need
> gles. meson and autoconf have separate toggles for shared-glapi and gles, they
> both happen to default to "on" currently.
> 
> If you want to uses GLES with mesa on Windows your best bet is probably to use
> ARB_ES_compatibility (replace  with 1, 2, 3, 3_1, or 3_2 as needed) from
> a
> GL Core Context like you would using the Nvidia or Intel driver on windows.
> Don't try to go down the EGL on windows madness that the proprietary AMD 
> driver
> requires.
> 
> Meson will likely continue to support shared glapi on windows because every
> other platform that mesa supports needs shared-glapi. I have in my latest spin
> default gles off on windows with meson. Jose, does it make more sense to just
> make gles on windows an error in meson?
> 
> Dylan
> 
> Quoting Liviu Prodea (2018-10-19 08:04:28)
> >
> >
> >
> > I think I found autotools build equivalent for gles=y. It is
> --enable-shared-glapi. And the docs say it is needed to support applications
> that mix OpenGL and OpenGL ES:
> >
> > https://www.mesa3d.org/egl.html
> >
> >
> -
> >
> >
> > On Friday, October 19, 2018, 4:15:48 PM GMT+3, Brian Paul 
> > 
> wrote:
> >
> >
> >
> >
> >
> > Reviewed-by: Brian Paul 
> >
> > On 10/19/2018 06:33 AM, Jose Fonseca wrote:
> > > It's broken, and WGL state tracker is always built with GLES support
> > > noawadays.
> > > ---
> > >  common.py                                    | 2 --
> > >  src/SConscript                              | 7 ---
> > >  src/gallium/state_trackers/osmesa/SConscript | 4 +---
> > >  src/gallium/state_trackers/wgl/SConscript    | 4 +---
> > >  src/gallium/targets/libgl-gdi/SConscript    | 6 --
> > >  src/gallium/targets/libgl-xlib/SConscript    | 6 --
> > >  src/mapi/glapi/SConscript                    | 6 +-
> > >  src/mapi/shared-glapi/SConscript            | 9 +
> > >  src/mesa/SConscript                          | 4 +---
> > >  src/mesa/drivers/osmesa/SConscript          | 4 +---
> > >  10 files changed, 6 insertions(+), 46 deletions(-)
> > >
> > > diff --git a/common.py b/common.py
> > > index 113fc7f5c12..f4f2bb44c1c 100644
> > > --- a/common.py
> > > +++ b/common.py
> > > @@ -99,8 +99,6 @@ def AddOptions(opts):
> > >                          'enable static code analysis where available',
> 'no'))
> > >      opts.Add(BoolOption('asan', 'enable Address Sanitizer', 'no'))
> > >      opts.Add('toolchain', 'compiler toolchain', default_toolchain)
> > > -    opts.Add(BoolOption('gles', 'EXPERIMENTAL: enable OpenGL ES support',
> > > -                        'no'))
> > >      opts.Add(BoolOption('llvm', 'use LLVM', default_llvm))
> > >      opts.Add(BoolOption('openmp', 'EXPERIMENTAL: compile with openmp
> (swrast)',
> > >                          'no'))
> > > diff --git a/src/SConscript b/src/SConscript
> > > index 95ea061c4bb..54350a9cdcc 100644
> > > --- a/src/SConscript
> > > +++ b/src/SConscript
> > > @@ -42,10 +42,6 @@ env.Append(CPPPATH = ["#" + env['build_dir']])
> > >  if env['platform'] != 'windows':
> > >      SConscript('loader/SConscript')
> > > 
> > > -# When env['gles'] is set, the targets defined in mapi/glapi/SConscript
> are not
> > > -# used.  libgl-xlib and libgl-gdi adapt themselves to use the targets
> defined
> > > -# in mapi/glapi-shared/SConscript.  mesa/SConscript also adapts itself to
> > > -# enable OpenGL ES support.
> > >  SConscript('mapi/glapi/gen/SConscript')
> > >  SConscript('mapi/glapi/SConscript')
> > > 
> > > @@ -61,8 +57,5 @@ if not env['embedded']:
> > >      if env['platform'] == 'haiku':
> > >          SConscript('egl/SConscript')
> > > 
> > > -    if env['gles']:
> > > -        SConscript('mapi/shared-glapi/SConscript')
> > > -
> > >  SConscript('gallium/SConscript')
> > > 
> > > diff --git a/src/gallium/state_trackers/osmesa/SConscript b/src/gallium/
> state_trackers/osmesa/SConscript
> > > index f5519f13762..be67d0fe739 100644
> > > --- 

[Mesa-dev] [PATCH] meson: don't require libelf for r600 without LLVM

2018-10-22 Thread Dylan Baker
r600 doesn't have a hard requirement on LLVM, and therefore doesn't have
a hard requirement on libelf. Currently the logic doesn't allow that
however.

Distro-bug: https://bugs.gentoo.org/669058
Fixes: 5060c51b6f4dfb0d5358bde6523285163d3faaad
   ("meson: build r600 driver")
---
 meson.build | 18 ++
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/meson.build b/meson.build
index 0dfe09858bf..712210f0f1b 100644
--- a/meson.build
+++ b/meson.build
@@ -1086,14 +1086,6 @@ if dep_thread.found() and host_machine.system() != 
'windows'
 pre_args += '-DHAVE_PTHREAD_SETAFFINITY'
   endif
 endif
-if with_amd_vk or with_gallium_radeonsi or with_gallium_r600 or 
with_gallium_opencl
-  dep_elf = dependency('libelf', required : false)
-  if not dep_elf.found()
-dep_elf = cc.find_library('elf')
-  endif
-else
-  dep_elf = null_dep
-endif
 dep_expat = dependency('expat')
 # this only exists on linux so either this is linux and it will be found, or
 # its not linux and and wont
@@ -1240,6 +1232,16 @@ elif with_amd_vk or with_gallium_radeonsi or 
with_gallium_swr
   error('The following drivers require LLVM: Radv, RadeonSI, SWR. One of these 
is enabled, but LLVM is disabled.')
 endif
 
+if (with_amd_vk or with_gallium_radeonsi or with_gallium_opencl or
+(with_gallium_r600 and with_llvm))
+  dep_elf = dependency('libelf', required : false)
+  if not dep_elf.found()
+dep_elf = cc.find_library('elf')
+  endif
+else
+  dep_elf = null_dep
+endif
+
 dep_glvnd = null_dep
 if with_glvnd
   dep_glvnd = dependency('libglvnd', version : '>= 0.2.0')
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radv: fix btoi for R32G32B32 when the dest offset is not 0

2018-10-22 Thread Samuel Pitoiset
Fixes: 593996bc02 ("radv: implement buffer to image operations for R32G32B32")
Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_meta_bufimage.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/amd/vulkan/radv_meta_bufimage.c 
b/src/amd/vulkan/radv_meta_bufimage.c
index 73a5034222..ec449c5bca 100644
--- a/src/amd/vulkan/radv_meta_bufimage.c
+++ b/src/amd/vulkan/radv_meta_bufimage.c
@@ -556,8 +556,8 @@ build_nir_btoi_r32g32b32_compute_shader(struct radv_device 
*dev)
 
nir_ssa_def *global_pos =
nir_iadd(,
-nir_imul(, pos_y, >dest.ssa),
-nir_imul(, pos_x, nir_imm_int(, 3)));
+nir_imul(, nir_channel(, img_coord, 1), 
>dest.ssa),
+nir_imul(, nir_channel(, img_coord, 0), 
nir_imm_int(, 3)));
 
nir_ssa_def *input_img_deref = _build_deref_var(, 
input_img)->dest.ssa;
 
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 107865] swr fail to build with llvm-libs 6.0.1

2018-10-22 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107865

Alok Hota  changed:

   What|Removed |Added

 Resolution|--- |FIXED
 Status|NEW |RESOLVED

--- Comment #3 from Alok Hota  ---
This patch is to fix this issue:
https://patchwork.freedesktop.org/series/51133/

It should make its way to the stable branch, hopefully in the next dot release.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/9] nir: Add better helpers for handling constant sources

2018-10-22 Thread Jason Ekstrand
Yeah... Bas said he had the same problem. The series can be found on my 
wip/nir-const-src branch of you want to see it that way. I can also resend 
if it's helpful.


--Jason

On October 22, 2018 06:31:01 Samuel Iglesias Gonsálvez 
 wrote:



Patch 7/9 has never arrived my inbox and checking the archives [0], looks like
the archive is messed up... it has only a few emails. Does somebody know what
happened with the ML archive?

Sam

[0] https://lists.freedesktop.org/archives/mesa-dev/2018-October/date.html

On Saturday, 20 October 2018 19:55:38 (CEST) Jason Ekstrand wrote:

Previously, the only thing we had was nir_src_as_const_value which returns
a pointer to a nir_const_value which was NULL if the source wasn't actually
a constant.  This was great except that almost none of the users considered
anything other than 32-bit values.  With 8, 16, and 64-bit values floating
around, we really shouldn't be hand-rolling it everywhere.  Most of the
code is currently safe if you operate under the assumption that things like
array indices are always 32 bits.  The glaring exception was some of the
helpers in nir_search_helpers.h where we definitely cannot be making that
assumption but were anyway.

I've converted core NIR and i965 but have not written patches for vc4, ir3,
or radeon.

Cc: Connor Abbott 
Cc: Timothy Arceri 
Cc: Eric Anholt 
Cc: Rob Clark 
Cc: Karol Herbst 
Cc: Bas Nieuwenhuizen 

Jason Ekstrand (9):
  nir: Add some new helpers for working with const sources
  nir/search: Use nir_src_is_const and friends
  nir/search_helpers: Use nir_src_is_const and friends
  nir: Use nir_src_is_const and nir_src_as_* in core code
  intel/fs,vec4: Clean up a repeated pattern with SSBOs
  intel/fs: Use the new nir_src_is_const and friends
  intel/vec4: Use the new nir_src_is_const and friends
  intel/analyze_ubo_ranges: Use nir_src_is_const and friends
  anv: Use nir_src_is_const and friends in lowering code

 src/compiler/glsl/gl_nir_lower_samplers.c |   7 +-
 src/compiler/nir/nir.c|  92 +
 src/compiler/nir/nir.h|  16 +
 src/compiler/nir/nir_deref.c  |  14 +-
 src/compiler/nir/nir_gather_info.c|   6 +-
 src/compiler/nir/nir_gs_count_vertices.c  |   7 +-
 src/compiler/nir/nir_lower_clip.c |   2 +-
 src/compiler/nir/nir_lower_indirect_derefs.c  |   4 +-
 src/compiler/nir/nir_lower_io.c   |   6 +-
 .../nir/nir_lower_io_arrays_to_elements.c |  11 +-
 src/compiler/nir/nir_lower_locals_to_regs.c   |   6 +-
 src/compiler/nir/nir_lower_two_sided_color.c  |   2 +-
 src/compiler/nir/nir_lower_vars_to_ssa.c  |  14 +-
 src/compiler/nir/nir_opt_dead_cf.c|   7 +-
 src/compiler/nir/nir_opt_find_array_copies.c  |  13 +-
 src/compiler/nir/nir_opt_intrinsics.c |   4 +-
 src/compiler/nir/nir_opt_large_constants.c|   2 +-
 src/compiler/nir/nir_search.c |  70 +---
 src/compiler/nir/nir_search_helpers.h |  49 ++-
 src/compiler/nir/nir_split_vars.c |  17 +-
 src/compiler/nir/tests/vars_tests.cpp |  10 +-
 src/intel/compiler/brw_fs.h   |   2 +
 src/intel/compiler/brw_fs_nir.cpp | 315 +++---
 .../compiler/brw_nir_analyze_ubo_ranges.c |  15 +-
 src/intel/compiler/brw_vec4.h |   2 +
 src/intel/compiler/brw_vec4_gs_nir.cpp|  12 +-
 src/intel/compiler/brw_vec4_nir.cpp   | 190 ---
 src/intel/compiler/brw_vec4_tcs.cpp   |   6 +-
 .../vulkan/anv_nir_apply_pipeline_layout.c|  15 +-
 .../vulkan/anv_nir_lower_ycbcr_textures.c |   6 +-
 30 files changed, 423 insertions(+), 499 deletions(-)




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 23/28] nir/linker: add program ubo/ssbo at the resource list

2018-10-22 Thread Alejandro Piñeiro
---
 src/compiler/glsl/gl_nir_linker.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/src/compiler/glsl/gl_nir_linker.c 
b/src/compiler/glsl/gl_nir_linker.c
index 547549bc4e0..138a12e532d 100644
--- a/src/compiler/glsl/gl_nir_linker.c
+++ b/src/compiler/glsl/gl_nir_linker.c
@@ -67,5 +67,19 @@ nir_build_program_resource_list(struct gl_context *ctx,
}
 
 
+   /* Add program uniform blocks. */
+   for (unsigned i = 0; i < prog->data->NumUniformBlocks; i++) {
+  if (!link_util_add_program_resource(prog, resource_set, GL_UNIFORM_BLOCK,
+  >data->UniformBlocks[i], 0))
+ return;
+   }
+
+   /* Add program shader storage blocks. */
+   for (unsigned i = 0; i < prog->data->NumShaderStorageBlocks; i++) {
+  if (!link_util_add_program_resource(prog, resource_set, 
GL_SHADER_STORAGE_BLOCK,
+  >data->ShaderStorageBlocks[i], 
0))
+ return;
+   }
+
_mesa_set_destroy(resource_set, NULL);
 }
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 25/28] i965: call to gl_nir_link_uniform_blocks

2018-10-22 Thread Alejandro Piñeiro
When using a SPIR-V shader. Note that needs to be done before linking
uniforms, so when creating the uniform storage entries, block_index
could be filled properly (among other things).
---
 src/mesa/drivers/dri/i965/brw_link.cpp | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp 
b/src/mesa/drivers/dri/i965/brw_link.cpp
index 03b32d1fe7a..d0179cc89a1 100644
--- a/src/mesa/drivers/dri/i965/brw_link.cpp
+++ b/src/mesa/drivers/dri/i965/brw_link.cpp
@@ -263,6 +263,10 @@ brw_link_shader(struct gl_context *ctx, struct 
gl_shader_program *shProg)
 
/* SPIR-V programs use a NIR linker */
if (shProg->data->spirv) {
+  if (!gl_nir_link_uniform_blocks(ctx, shProg)) {
+ return GL_FALSE;
+  }
+
   if (!gl_nir_link_uniforms(ctx, shProg))
  return GL_FALSE;
 
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 27/28] mesa: add NULL name check for several length queries

2018-10-22 Thread Alejandro Piñeiro
Since ARB_gl_spirv it is possible to miss a lot of name reflection
information, so it is needed to add NULL name checks for several
queries, and return a specific value on those cases. This commit add
them for ACTIVE_UNIFORM_BLOCK_MAX_NAME_LENGTH,
ACTIVE_ATTRIBUTE_MAX_LENGTH and ACTIVE_UNIFORM_MAX_LENGTH.

From ARB_gl_spirv spec:
   "If pname is ACTIVE_UNIFORM_BLOCK_MAX_NAME_LENGTH, the length of
the longest active uniform block name, including the null
terminator, is returned. If no active uniform blocks exist, zero
is returned. If no name reflection information is available, one
is returned.

If pname is ACTIVE_ATTRIBUTE_MAX_LENGTH, the length of the longest
active attribute name, including a null terminator, is returned.
If no active attributes exist, zero is returned. If no name
reflection information is available, one is returned.

If pname is ACTIVE_UNIFORM_MAX_LENGTH, the length of the longest
active uniform name, including a null terminator, is returned. If
no active uniforms exist, zero is returned. If no name reflection
information is available, one is returned."
---
 src/mesa/main/shader_query.cpp | 12 ++--
 src/mesa/main/shaderapi.c  | 26 ++
 2 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp
index b775b4231c2..0a85e183a0c 100644
--- a/src/mesa/main/shader_query.cpp
+++ b/src/mesa/main/shader_query.cpp
@@ -244,9 +244,17 @@ _mesa_longest_attribute_name_length(struct 
gl_shader_program *shProg)
   if (res->Type == GL_PROGRAM_INPUT &&
   res->StageReferences & (1 << MESA_SHADER_VERTEX)) {
 
-  const size_t length = strlen(RESOURCE_VAR(res)->name);
+ /* From ARB_gl_spirv spec:
+  *   "If pname is ACTIVE_ATTRIBUTE_MAX_LENGTH, the length of the
+  *longest active attribute name, including a null terminator, is
+  *returned.  If no active attributes exist, zero is returned. If
+  *no name reflection information is available, one is returned."
+  */
+  const size_t length = RESOURCE_VAR(res)->name != NULL ?
+ strlen(RESOURCE_VAR(res)->name) : 1;
+
   if (length >= longest)
- longest = length + 1;
+ longest = RESOURCE_VAR(res)->name != NULL ? length + 1 : length;
   }
}
 
diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index 2ea8d965aba..3e532c1b41e 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -728,11 +728,22 @@ get_programiv(struct gl_context *ctx, GLuint program, 
GLenum pname,
  if (shProg->data->UniformStorage[i].is_shader_storage)
 continue;
 
+ /* From ARB_gl_spirv spec:
+  *   "If pname is ACTIVE_UNIFORM_MAX_LENGTH, the length of the
+  *longest active uniform name, including a null terminator, is
+  *returned. If no active uniforms exist, zero is returned. If no
+  *name reflection information is available, one is returned."
+  *
+  * We are setting 0 here, as below it will add 1 for the NUL 
character.
+  */
+ const GLint base_len = shProg->data->UniformStorage[i].name != NULL ?
+strlen(shProg->data->UniformStorage[i].name) : 0;
+
 /* Add one for the terminating NUL character for a non-array, and
  * 4 for the "[0]" and the NUL for an array.
  */
- const GLint len = strlen(shProg->data->UniformStorage[i].name) + 1 +
- ((shProg->data->UniformStorage[i].array_elements != 0) ? 3 : 0);
+ const GLint len = base_len + 1 +
+((shProg->data->UniformStorage[i].array_elements != 0) ? 3 : 0);
 
 if (len > max_len)
max_len = len;
@@ -810,9 +821,16 @@ get_programiv(struct gl_context *ctx, GLuint program, 
GLenum pname,
  break;
 
   for (i = 0; i < shProg->data->NumUniformBlocks; i++) {
-/* Add one for the terminating NUL character.
+/* Add one for the terminating NUL character. Name can be NULL, in
+  * that case, from ARB_gl_spirv:
+  *   "If pname is ACTIVE_UNIFORM_BLOCK_MAX_NAME_LENGTH, the length of
+  *the longest active uniform block name, including the null
+  *terminator, is returned. If no active uniform blocks exist,
+  *zero is returned. If no name reflection information is
+  *available, one is returned."
  */
- const GLint len = strlen(shProg->data->UniformBlocks[i].Name) + 1;
+ const GLint len = shProg->data->UniformBlocks[i].Name ?
+strlen(shProg->data->UniformBlocks[i].Name) + 1 : 1;
 
 if (len > max_len)
max_len = len;
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 26/28] mesa: add NULL name check for NUM_ACTIVE_VARIABLES query

2018-10-22 Thread Alejandro Piñeiro
This can happens if we are running an SPIR-V shader (ARB_gl_spirv).
---
 src/mesa/main/shader_query.cpp | 30 --
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp
index 11ecd71c575..b775b4231c2 100644
--- a/src/mesa/main/shader_query.cpp
+++ b/src/mesa/main/shader_query.cpp
@@ -1013,11 +1013,16 @@ get_buffer_property(struct gl_shader_program *shProg,
  *val = 0;
  for (unsigned i = 0; i < RESOURCE_UBO(res)->NumUniforms; i++) {
 const char *iname = RESOURCE_UBO(res)->Uniforms[i].IndexName;
-struct gl_program_resource *uni =
-   _mesa_program_resource_find_name(shProg, GL_UNIFORM, iname,
-NULL);
-if (!uni)
-   continue;
+/* IndexName can be NULL if we are using a SPIR-V shader
+ * (ARB_gl_spirv).
+ */
+if (iname != NULL) {
+   struct gl_program_resource *uni =
+  _mesa_program_resource_find_name(shProg, GL_UNIFORM, iname,
+   NULL);
+   if (!uni)
+  continue;
+}
 (*val)++;
  }
  return 1;
@@ -1049,11 +1054,16 @@ get_buffer_property(struct gl_shader_program *shProg,
  *val = 0;
  for (unsigned i = 0; i < RESOURCE_UBO(res)->NumUniforms; i++) {
 const char *iname = RESOURCE_UBO(res)->Uniforms[i].IndexName;
-struct gl_program_resource *uni =
-   _mesa_program_resource_find_name(shProg, GL_BUFFER_VARIABLE,
-iname, NULL);
-if (!uni)
-   continue;
+/* IndexName can be NULL if we are using a SPIR-V shader
+ * (ARB_gl_spirv).
+ */
+if (iname != NULL) {
+   struct gl_program_resource *uni =
+  _mesa_program_resource_find_name(shProg, GL_BUFFER_VARIABLE,
+   iname, NULL);
+   if (!uni)
+  continue;
+}
 (*val)++;
  }
  return 1;
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 28/28] nir/linker: Add inputs/outputs to the program resource list

2018-10-22 Thread Alejandro Piñeiro
From: Antia Puentes 

---
 src/compiler/glsl/gl_nir_linker.c | 79 +++
 1 file changed, 79 insertions(+)

diff --git a/src/compiler/glsl/gl_nir_linker.c 
b/src/compiler/glsl/gl_nir_linker.c
index 138a12e532d..acec0fe1f03 100644
--- a/src/compiler/glsl/gl_nir_linker.c
+++ b/src/compiler/glsl/gl_nir_linker.c
@@ -33,6 +33,58 @@
  * Also note that this is tailored for ARB_gl_spirv needs and particularities
  */
 
+static bool
+add_interface_variables(const struct gl_context *cts,
+struct gl_shader_program *prog,
+struct set *resource_set,
+unsigned stage, GLenum programInterface)
+{
+   const struct exec_list *var_list = NULL;
+
+   struct gl_linked_shader *sh = prog->_LinkedShaders[stage];
+   if (!sh)
+  return true;
+
+   nir_shader *nir = sh->Program->nir;
+   assert(nir);
+
+   switch (programInterface) {
+   case GL_PROGRAM_INPUT:
+  var_list = >inputs;
+  break;
+   case GL_PROGRAM_OUTPUT:
+  var_list = >outputs;
+  break;
+   default:
+  assert("!Should not get here");
+  break;
+   }
+
+   nir_foreach_variable(var, var_list) {
+  if (var->data.how_declared == nir_var_hidden)
+ continue;
+
+  struct gl_shader_variable *sh_var =
+ rzalloc(prog, struct gl_shader_variable);
+
+  /* In the ARB_gl_spirv spec, names are considered optional debug info, so
+   * the linker needs to work without them. Returning them is optional.
+   * For simplicity, we ignore names.
+   */
+  sh_var->name = NULL;
+  sh_var->type = var->type;
+  sh_var->location = var->data.location;
+
+  if (!link_util_add_program_resource(prog, resource_set,
+  programInterface,
+  sh_var, 1 << stage)) {
+ return false;
+  }
+   }
+
+   return true;
+}
+
 void
 nir_build_program_resource_list(struct gl_context *ctx,
 struct gl_shader_program *prog)
@@ -44,10 +96,37 @@ nir_build_program_resource_list(struct gl_context *ctx,
   prog->data->NumProgramResourceList = 0;
}
 
+   int input_stage = MESA_SHADER_STAGES, output_stage = 0;
+
+   /* Determine first input and final output stage. These are used to
+* detect which variables should be enumerated in the resource list
+* for GL_PROGRAM_INPUT and GL_PROGRAM_OUTPUT.
+*/
+   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+  if (!prog->_LinkedShaders[i])
+ continue;
+  if (input_stage == MESA_SHADER_STAGES)
+ input_stage = i;
+  output_stage = i;
+   }
+
+   /* Empty shader, no resources. */
+   if (input_stage == MESA_SHADER_STAGES && output_stage == 0)
+  return;
+
struct set *resource_set = _mesa_set_create(NULL,
_mesa_hash_pointer,
_mesa_key_pointer_equal);
 
+   /* Add inputs and outputs to the resource list. */
+   if (!add_interface_variables(ctx, prog, resource_set, input_stage,
+GL_PROGRAM_INPUT))
+  return;
+
+   if (!add_interface_variables(ctx, prog, resource_set, output_stage,
+GL_PROGRAM_OUTPUT))
+  return;
+
/* Add uniforms
 *
 * Here, it is expected that nir_link_uniforms() has already been
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 24/28] i965: use GLboolean for all brw_link_shader returns

2018-10-22 Thread Alejandro Piñeiro
The function had a mix of true/GL_TRUE and false/GL_FALSE
returns. Using GL_TRUE/GL_FALSE as the function returns a GLboolean.
---
 src/mesa/drivers/dri/i965/brw_link.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp 
b/src/mesa/drivers/dri/i965/brw_link.cpp
index 37b775637b4..03b32d1fe7a 100644
--- a/src/mesa/drivers/dri/i965/brw_link.cpp
+++ b/src/mesa/drivers/dri/i965/brw_link.cpp
@@ -264,7 +264,7 @@ brw_link_shader(struct gl_context *ctx, struct 
gl_shader_program *shProg)
/* SPIR-V programs use a NIR linker */
if (shProg->data->spirv) {
   if (!gl_nir_link_uniforms(ctx, shProg))
- return false;
+ return GL_FALSE;
 
   gl_nir_link_assign_atomic_counter_resources(ctx, shProg);
   gl_nir_link_assign_xfb_resources(ctx, shProg);
@@ -375,7 +375,7 @@ brw_link_shader(struct gl_context *ctx, struct 
gl_shader_program *shProg)
}
 
if (brw->precompile && !brw_shader_precompile(ctx, shProg))
-  return false;
+  return GL_FALSE;
 
/* SPIR-V programs build its resource list from linked NIR shaders. */
if (!shProg->data->spirv)
@@ -393,5 +393,5 @@ brw_link_shader(struct gl_context *ctx, struct 
gl_shader_program *shProg)
   shader->ir = NULL;
}
 
-   return true;
+   return GL_TRUE;
 }
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 21/28] nir/linker: update already processed uniforms search for UBOs/SSBOs

2018-10-22 Thread Alejandro Piñeiro
Until now, we were using the uniform explicit location to check if the
current nir variable already was processed, and entries on the uniform
storage added. But for UBOs/SSBOs, entries are added but we lack a
explicit location.

For those we need to rely on the UBO/SSBO binding (to the nir variable
binding, and the uniform storage block_index). In that case several
uniforms would need to be updated at once.
---
 src/compiler/glsl/gl_nir_link_uniforms.c | 78 
 1 file changed, 68 insertions(+), 10 deletions(-)

diff --git a/src/compiler/glsl/gl_nir_link_uniforms.c 
b/src/compiler/glsl/gl_nir_link_uniforms.c
index 448f8277c16..d266091ba80 100644
--- a/src/compiler/glsl/gl_nir_link_uniforms.c
+++ b/src/compiler/glsl/gl_nir_link_uniforms.c
@@ -130,20 +130,79 @@ nir_setup_uniform_remap_tables(struct gl_context *ctx,
}
 }
 
+static void
+update_uniform_storage(struct gl_uniform_storage *uniform,
+   unsigned stage)
+{
+   uniform->active_shader_mask |= 1 << stage;
+}
+
+/**
+ * Finds, return, and update the stage infor for any uniform at the
+ * UniformStorage any uniform defined by @var. In general this is done using
+ * the explicit location, except:
+ *
+ * * UBOs/SSBOs: as they lack explicit location, binding is used to locate
+ *   them. That means that more that one entry at the uniform storage can be
+ *   found. In that case all of them are updated, and the first entry is
+ *   returned, in order to update the location of nir variable.
+ *
+ * * Expecial uniforms: like atomic counters. They lack a explicit location,
+ *   so they are skipped, handled in any case, and assign a location later.
+ *
+ */
 static struct gl_uniform_storage *
-find_previous_uniform_storage(struct gl_shader_program *prog,
-  int location)
+find_and_update_previous_uniform_storage(struct gl_shader_program *prog,
+ nir_variable *var,
+ unsigned stage)
 {
-   /* This would only work for uniform with explicit location, as all the
-* uniforms without location (ie: atomic counters) would have a initial
-* location equal to -1. We early return in that case.
+   if (nir_variable_is_in_block(var)) {
+  struct gl_uniform_storage *uniform = NULL;
+
+  unsigned num_blks = nir_variable_is_in_ubo(var) ?
+ prog->data->NumUniformBlocks :
+ prog->data->NumShaderStorageBlocks;
+
+  struct gl_uniform_block *blks = nir_variable_is_in_ubo(var) ?
+ prog->data->UniformBlocks : prog->data->ShaderStorageBlocks;
+
+  for (unsigned i = 0; i < prog->data->NumUniformStorage; i++) {
+ /* UniformStorage contains both variables from ubos and ssbos */
+ if ( prog->data->UniformStorage[i].is_shader_storage !=
+  nir_variable_is_in_ssbo(var))
+continue;
+
+ int block_index = prog->data->UniformStorage[i].block_index;
+ if (block_index != -1) {
+assert(block_index < num_blks);
+
+if (var->data.binding == blks[block_index].Binding) {
+   if (!uniform)
+  uniform = >data->UniformStorage[i];
+   update_uniform_storage(>data->UniformStorage[i],
+  stage);
+}
+ }
+  }
+
+  return uniform;
+   }
+
+   /* Beyond blocks, there are still some corner cases of uniforms without
+* location (ie: atomic counters) that would have a initial location equal
+* to -1. We just return on that case. Those uniforms will be handled
+* later.
 */
-   if (location == -1)
+   if (var->data.location == -1)
   return NULL;
 
-   for (unsigned i = 0; i < prog->data->NumUniformStorage; i++)
-  if (prog->data->UniformStorage[i].remap_location == location)
+   for (unsigned i = 0; i < prog->data->NumUniformStorage; i++) {
+  if (prog->data->UniformStorage[i].remap_location == var->data.location) {
+ update_uniform_storage(>data->UniformStorage[i], stage);
+
  return >data->UniformStorage[i];
+  }
+   }
 
return NULL;
 }
@@ -504,9 +563,8 @@ gl_nir_link_uniforms(struct gl_context *ctx,
   * other stage. If so, validate they are compatible and update
   * the active stage mask.
   */
- uniform = find_previous_uniform_storage(prog, var->data.location);
+ uniform = find_and_update_previous_uniform_storage(prog, var, 
shader_type);
  if (uniform) {
-uniform->active_shader_mask |= 1 << shader_type;
 var->data.location = uniform - prog->data->UniformStorage;
 
 continue;
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 20/28] nir/linker: fill up uniform_storage with explicit data

2018-10-22 Thread Alejandro Piñeiro
Specifically, offset, array_stride, matrix_stride and row_major.

On GLSL, most of that info is computed, but on ARB_gl_spirv they are
explicit, and for Mesa, included on the glsl_type.

From ARB_gl_spirv spec:

   "Mapping of layouts

  std140/std430  ->  explicit *Offset*, *ArrayStride*, and *MatrixStride*
 Decoration on struct members""

"7.6.2.spv SPIR-V Uniform Offsets and Strides

The SPIR-V decorations *GLSLShared* or *GLSLPacked* must not be used. A
variable in the *Uniform* Storage Class decorated as a *Block* must be
explicitly laid out using the *Offset*, *ArrayStride*, and *MatrixStride*
decorations"

For offset, matrix_stride and row_major we needed to include the
parent and index_in_parent while processing the type, as
matrix_stride/row_major are maintained as fields of the parent type,
not on the type itself.
---
 src/compiler/glsl/gl_nir_link_uniforms.c | 31 ++-
 1 file changed, 26 insertions(+), 5 deletions(-)

diff --git a/src/compiler/glsl/gl_nir_link_uniforms.c 
b/src/compiler/glsl/gl_nir_link_uniforms.c
index ac445c8560a..448f8277c16 100644
--- a/src/compiler/glsl/gl_nir_link_uniforms.c
+++ b/src/compiler/glsl/gl_nir_link_uniforms.c
@@ -282,6 +282,8 @@ nir_link_uniform(struct gl_context *ctx,
  struct gl_program *stage_program,
  gl_shader_stage stage,
  const struct glsl_type *type,
+ const struct glsl_type *parent_type,
+ unsigned index_in_parent,
  int location,
  struct nir_link_uniforms_state *state)
 {
@@ -309,7 +311,7 @@ nir_link_uniform(struct gl_context *ctx,
 field_type = glsl_get_array_element(type);
 
  int entries = nir_link_uniform(ctx, prog, stage_program, stage,
-field_type, location,
+field_type, type, i, location,
 state);
  if (entries == -1)
 return -1;
@@ -352,9 +354,11 @@ nir_link_uniform(struct gl_context *ctx,
   if (glsl_type_is_array(type)) {
  uniform->type = type_no_array;
  uniform->array_elements = glsl_get_length(type);
+ uniform->array_stride = glsl_get_explicit_array_stride(type);
   } else {
  uniform->type = type;
  uniform->array_elements = 0;
+ uniform->array_stride = 0;
   }
   uniform->active_shader_mask |= 1 << stage;
 
@@ -371,15 +375,31 @@ nir_link_uniform(struct gl_context *ctx,
 
   uniform->is_shader_storage = nir_variable_is_in_ssbo(state->current_var);
 
+  if (nir_variable_is_in_block(state->current_var) &&
+  glsl_type_is_matrix(type)) {
+ assert(parent_type);
+
+ uniform->matrix_stride =
+glsl_get_struct_field_explicit_matrix_stride(parent_type, 
index_in_parent);
+
+ uniform->row_major =
+glsl_get_struct_field_matrix_layout(parent_type, index_in_parent) 
==
+GLSL_MATRIX_LAYOUT_ROW_MAJOR;
+  } else {
+ uniform->matrix_stride = 0;
+ uniform->row_major = false;
+  }
+
+  if (parent_type)
+ uniform->offset = glsl_get_struct_field_offset(parent_type, 
index_in_parent);
+  else
+ uniform->offset = 0;
+
   /* @FIXME: the initialization of the following will be done as we
* implement support for their specific features, like SSBO, atomics,
* etc.
*/
   uniform->block_index = -1;
-  uniform->offset = -1;
-  uniform->matrix_stride = -1;
-  uniform->array_stride = -1;
-  uniform->row_major = false;
   uniform->builtin = false;
   uniform->atomic_buffer_index = -1;
   uniform->top_level_array_size = 0;
@@ -543,6 +563,7 @@ gl_nir_link_uniforms(struct gl_context *ctx,
  state.current_type = type_tree;
 
  int res = nir_link_uniform(ctx, prog, sh->Program, shader_type, type,
+NULL, 0,
 location, );
 
  free_type_tree(type_tree);
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 22/28] nir/linker: Set the uniform's block_index

2018-10-22 Thread Alejandro Piñeiro
From: Antia Puentes 

Binding comparison is used to determine the block the uniform is part
of. To do the binding comparison we need the information in
UniformBlocks[] and ShaderStorageBlocks[] to be available, so we have
to call gl_nir_link_uniform_blocks() before linking the uniforms.
---
 src/compiler/glsl/gl_nir_link_uniforms.c | 22 +-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/gl_nir_link_uniforms.c 
b/src/compiler/glsl/gl_nir_link_uniforms.c
index d266091ba80..77def1a623f 100644
--- a/src/compiler/glsl/gl_nir_link_uniforms.c
+++ b/src/compiler/glsl/gl_nir_link_uniforms.c
@@ -454,11 +454,31 @@ nir_link_uniform(struct gl_context *ctx,
   else
  uniform->offset = 0;
 
+  int buffer_block_index = -1;
+  /* If the uniform is inside a uniform block determine its block index by
+   * comparing the bindings, we can not use names.
+   */
+  if (nir_variable_is_in_block(state->current_var)) {
+ struct gl_uniform_block *blocks = 
nir_variable_is_in_ssbo(state->current_var) ?
+prog->data->ShaderStorageBlocks : prog->data->UniformBlocks;
+
+ int num_blocks = nir_variable_is_in_ssbo(state->current_var) ?
+prog->data->NumShaderStorageBlocks : prog->data->NumUniformBlocks;
+
+ for (unsigned i = 0; i < num_blocks; i++) {
+if (state->current_var->data.binding == blocks[i].Binding) {
+   buffer_block_index = i;
+}
+ }
+ assert(buffer_block_index >= 0);
+  }
+
+  uniform->block_index = buffer_block_index;
+
   /* @FIXME: the initialization of the following will be done as we
* implement support for their specific features, like SSBO, atomics,
* etc.
*/
-  uniform->block_index = -1;
   uniform->builtin = false;
   uniform->atomic_buffer_index = -1;
   uniform->top_level_array_size = 0;
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 19/28] nir/linker: use only the array element type for array of ssbo/ubo

2018-10-22 Thread Alejandro Piñeiro
For this interfaces, the inner members are added only once as uniforms
or resources, in opposite to other cases, like a uniform array of
structs.

For those guessing why a issue (16) from ARB_program_interface_query
was used, instead of a quote of the core spec: The core spec is not
really clear about how members of arrays of blocks should be
enumerated.

On GLSL this was also problematic, specially when we were trying to
pass the 4.5 CTS tests. See commit "glsl: Fix program interface
queries relating to interface blocks"
(4c4d9e4f032d5753034361ee70aa88d16d3a04b4), as a reference. That one
also needed to rely on issue (16) to justify the change, pointing that
the core spec needs to be clarified.
---
 src/compiler/glsl/gl_nir_link_uniforms.c | 44 ++--
 1 file changed, 42 insertions(+), 2 deletions(-)

diff --git a/src/compiler/glsl/gl_nir_link_uniforms.c 
b/src/compiler/glsl/gl_nir_link_uniforms.c
index 00995fb3f76..ac445c8560a 100644
--- a/src/compiler/glsl/gl_nir_link_uniforms.c
+++ b/src/compiler/glsl/gl_nir_link_uniforms.c
@@ -498,11 +498,51 @@ gl_nir_link_uniforms(struct gl_context *ctx,
 
  state.current_var = var;
 
+ /*
+  * From ARB_program_interface spec, issue (16):
+  *
+  * "RESOLVED: We will follow the default rule for enumerating block
+  *  members in the OpenGL API, which is:
+  *
+  *  * If a variable is a member of an interface block without an
+  *instance name, it is enumerated using just the variable name.
+  *
+  *  * If a variable is a member of an interface block with an
+  *instance name, it is enumerated as "BlockName.Member", where
+  *"BlockName" is the name of the interface block (not the
+  *instance name) and "Member" is the name of the variable.
+  *
+  * For example, in the following code:
+  *
+  * uniform Block1 {
+  *   int member1;
+  * };
+  * uniform Block2 {
+  *   int member2;
+  * } instance2;
+  * uniform Block3 {
+  *  int member3;
+  * } instance3[2];  // uses two separate buffer bindings
+  *
+  * the three uniforms (if active) are enumerated as "member1",
+  * "Block2.member2", and "Block3.member3"."
+  *
+  * Note that in the last example, with an array of ubo, only one
+  * uniform is generated. For that reason, while unrolling the
+  * uniforms of a ubo, or the variables of a ssbo, we need to treat
+  * arrays of instance as a single block.
+  */
+ const struct glsl_type *type = var->type;
+ if (nir_variable_is_in_block(var) &&
+ glsl_type_is_array(type)) {
+type = glsl_without_array(type);
+ }
+
  struct type_tree_entry *type_tree =
-build_type_tree_for_type(var->type);
+build_type_tree_for_type(type);
  state.current_type = type_tree;
 
- int res = nir_link_uniform(ctx, prog, sh->Program, shader_type, 
var->type,
+ int res = nir_link_uniform(ctx, prog, sh->Program, shader_type, type,
 location, );
 
  free_type_tree(type_tree);
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/28] glsl_types/nir: add matrix_stride plus nir wrapper helpers

2018-10-22 Thread Alejandro Piñeiro
From ARB_gl_spirv spec:

"7.6.2.spv SPIR-V Uniform Offsets and Strides

The SPIR-V decorations *GLSLShared* or *GLSLPacked* must not be
used. A variable in the *Uniform* Storage Class decorated as a
*Block* must be explicitly laid out using the *Offset*,
*ArrayStride*, and *MatrixStride* decorations. If the variable is
decorated as a *BufferBlock*, its offsets and strides must not
contradict std430 alignment and minimum offset requirements.
Otherwise, its offsets and strides must not contradict std140
alignment and minimum offset requirements.

From that paragraph, the first conclusion is that we can rely on the
content of the SPIR-V in order to compute the buffer sizes, as they
are mandatory. That would make the buffer size computation easier. The
second conclusion, from the last sentence, is that *we need* to do
that. As if just needs to not contradict alignments and minimum
offsets, providing a matrix stride of 16 when 8 is enough would be
valid.

This explicit matrix_stride is assumed to only be used on
ARB_gl_spirv. On GLSL there is no way to set it, and it is internally
handled and computed.
---
 src/compiler/glsl_types.cpp |  3 +++
 src/compiler/glsl_types.h   | 10 --
 src/compiler/nir_types.cpp  |  6 ++
 src/compiler/nir_types.h|  2 ++
 4 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/src/compiler/glsl_types.cpp b/src/compiler/glsl_types.cpp
index 70bce6ace8e..104a5104aaa 100644
--- a/src/compiler/glsl_types.cpp
+++ b/src/compiler/glsl_types.cpp
@@ -961,6 +961,9 @@ glsl_type::record_compare(const glsl_type *b, bool 
match_locations) const
   if (this->fields.structure[i].xfb_stride
   != b->fields.structure[i].xfb_stride)
  return false;
+  if (this->fields.structure[i].explicit_matrix_stride
+  != b->fields.structure[i].explicit_matrix_stride)
+ return false;
}
 
return true;
diff --git a/src/compiler/glsl_types.h b/src/compiler/glsl_types.h
index d32b580acc1..9e8332e6cbf 100644
--- a/src/compiler/glsl_types.h
+++ b/src/compiler/glsl_types.h
@@ -1007,6 +1007,12 @@ struct glsl_struct_field {
 */
unsigned matrix_layout:2;
 
+   /**
+* Explicit matrix stride. For ARB_gl_spirv, it is mandatory to set it
+* explicitly. -1 otherwise.
+*/
+   int explicit_matrix_stride;
+
/**
 * For interface blocks, 1 if this variable is a per-patch input or output
 * (as in ir_variable::patch). 0 otherwise.
@@ -1045,7 +1051,7 @@ struct glsl_struct_field {
glsl_struct_field(const struct glsl_type *_type, const char *_name)
   : type(_type), name(_name), location(-1), offset(0), xfb_buffer(0),
 xfb_stride(0), interpolation(0), centroid(0),
-sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0),
+sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), 
explicit_matrix_stride(-1), patch(0),
 precision(GLSL_PRECISION_NONE), memory_read_only(0),
 memory_write_only(0), memory_coherent(0), memory_volatile(0),
 memory_restrict(0), image_format(0), explicit_xfb_buffer(0),
@@ -1057,7 +1063,7 @@ struct glsl_struct_field {
glsl_struct_field()
   : type(NULL), name(NULL), location(0), offset(0), xfb_buffer(0),
 xfb_stride(0), interpolation(0), centroid(0),
-sample(0), matrix_layout(0), patch(0),
+sample(0), matrix_layout(0), explicit_matrix_stride(-1), patch(0),
 precision(0), memory_read_only(0),
 memory_write_only(0), memory_coherent(0), memory_volatile(0),
 memory_restrict(0), image_format(0), explicit_xfb_buffer(0),
diff --git a/src/compiler/nir_types.cpp b/src/compiler/nir_types.cpp
index 2a1ae42a9bb..b2a5da1dc6c 100644
--- a/src/compiler/nir_types.cpp
+++ b/src/compiler/nir_types.cpp
@@ -86,6 +86,12 @@ glsl_get_struct_field_matrix_layout(const struct glsl_type 
*type,
return type->fields.structure[index].matrix_layout;
 }
 
+const int
+glsl_get_struct_field_explicit_matrix_stride(const struct glsl_type *type,
+ unsigned index)
+{
+   return type->fields.structure[index].explicit_matrix_stride;
+}
 
 const glsl_type *
 glsl_get_function_return_type(const glsl_type *type)
diff --git a/src/compiler/nir_types.h b/src/compiler/nir_types.h
index 69de44c3423..d3c00ca5e1a 100644
--- a/src/compiler/nir_types.h
+++ b/src/compiler/nir_types.h
@@ -51,6 +51,8 @@ const int glsl_get_struct_field_offset(const struct glsl_type 
*type,
 
 const unsigned glsl_get_struct_field_matrix_layout(const struct glsl_type 
*type,
unsigned index);
+const int glsl_get_struct_field_explicit_matrix_stride(const struct glsl_type 
*type,
+   unsigned index);
 const struct glsl_type *glsl_get_array_element(const struct glsl_type *type);
 const struct glsl_type *glsl_without_array(const struct glsl_type *type);
 const struct glsl_type 

[Mesa-dev] [PATCH 11/28] spirv/nir: fill glsl_struct_field explicit_matrix_stride

2018-10-22 Thread Alejandro Piñeiro
---
 src/compiler/spirv/spirv_to_nir.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/compiler/spirv/spirv_to_nir.c 
b/src/compiler/spirv/spirv_to_nir.c
index 52c3c968bb7..1201143d2f4 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -816,6 +816,12 @@ struct_member_matrix_stride_cb(struct vtn_builder *b,
   vtn_assert(mat_type->array_element->stride > 0);
   mat_type->stride = dec->literals[0];
}
+
+   /* For the glsl_type we use the stride defined at SPIR-V, as anyone (ie:
+* ARB_gl_spirv linker) that wants to use it would be also using the matrix
+* layout.
+*/
+   ctx->fields[member].explicit_matrix_stride = dec->literals[0];
 }
 
 static void
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 12/28] glsl_types/nir: add explicit_array_stride plus nir wrapper helpers

2018-10-22 Thread Alejandro Piñeiro
From ARB_gl_spirv:
"Mapping of layouts

  std140/std430  ->  explicit *Offset*, *ArrayStride*, and *MatrixStride*
 Decoration on struct members"

That means that we would not have available any kind of layout info,
and we should use explicit array strides.

This commit adds explicit_array_stride. The default value is -1
meaning that it is not set (as with offset). That should be the
default value for GLSL.

In general, the default constructor is ok. We just need to be careful
with some array lowerings, as it should try to get the explicit array
stride when creating new types.

Note that this means that for the ARB_gl_spirv case
std430_array_stride, std140_size etc are meaningless (unless you guess
the layout, something that you shouldn't).

v2: add missing glsl_full_array_type call, found while testing
ARB_gl_spirv with borrowed tests (Alejandro)
---
 src/compiler/glsl/gl_nir_lower_samplers_as_deref.c |  2 +-
 src/compiler/glsl_types.cpp| 28 +-
 src/compiler/glsl_types.h  | 13 +++---
 src/compiler/nir/nir_lower_io_arrays_to_elements.c |  3 ++-
 src/compiler/nir/nir_split_per_member_structs.c|  3 ++-
 src/compiler/nir/nir_split_vars.c  |  7 --
 src/compiler/nir_types.cpp | 20 +---
 src/compiler/nir_types.h   | 10 +++-
 src/compiler/spirv/vtn_variables.c |  3 ++-
 9 files changed, 65 insertions(+), 24 deletions(-)

diff --git a/src/compiler/glsl/gl_nir_lower_samplers_as_deref.c 
b/src/compiler/glsl/gl_nir_lower_samplers_as_deref.c
index 9ff5708f503..9716ac4562a 100644
--- a/src/compiler/glsl/gl_nir_lower_samplers_as_deref.c
+++ b/src/compiler/glsl/gl_nir_lower_samplers_as_deref.c
@@ -99,7 +99,7 @@ remove_struct_derefs_prep(nir_deref_instr **p, char **name,
 
   remove_struct_derefs_prep([1], name, location, type);
 
-  *type = glsl_get_array_instance(*type, length);
+  *type = glsl_get_array_instance(*type, length, 
glsl_get_explicit_array_stride(cur->type));
   break;
}
 
diff --git a/src/compiler/glsl_types.cpp b/src/compiler/glsl_types.cpp
index 104a5104aaa..ef3058a3911 100644
--- a/src/compiler/glsl_types.cpp
+++ b/src/compiler/glsl_types.cpp
@@ -44,7 +44,7 @@ glsl_type::glsl_type(GLenum gl_type,
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
interface_packing(0), interface_row_major(0),
vector_elements(vector_elements), matrix_columns(matrix_columns),
-   length(0)
+   length(0), explicit_array_stride(-1)
 {
/* Values of these types must fit in the two bits of
 * glsl_type::sampled_type.
@@ -77,7 +77,7 @@ glsl_type::glsl_type(GLenum gl_type, glsl_base_type base_type,
base_type(base_type), sampled_type(type),
sampler_dimensionality(dim), sampler_shadow(shadow),
sampler_array(array), interface_packing(0),
-   interface_row_major(0), length(0)
+   interface_row_major(0), length(0), explicit_array_stride(-1)
 {
this->mem_ctx = ralloc_context(NULL);
assert(this->mem_ctx != NULL);
@@ -97,7 +97,7 @@ glsl_type::glsl_type(const glsl_struct_field *fields, 
unsigned num_fields,
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
interface_packing(0), interface_row_major(0),
vector_elements(0), matrix_columns(0),
-   length(num_fields)
+   length(num_fields), explicit_array_stride(-1)
 {
unsigned int i;
 
@@ -127,7 +127,7 @@ glsl_type::glsl_type(const glsl_struct_field *fields, 
unsigned num_fields,
interface_packing((unsigned) packing),
interface_row_major((unsigned) row_major),
vector_elements(0), matrix_columns(0),
-   length(num_fields)
+   length(num_fields), explicit_array_stride(-1)
 {
unsigned int i;
 
@@ -152,7 +152,7 @@ glsl_type::glsl_type(const glsl_type *return_type,
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
interface_packing(0), interface_row_major(0),
vector_elements(0), matrix_columns(0),
-   length(num_params)
+   length(num_params), explicit_array_stride(-1)
 {
unsigned int i;
 
@@ -181,7 +181,7 @@ glsl_type::glsl_type(const char *subroutine_name) :
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
interface_packing(0), interface_row_major(0),
vector_elements(1), matrix_columns(1),
-   length(0)
+   length(0), explicit_array_stride(-1)
 {
this->mem_ctx = ralloc_context(NULL);
assert(this->mem_ctx != NULL);
@@ -434,12 +434,12 @@ _mesa_glsl_release_types(void)
 }
 
 
-glsl_type::glsl_type(const glsl_type *array, unsigned length) :
+glsl_type::glsl_type(const glsl_type *array, unsigned length, int 
explicit_array_stride) :
base_type(GLSL_TYPE_ARRAY), sampled_type(GLSL_TYPE_VOID),
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
interface_packing(0), interface_row_major(0),
vector_elements(0), matrix_columns(0),
-   length(length), name(NULL)
+   length(length), 

  1   2   >