[Mesa-dev] [PATCH] softpipe: fix integer texture swizzling for 1 vs 1.0f

2019-03-20 Thread Dave Airlie
From: Dave Airlie 

The swizzling was putting float one in not integer 1.

This fixes a lot of arb_texture_view-rendering-formats cases.
---
 src/gallium/drivers/softpipe/sp_tex_sample.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 92c78da86f3..26d38296073 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -2754,6 +2754,7 @@ do_swizzling(const struct pipe_sampler_view *sview,
const unsigned swizzle_g = sview->swizzle_g;
const unsigned swizzle_b = sview->swizzle_b;
const unsigned swizzle_a = sview->swizzle_a;
+   float oneval = util_format_is_pure_integer(sview->format) ? uif(1) : 1.0f;
 
switch (swizzle_r) {
case PIPE_SWIZZLE_0:
@@ -2762,7 +2763,7 @@ do_swizzling(const struct pipe_sampler_view *sview,
   break;
case PIPE_SWIZZLE_1:
   for (j = 0; j < 4; j++)
- out[0][j] = 1.0f;
+ out[0][j] = oneval;
   break;
default:
   assert(swizzle_r < 4);
@@ -2777,7 +2778,7 @@ do_swizzling(const struct pipe_sampler_view *sview,
   break;
case PIPE_SWIZZLE_1:
   for (j = 0; j < 4; j++)
- out[1][j] = 1.0f;
+ out[1][j] = oneval;
   break;
default:
   assert(swizzle_g < 4);
@@ -2792,7 +2793,7 @@ do_swizzling(const struct pipe_sampler_view *sview,
   break;
case PIPE_SWIZZLE_1:
   for (j = 0; j < 4; j++)
- out[2][j] = 1.0f;
+ out[2][j] = oneval;
   break;
default:
   assert(swizzle_b < 4);
@@ -2807,7 +2808,7 @@ do_swizzling(const struct pipe_sampler_view *sview,
   break;
case PIPE_SWIZZLE_1:
   for (j = 0; j < 4; j++)
- out[3][j] = 1.0f;
+ out[3][j] = oneval;
   break;
default:
   assert(swizzle_a < 4);
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] softpipe: remove shadow_ref assert.

2019-03-20 Thread Dave Airlie
From: Dave Airlie 

I don't think this really buys us anything and TG4 with cubemap arrays
falls over because sampler == 2, but otherwise works fine.

Fixes:
./bin/textureGather fs shadow r  CubeArray repeat

on softpipe with ARB_gpu_shader5 enabled.
---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 78159fc1d9f..be637d0be56 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -2306,7 +2306,6 @@ exec_tex(struct tgsi_exec_machine *mach,
  FETCH([last], 0, TGSI_CHAN_W);
   }
   else {
- assert(shadow_ref != 4);
  FETCH([last], 1, TGSI_CHAN_X);
   }
 
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/2] softpipe: fix 32-bit bitfield extract

2019-03-20 Thread Dave Airlie
From: Dave Airlie 

These didn't deal with the width == 32 case that TGSI is defined with.

Fixes piglit tests if ARB_gpu_shader5 is enabled.
---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index e1181aa1932..c93e4e26e40 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -4944,8 +4944,13 @@ micro_ibfe(union tgsi_exec_channel *dst,
 {
int i;
for (i = 0; i < 4; i++) {
-  int width = src2->i[i] & 0x1f;
+  int width = src2->i[i];
   int offset = src1->i[i] & 0x1f;
+  if (width == 32 && offset == 0) {
+ dst->i[i] = src0->i[i];
+ continue;
+  }
+  width &= 0x1f;
   if (width == 0)
  dst->i[i] = 0;
   else if (width + offset < 32)
@@ -4966,8 +4971,13 @@ micro_ubfe(union tgsi_exec_channel *dst,
 {
int i;
for (i = 0; i < 4; i++) {
-  int width = src2->u[i] & 0x1f;
+  int width = src2->u[i];
   int offset = src1->u[i] & 0x1f;
+  if (width == 32 && offset == 0) {
+ dst->u[i] = src0->u[i];
+ continue;
+  }
+  width &= 0x1f;
   if (width == 0)
  dst->u[i] = 0;
   else if (width + offset < 32)
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/2] softpipe: handle 32-bit bitfield inserts

2019-03-20 Thread Dave Airlie
From: Dave Airlie 

Fixes piglits if ARB_gpu_shader5 is enabled
---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index c93e4e26e40..78159fc1d9f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -4999,10 +4999,14 @@ micro_bfi(union tgsi_exec_channel *dst,
 {
int i;
for (i = 0; i < 4; i++) {
-  int width = src3->u[i] & 0x1f;
+  int width = src3->u[i];
   int offset = src2->u[i] & 0x1f;
-  int bitmask = ((1 << width) - 1) << offset;
-  dst->u[i] = ((src1->u[i] << offset) & bitmask) | (src0->u[i] & ~bitmask);
+  if (width == 32) {
+ dst->u[i] = src1->u[i];
+  } else {
+ int bitmask = ((1 << width) - 1) << offset;
+ dst->u[i] = ((src1->u[i] << offset) & bitmask) | (src0->u[i] & 
~bitmask);
+  }
}
 }
 
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] svga build warning

2019-03-20 Thread Dave Airlie
src/gallium/winsys/svga/drm/vmw_screen.c: In function 'vmw_dev_compare':
src/gallium/winsys/svga/drm/vmw_screen.c:48:13: warning: In the GNU C
Library, "major" is defined
 by . For historical compatibility, it is
 currently defined by  as well, but we plan to
 remove this soon. To use "major", include 
 directly. If you did not intend to use a system-defined macro
 "major", you should undefine it after including .
return (major(*(dev_t *)key1) == major(*(dev_t *)key2) &&
 ^~~~
Not sure how serious it is, just was looking at the CI build logs.

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v4 1/6] glsl/linker: location aliasing requires types to have the same width

2019-03-20 Thread Timothy Arceri



On 21/3/19 2:07 am, Andres Gomez wrote:

On Wed, 2019-03-20 at 16:46 +0200, Andres Gomez wrote:

On Thu, 2019-03-21 at 00:20 +1100, Timothy Arceri wrote:

On 20/3/19 9:31 pm, Andres Gomez wrote:

On Wed, 2019-03-20 at 20:46 +1100, Timothy Arceri wrote:

On 2/2/19 5:05 am, Andres Gomez wrote:

From: Iago Toral Quiroga 

Regarding location aliasing requirements, the OpenGL spec says:

 "Further, when location aliasing, the aliases sharing the location
  must have the same underlying numerical type  (floating-point or
  integer)."

Khronos has further clarified that this also requires the underlying
types to have the same width, so we can't put a float and a double
in the same location slot for example. Future versions of the spec will
be corrected to make this clear.


The spec has always been very clear for example that it is ok to pack a
float with a dvec3:

   From section 4.4.1 (Input Layout Qualifiers) of the GLSL 4.6 spec:

"layout(location=8) in dvec3 h; // components 0,1,2 and 3 of location 8
   // and components 0 and 1 of location 9
layout(location=9, component=2) in float i; // okay, compts 2 and 3"


Mmmm ... I didn't notice that example before. I think it is just
incorrect or, rather, a typo. The inline comment says that the float
takes components 2 and 3. A float will count only for *1* component.
Hence, the intended type there is a double, in which case the aliasing
is allowed.

The spec is actually very clear just some paragraphs below:

  From Section 4.4.1 (Input Layout Qualifiers) of the GLSL 4.60 spec:

" Further, when location aliasing, the aliases sharing the location
  must have the same underlying numerical type and bit
  width (floating-point or integer, 32-bit versus 64-bit, etc.)
  and the same auxiliary storage and interpolation
  qualification. The one exception where component aliasing is
  permitted is for two input variables (not block members) to a
  vertex shader, which are allowed to have component aliasing. This
  vertex-variable component aliasing is intended only to support
  vertex shaders where each execution path accesses at most one
  input per each aliased component. Implementations are permitted,
  but not required, to generate link-time errors if they detect
  that every path through the vertex shader executable accesses
  multiple inputs aliased to any single component."


Yeah I noticed this when reviewing the piglit tests. It does seem we
need to fix this. However rather than a custom
get_numerical_sized_type() function we should be able to scrap it
completely and possibly future proof the code by using:

glsl_base_type_get_bit_size() and glsl_base_type_is_integer() instead
for the checks.

e.g.

info->is_integer =
glsl_base_type_is_integer(type->without_array()->base_type);
info->bit_size =
glsl_base_type_get_bit_size(type->without_array()->base_type);

And compare these instead.


I don't think this is a better option. Some of the reasons, as
commented in my other mail in this thread, are explained in the
previous (unfinished) review process for this patch.

Additionally, glsl_base_type_is_integer is only true for 32bit
integers. Also, only with these 2 checks, we would be failing for
images and samplers. Finally, we would end with quite a big comparison
that will have to be scattered in several points of the
check_location_aliasing function, making more difficult to understand
the logic behind it.


I disagree. IMO it would actually be much easier to follow and you can 
make the error message much more precise by including the bit-size and 
integer vs float difference.




Scratch the last paragraph. I committed the mistake of checking
"is_integer" instead of "glsl_base_type_is_integer"


This is all addressed at single point, the (already pre-existent)
get_numerical_sized_type function, which makes this easier to
understand and, IMHO, more future proof.


Even with the possibility of using those 2, I would rather keep the
get_numerical_sized_type to address the cases of "non-numerical"
variables beyond structs.


As per my reply to the other thread you shouldn't need to be testing for 
anything other than structs, everything else should already have been 
disallowed by compile-time errors. If I'm wrong can you please give me 
an example of what you are trying to fix.







Your going to need more information bug number etc to convince me this
change is correct.


I'll file a bug against the specs.

In the meanwhile, this is the expected behavior also in the CTS tests,
which is also confirmed with the nVIDIA blob.


This patch amends our implementation to account for this restriction.

In the process of doing this, I also noticed that we would attempt
to check aliasing requirements for record variables (including the test
for the numerical type) which is not allowed, instead, we should be
producing a linker error as soon as we see any attempt to do 

Re: [Mesa-dev] [PATCH v4 1/6] glsl/linker: location aliasing requires types to have the same width

2019-03-20 Thread Timothy Arceri

On 21/3/19 1:38 am, Andres Gomez wrote:

On Wed, 2019-03-20 at 20:46 +1100, Timothy Arceri wrote:

On 2/2/19 5:05 am, Andres Gomez wrote:

From: Iago Toral Quiroga 

Regarding location aliasing requirements, the OpenGL spec says:

"Further, when location aliasing, the aliases sharing the location
 must have the same underlying numerical type  (floating-point or
 integer)."

Khronos has further clarified that this also requires the underlying
types to have the same width, so we can't put a float and a double
in the same location slot for example. Future versions of the spec will
be corrected to make this clear.


The spec has always been very clear for example that it is ok to pack a
float with a dvec3:

  From section 4.4.1 (Input Layout Qualifiers) of the GLSL 4.6 spec:

"layout(location=8) in dvec3 h; // components 0,1,2 and 3 of location 8
  // and components 0 and 1 of location 9
   layout(location=9, component=2) in float i; // okay, compts 2 and 3"


Your going to need more information bug number etc to convince me this
change is correct.


This patch amends our implementation to account for this restriction.

In the process of doing this, I also noticed that we would attempt
to check aliasing requirements for record variables (including the test
for the numerical type) which is not allowed, instead, we should be
producing a linker error as soon as we see any attempt to do location
aliasing on non-numerical variables. For the particular case of structs,
we were producing a linker error in this case, but only because we
assumed that struct fields use all components in each location, so
any attempt to alias locations consumed by struct fields would produce
a link error due to component aliasing, which is not accurate of the
actual problem. This patch would make it produce an error for attempting
to alias a non-numerical variable instead, which is always accurate.


None of this should be needed at all. It is an error to use
location/component layouts on struct members.

"It is a compile-time error to use a location qualifier on a member of a
structure."

"It is a compile-time error to use *component* without also specifying
*location*"

So depending on the component aliasing check is sufficient. If you
really want to add a better error message then see my comments below.


I believe this is already answered in the previous (unfinished) review
process through which this patch went through. Specifically, the 2nd
review that gave place to the v3 patch.


No that doesn't address my comment. It shows why you did what you did, 
but what I'm saying is you don't need to check for this type of thing. 
It should all already be handled by compile-time error checking. You are 
making things more complicated than they need to be.


If I'm wrong please point me to a piglit test that can't be resolved by 
simply checking for a struct as I suggest below.



You can check it here:

https://lists.freedesktop.org/archives/mesa-dev/2017-November/175366.html
https://lists.freedesktop.org/archives/mesa-dev/2017-November/175573.html
https://lists.freedesktop.org/archives/mesa-dev/2017-November/175705.html



v2:
- Do not assert if we see invalid numerical types. These come
  straight from shader code, so we should produce linker errors if
  shaders attempt to do location aliasing on variables that are not
  numerical such as records.
- While we are at it, improve error reporting for the case of
  numerical type mismatch to include the shader stage.

v3:
- Allow location aliasing of images and samplers. If we get these
  it means bindless support is active and they should be handled
  as 64-bit integers (Ilia)
- Make sure we produce link errors for any non-numerical type
  for which we attempt location aliasing, not just structs.

v4:
- Rebased with minor fixes (Andres).
- Added fixing tag to the commit log (Andres).

Fixes: 13652e7516a ("glsl/linker: Fix type checks for location aliasing")
Cc: Ilia Mirkin 
Signed-off-by: Andres Gomez 
---
   src/compiler/glsl/link_varyings.cpp | 64 +
   1 file changed, 46 insertions(+), 18 deletions(-)

diff --git a/src/compiler/glsl/link_varyings.cpp 
b/src/compiler/glsl/link_varyings.cpp
index 3969c0120b3..3f41832ac93 100644
--- a/src/compiler/glsl/link_varyings.cpp
+++ b/src/compiler/glsl/link_varyings.cpp
@@ -424,15 +424,15 @@ compute_variable_location_slot(ir_variable *var, 
gl_shader_stage stage)
   
   struct explicit_location_info {

  ir_variable *var;
-   unsigned numerical_type;
+   int numerical_type;
  unsigned interpolation;
  bool centroid;
  bool sample;
  bool patch;
   };
   
-static inline unsigned

-get_numerical_type(const glsl_type *type)
+static inline int
+get_numerical_sized_type(const glsl_type *type)
   {
  /* From the OpenGL 4.6 spec, section 4.4.1 Input Layout Qualifiers, Page 
68,
   * (Location aliasing):
@@ 

Re: [Mesa-dev] [PATCH] st/nine: enable csmt per default on iris

2019-03-20 Thread Axel Davy

On 20/03/2019 21:38, Andre Heider wrote:

iris is thread safe, enable csmt for a ~5% performace boost.

Signed-off-by: Andre Heider 
---
  src/gallium/state_trackers/nine/device9.c | 8 +---
  1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/gallium/state_trackers/nine/device9.c 
b/src/gallium/state_trackers/nine/device9.c
index 24c8ce062b3..db1c3a1d23d 100644
--- a/src/gallium/state_trackers/nine/device9.c
+++ b/src/gallium/state_trackers/nine/device9.c
@@ -266,13 +266,15 @@ NineDevice9_ctor( struct NineDevice9 *This,
  }
  
  /* Initialize CSMT */

+/* r600, radeonsi and iris are thread safe. */
  if (pCTX->csmt_force == 1)
  This->csmt_active = true;
  else if (pCTX->csmt_force == 0)
  This->csmt_active = false;
-else
-/* r600 and radeonsi are thread safe. */
-This->csmt_active = strstr(pScreen->get_name(pScreen), "AMD") != NULL;
+else if (strstr(pScreen->get_name(pScreen), "AMD") != NULL)
+This->csmt_active = true;
+else if (strstr(pScreen->get_name(pScreen), "Intel") != NULL)
+This->csmt_active = true;
  
  /* We rely on u_upload_mgr using persistent coherent buffers (which don't

   * require flush to work in multi-pipe_context scenario) for vertex and



Could have been an || inside the same if, but maybe it is easier to read 
that way.



Reviewed-by: Axel Davy 


Axel

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] virgl: Use right key to insert resource to hash.

2019-03-20 Thread Chia-I Wu
On Mon, Mar 18, 2019 at 2:22 PM Lepton Wu  wrote:

> The old code could use gem name as key when inserting it to bo_handles
> hash table while trying to remove it from hash table with bo_handle as
> key in virgl_hw_res_destroy. This triggers use after free. Also, we
> should only reuse resource from bo_handle hash when the handle type is
> FD.
>
Reuse is not very accurate.  Opening a shared handle (flink name) twice
gives two GEM handles.  Importing an fd handle (prime fd) twice gives the
same GEM handle.  In all cases, within a virgl_winsys, we want only one GEM
handle and only one virgl_resource for each kernel GEM object.

I think the logic should go like:

  if (HANDLE_TYPE_SHARED) {
if (bo_names.has(flink_name))
  return bo_names[flink_name];
gem_handle = gem_open(flink_name);
  } else {
gem_handle = drmPrimeFDToHandle(prime_fd);
  }

  if (bo_handles.has(gem_handle))
return bo_handles[gem_handle];
  bo_handles[gem_handle] = create_new_resource();


> Signed-off-by: Lepton Wu 
> ---
>  src/gallium/winsys/virgl/drm/virgl_drm_winsys.c | 15 ---
>  1 file changed, 8 insertions(+), 7 deletions(-)
>
> diff --git a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
> b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
> index 120e8eda2cd..01811a0e997 100644
> --- a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
> +++ b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
> @@ -424,13 +424,13 @@ virgl_drm_winsys_resource_create_handle(struct
> virgl_winsys *qws,
>   res = NULL;
>   goto done;
>}
> -   }
>
> -   res = util_hash_table_get(qdws->bo_handles, (void*)(uintptr_t)handle);
> -   if (res) {
> -  struct virgl_hw_res *r = NULL;
> -  virgl_drm_resource_reference(qdws, , res);
> -  goto done;
> +  res = util_hash_table_get(qdws->bo_handles,
> (void*)(uintptr_t)handle);
> +  if (res) {
> +struct virgl_hw_res *r = NULL;
> +virgl_drm_resource_reference(qdws, , res);
> +goto done;
> +  }
> }
>
> res = CALLOC_STRUCT(virgl_hw_res);
> @@ -469,7 +469,8 @@ virgl_drm_winsys_resource_create_handle(struct
> virgl_winsys *qws,
> res->num_cs_references = 0;
> res->fence_fd = -1;
>
> -   util_hash_table_set(qdws->bo_handles, (void *)(uintptr_t)handle, res);
> +   util_hash_table_set(qdws->bo_handles, (void
> *)(uintptr_t)res->bo_handle,
> +   res);
>
>  done:
> mtx_unlock(>bo_handles_mutex);
> --
> 2.21.0.225.g810b269d1ac-goog
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 110211] If DESTDIR is set to an empty string, the dri drivers are not installed

2019-03-20 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=110211

Dylan Baker  changed:

   What|Removed |Added

 Resolution|--- |FIXED
 Status|ASSIGNED|RESOLVED

--- Comment #2 from Dylan Baker  ---
This should be fixed in master, and the patch will be in the next 19.0 and 18.3
releases.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] virgl: close drm fd when destroying virgl screen.

2019-03-20 Thread Chia-I Wu
Reviewed-by: Chia-I Wu 

On Mon, Mar 18, 2019 at 4:40 PM Lepton Wu  wrote:

> This fd was create in virgl_drm_screen_create and should be closed
> in virgl_drm_screen_destroy.
>
> Signed-off-by: Lepton Wu 
> ---
>  src/gallium/winsys/virgl/drm/virgl_drm_winsys.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
> b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
> index 01811a0e997..5501fe3ed48 100644
> --- a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
> +++ b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
> @@ -973,6 +973,7 @@ virgl_drm_screen_destroy(struct pipe_screen *pscreen)
> if (destroy) {
>int fd = virgl_drm_winsys(screen->vws)->fd;
>util_hash_table_remove(fd_tab, intptr_to_pointer(fd));
> +  close(fd);
> }
> mtx_unlock(_screen_mutex);
>
> --
> 2.21.0.225.g810b269d1ac-goog
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 5/8] ac/nir: implement 8-bit ssbo stores

2019-03-20 Thread Samuel Pitoiset


On 3/20/19 1:07 AM, Bas Nieuwenhuizen wrote:

On Tue, Mar 19, 2019 at 9:28 AM Samuel Pitoiset
 wrote:

From: Rhys Perry 

Signed-off-by: Rhys Perry 
---
  src/amd/common/ac_nir_to_llvm.c | 9 +++--
  1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 34c4e2a69fa..f3e8f89ba9b 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1553,7 +1553,7 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,

 LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
 get_src(ctx, instr->src[1]), true);
-   LLVMValueRef base_data = ac_to_float(>ac, src_data);
+   LLVMValueRef base_data = src_data;

Does this work with LLVM 7? (I have vague recollection that the
earlier intrinsics only did floats).


I will double check before pushing.

Thanks for the reviews.




 base_data = ac_trim_vector(>ac, base_data, instr->num_components);
 LLVMValueRef base_offset = get_src(ctx, instr->src[2]);

@@ -1591,7 +1591,12 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
 offset = LLVMBuildAdd(ctx->ac.builder, base_offset,
   LLVMConstInt(ctx->ac.i32, start * elem_size_bytes, 
false), "");

-   if (num_bytes == 2) {
+   if (num_bytes == 1) {
+   ac_build_tbuffer_store_byte(>ac, rsrc, data,
+   offset, ctx->ac.i32_0,
+   cache_policy & ac_glc,
+   writeonly_memory);
+   } else if (num_bytes == 2) {
 ac_build_tbuffer_store_short(>ac, rsrc, data,
  offset, ctx->ac.i32_0,
  cache_policy & ac_glc,
--
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 0/8] radv: VK_KHR_8bit_storage

2019-03-20 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

for the series.

On Tue, Mar 19, 2019 at 9:28 AM Samuel Pitoiset
 wrote:
>
> Hi,
>
> This series implements VK_KHR_8bit_storage for RADV. Original work
> is from Rhys Perry, I did rebase, update some patches and test.
>
> Please review,
> thanks!
>
> Rhys Perry (5):
>   ac/nir: implement 8-bit push constant, ssbo and ubo loads
>   ac/nir: implement 8-bit ssbo stores
>   ac/nir: add 8-bit types to glsl_base_to_llvm_type
>   ac/nir: implement 8-bit conversions
>   radv: enable VK_KHR_8bit_storage
>
> Samuel Pitoiset (3):
>   ac: add various int8 definitions
>   ac: add ac_build_tbuffer_load_byte() helper
>   ac: add ac_build_tbuffer_store_byte() helper
>
>  docs/features.txt |  2 +-
>  src/amd/common/ac_llvm_build.c| 47 +-
>  src/amd/common/ac_llvm_build.h| 19 
>  src/amd/common/ac_nir_to_llvm.c   | 81 ++-
>  src/amd/vulkan/radv_device.c  |  9 
>  src/amd/vulkan/radv_extensions.py |  1 +
>  src/amd/vulkan/radv_shader.c  |  1 +
>  7 files changed, 145 insertions(+), 15 deletions(-)
>
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] st/nine: enable csmt per default on iris

2019-03-20 Thread Kenneth Graunke
On Wednesday, March 20, 2019 1:38:40 PM PDT Andre Heider wrote:
> iris is thread safe, enable csmt for a ~5% performace boost.
> 
> Signed-off-by: Andre Heider 
> ---
>  src/gallium/state_trackers/nine/device9.c | 8 +---
>  1 file changed, 5 insertions(+), 3 deletions(-)
> 
> diff --git a/src/gallium/state_trackers/nine/device9.c 
> b/src/gallium/state_trackers/nine/device9.c
> index 24c8ce062b3..db1c3a1d23d 100644
> --- a/src/gallium/state_trackers/nine/device9.c
> +++ b/src/gallium/state_trackers/nine/device9.c
> @@ -266,13 +266,15 @@ NineDevice9_ctor( struct NineDevice9 *This,
>  }
>  
>  /* Initialize CSMT */
> +/* r600, radeonsi and iris are thread safe. */
>  if (pCTX->csmt_force == 1)
>  This->csmt_active = true;
>  else if (pCTX->csmt_force == 0)
>  This->csmt_active = false;
> -else
> -/* r600 and radeonsi are thread safe. */
> -This->csmt_active = strstr(pScreen->get_name(pScreen), "AMD") != 
> NULL;
> +else if (strstr(pScreen->get_name(pScreen), "AMD") != NULL)
> +This->csmt_active = true;
> +else if (strstr(pScreen->get_name(pScreen), "Intel") != NULL)
> +This->csmt_active = true;
>  
>  /* We rely on u_upload_mgr using persistent coherent buffers (which don't
>   * require flush to work in multi-pipe_context scenario) for vertex and
> 

Thanks Andre!

Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] st/nine: enable csmt per default on iris

2019-03-20 Thread Andre Heider
iris is thread safe, enable csmt for a ~5% performace boost.

Signed-off-by: Andre Heider 
---
 src/gallium/state_trackers/nine/device9.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/gallium/state_trackers/nine/device9.c 
b/src/gallium/state_trackers/nine/device9.c
index 24c8ce062b3..db1c3a1d23d 100644
--- a/src/gallium/state_trackers/nine/device9.c
+++ b/src/gallium/state_trackers/nine/device9.c
@@ -266,13 +266,15 @@ NineDevice9_ctor( struct NineDevice9 *This,
 }
 
 /* Initialize CSMT */
+/* r600, radeonsi and iris are thread safe. */
 if (pCTX->csmt_force == 1)
 This->csmt_active = true;
 else if (pCTX->csmt_force == 0)
 This->csmt_active = false;
-else
-/* r600 and radeonsi are thread safe. */
-This->csmt_active = strstr(pScreen->get_name(pScreen), "AMD") != NULL;
+else if (strstr(pScreen->get_name(pScreen), "AMD") != NULL)
+This->csmt_active = true;
+else if (strstr(pScreen->get_name(pScreen), "Intel") != NULL)
+This->csmt_active = true;
 
 /* We rely on u_upload_mgr using persistent coherent buffers (which don't
  * require flush to work in multi-pipe_context scenario) for vertex and
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] gallium: Add PIPE_BARRIER_UPDATE_BUFFER and UPDATE_TEXTURE bits.

2019-03-20 Thread Kenneth Graunke
On Wednesday, March 20, 2019 6:37:07 AM PDT Ilia Mirkin wrote:
> On Wed, Mar 6, 2019 at 3:32 AM Kenneth Graunke  wrote:
> > There are no nouveau changes in this patch, but that's only because none
> > appeared to be necessary.  Most drivers performed some kind of flush on
> > any memory_barrier() call, regardless of the bits - but nouveau's hooks
> > don't.  So the newly added case would already be a no-op.
> 
> I didn't go back to check the code earlier, but just saw the pushed
> patch, forgot this comment, and decided to check. Looks like
> 
> https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nvc0_context.c#n90
> 
> would get executed, no?
> 
>   -ilia

Yikes...apparently failed at reading.  I pushed a quick fix with the
same code to bail that I put in all the other drivers:

https://cgit.freedesktop.org/mesa/mesa/commit/?id=3c3f250456623d9892042a6d296e77d359702add

I also re-checked nv50 and it seemed fine.

Thanks!
--Ken


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 108841] [RADV] SPIRV's control flow attributes do not propagate to LLVM

2019-03-20 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=108841

--- Comment #5 from Alex Smith  ---
Thanks for doing this. I'm out of the office this week so I can't get an
example right now, will get one and test the MR once I'm back.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] vl/dri3: remove the wait before getting back buffer

2019-03-20 Thread Liu, Leo
The wait here is unnecessary since we got a pool of back buffers,
and the wait for swap buffer will happen before the present pixmap,
at the same time the previous back buffer will be put back to pool
for reuse after the check for PresentIdleNotify event

Signed-off-by: Leo Liu 
---
 src/gallium/auxiliary/vl/vl_winsys_dri3.c | 18 +++---
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_winsys_dri3.c 
b/src/gallium/auxiliary/vl/vl_winsys_dri3.c
index 152d28e59fc..1558d832555 100644
--- a/src/gallium/auxiliary/vl/vl_winsys_dri3.c
+++ b/src/gallium/auxiliary/vl/vl_winsys_dri3.c
@@ -88,7 +88,6 @@ struct vl_dri3_screen
uint64_t send_sbc, recv_sbc;
int64_t last_ust, ns_frame, last_msc, next_msc;
 
-   bool flushed;
bool is_different_gpu;
 };
 
@@ -570,11 +569,9 @@ vl_dri3_flush_frontbuffer(struct pipe_screen *screen,
if (!back)
return;
 
-   if (scrn->flushed) {
-  while (scrn->special_event && scrn->recv_sbc < scrn->send_sbc)
- if (!dri3_wait_present_events(scrn))
-return;
-   }
+   while (scrn->special_event && scrn->recv_sbc < scrn->send_sbc)
+  if (!dri3_wait_present_events(scrn))
+ return;
 
rectangle.x = 0;
rectangle.y = 0;
@@ -610,8 +607,6 @@ vl_dri3_flush_frontbuffer(struct pipe_screen *screen,
 
xcb_flush(scrn->conn);
 
-   scrn->flushed = true;
-
return;
 }
 
@@ -626,13 +621,6 @@ vl_dri3_screen_texture_from_drawable(struct vl_screen 
*vscreen, void *drawable)
if (!dri3_set_drawable(scrn, (Drawable)drawable))
   return NULL;
 
-   if (scrn->flushed) {
-  while (scrn->special_event && scrn->recv_sbc < scrn->send_sbc)
- if (!dri3_wait_present_events(scrn))
-return NULL;
-   }
-   scrn->flushed = false;
-
buffer = (scrn->is_pixmap) ?
 dri3_get_front_buffer(scrn) :
 dri3_get_back_buffer(scrn);
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 110211] If DESTDIR is set to an empty string, the dri drivers are not installed

2019-03-20 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=110211

Bug ID: 110211
   Summary: If DESTDIR is set to an empty string, the dri drivers
are not installed
   Product: Mesa
   Version: 19.0
  Hardware: All
OS: All
Status: NEW
  Severity: normal
  Priority: medium
 Component: Other
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: pierre.labas...@neuf.fr
QA Contact: mesa-dev@lists.freedesktop.org

Dear maintainer,

The script "bin/install_megadrivers.py" contains the following:
---
if os.path.isabs(args.libdir):
to = os.path.join(os.environ.get('DESTDIR', '/'), args.libdir[1:])
else:
to = os.path.join(os.environ['MESON_INSTALL_DESTDIR_PREFIX'],
args.libdir)
---
The issue is when libdir is absolute, and DESTDIR is set to the empty string.
This is very likely to occur in build scripts of the form (many of us at
http://www.linuxfromscratch.org do that):
---
#PKG_DEST=/some/where
...
DESTDIR=$PKG_DEST ninja install
---
where we comment out the PKG_DEST assignment or not, depending on whether we
want a DESTDIR install or not.

Then the "to" variable becomes a relative path, and the drivers are not
installed. This is not really what is expected, I guess. I'd suggest using (but
I am far from being a Python geek):
---
if os.path.isabs(args.libdir):
to = os.path.join(os.environ.get('DESTDIR', ''), args.libdir)
---
That worked for me in all cases I have tried (may not have thought of all of
them...)

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v4 1/6] glsl/linker: location aliasing requires types to have the same width

2019-03-20 Thread Andres Gomez
On Wed, 2019-03-20 at 16:46 +0200, Andres Gomez wrote:
> On Thu, 2019-03-21 at 00:20 +1100, Timothy Arceri wrote:
> > On 20/3/19 9:31 pm, Andres Gomez wrote:
> > > On Wed, 2019-03-20 at 20:46 +1100, Timothy Arceri wrote:
> > > > On 2/2/19 5:05 am, Andres Gomez wrote:
> > > > > From: Iago Toral Quiroga 
> > > > > 
> > > > > Regarding location aliasing requirements, the OpenGL spec says:
> > > > > 
> > > > > "Further, when location aliasing, the aliases sharing the location
> > > > >  must have the same underlying numerical type  (floating-point or
> > > > >  integer)."
> > > > > 
> > > > > Khronos has further clarified that this also requires the underlying
> > > > > types to have the same width, so we can't put a float and a double
> > > > > in the same location slot for example. Future versions of the spec 
> > > > > will
> > > > > be corrected to make this clear.
> > > > 
> > > > The spec has always been very clear for example that it is ok to pack a
> > > > float with a dvec3:
> > > > 
> > > >   From section 4.4.1 (Input Layout Qualifiers) of the GLSL 4.6 spec:
> > > > 
> > > > "layout(location=8) in dvec3 h; // components 0,1,2 and 3 of location 8
> > > >   // and components 0 and 1 of location 
> > > > 9
> > > >layout(location=9, component=2) in float i; // okay, compts 2 and 3"
> > > 
> > > Mmmm ... I didn't notice that example before. I think it is just
> > > incorrect or, rather, a typo. The inline comment says that the float
> > > takes components 2 and 3. A float will count only for *1* component.
> > > Hence, the intended type there is a double, in which case the aliasing
> > > is allowed.
> > > 
> > > The spec is actually very clear just some paragraphs below:
> > > 
> > >  From Section 4.4.1 (Input Layout Qualifiers) of the GLSL 4.60 spec:
> > > 
> > >" Further, when location aliasing, the aliases sharing the location
> > >  must have the same underlying numerical type and bit
> > >  width (floating-point or integer, 32-bit versus 64-bit, etc.)
> > >  and the same auxiliary storage and interpolation
> > >  qualification. The one exception where component aliasing is
> > >  permitted is for two input variables (not block members) to a
> > >  vertex shader, which are allowed to have component aliasing. This
> > >  vertex-variable component aliasing is intended only to support
> > >  vertex shaders where each execution path accesses at most one
> > >  input per each aliased component. Implementations are permitted,
> > >  but not required, to generate link-time errors if they detect
> > >  that every path through the vertex shader executable accesses
> > >  multiple inputs aliased to any single component."
> > 
> > Yeah I noticed this when reviewing the piglit tests. It does seem we 
> > need to fix this. However rather than a custom 
> > get_numerical_sized_type() function we should be able to scrap it 
> > completely and possibly future proof the code by using:
> > 
> > glsl_base_type_get_bit_size() and glsl_base_type_is_integer() instead 
> > for the checks.
> > 
> > e.g.
> > 
> > info->is_integer = 
> > glsl_base_type_is_integer(type->without_array()->base_type);
> > info->bit_size = 
> > glsl_base_type_get_bit_size(type->without_array()->base_type);
> > 
> > And compare these instead.
> 
> I don't think this is a better option. Some of the reasons, as
> commented in my other mail in this thread, are explained in the
> previous (unfinished) review process for this patch.
> 
> Additionally, glsl_base_type_is_integer is only true for 32bit
> integers. Also, only with these 2 checks, we would be failing for
> images and samplers. Finally, we would end with quite a big comparison
> that will have to be scattered in several points of the
> check_location_aliasing function, making more difficult to understand
> the logic behind it. 

Scratch the last paragraph. I committed the mistake of checking
"is_integer" instead of "glsl_base_type_is_integer"

> This is all addressed at single point, the (already pre-existent)
> get_numerical_sized_type function, which makes this easier to
> understand and, IMHO, more future proof.

Even with the possibility of using those 2, I would rather keep the
get_numerical_sized_type to address the cases of "non-numerical"
variables beyond structs.

> 
> > > > Your going to need more information bug number etc to convince me this
> > > > change is correct.
> > > 
> > > I'll file a bug against the specs.
> > > 
> > > In the meanwhile, this is the expected behavior also in the CTS tests,
> > > which is also confirmed with the nVIDIA blob.
> > > 
> > > > > This patch amends our implementation to account for this restriction.
> > > > > 
> > > > > In the process of doing this, I also noticed that we would attempt
> > > > > to check aliasing requirements for record variables (including the 
> > > > > test
> > > > > for the numerical type) which is not allowed, instead, we 

Re: [Mesa-dev] [PATCH] softpipe: fix texture view crashes

2019-03-20 Thread Brian Paul

On 03/19/2019 09:13 PM, Dave Airlie wrote:

From: Dave Airlie 

I noticed we crashed piglit arb_texture_view-rendering-formats
when run on softpipe.

This fixes the clear tiles to use the surface format not the
underlying storage format.

This fixes a bunch of srgb piglits as well.
---
  src/gallium/drivers/softpipe/sp_tile_cache.c | 11 ++-
  1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c 
b/src/gallium/drivers/softpipe/sp_tile_cache.c
index 351736ee421..998939bdf30 100644
--- a/src/gallium/drivers/softpipe/sp_tile_cache.c
+++ b/src/gallium/drivers/softpipe/sp_tile_cache.c
@@ -373,17 +373,18 @@ sp_tile_cache_flush_clear(struct softpipe_tile_cache *tc, 
int layer)
 if (util_format_is_pure_uint(tc->surface->format)) {
pipe_put_tile_ui_format(pt, tc->transfer_map[layer],
x, y, TILE_SIZE, TILE_SIZE,
-  pt->resource->format,
+  tc->surface->format,
(unsigned *) 
tc->tile->data.colorui128);
 } else if (util_format_is_pure_sint(tc->surface->format)) {
pipe_put_tile_i_format(pt, tc->transfer_map[layer],
   x, y, TILE_SIZE, TILE_SIZE,
- pt->resource->format,
+ tc->surface->format,
   (int *) tc->tile->data.colori128);
 } else {
-  pipe_put_tile_rgba(pt, tc->transfer_map[layer],
- x, y, TILE_SIZE, TILE_SIZE,
- (float *) tc->tile->data.color);
+  pipe_put_tile_rgba_format(pt, tc->transfer_map[layer],
+x, y, TILE_SIZE, TILE_SIZE,
+tc->surface->format,
+(float *) tc->tile->data.color);
 }
  }
  numCleared++;



Reviewed-by: Brian Paul 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v4 1/6] glsl/linker: location aliasing requires types to have the same width

2019-03-20 Thread Andres Gomez
On Thu, 2019-03-21 at 00:20 +1100, Timothy Arceri wrote:
> On 20/3/19 9:31 pm, Andres Gomez wrote:
> > On Wed, 2019-03-20 at 20:46 +1100, Timothy Arceri wrote:
> > > On 2/2/19 5:05 am, Andres Gomez wrote:
> > > > From: Iago Toral Quiroga 
> > > > 
> > > > Regarding location aliasing requirements, the OpenGL spec says:
> > > > 
> > > > "Further, when location aliasing, the aliases sharing the location
> > > >  must have the same underlying numerical type  (floating-point or
> > > >  integer)."
> > > > 
> > > > Khronos has further clarified that this also requires the underlying
> > > > types to have the same width, so we can't put a float and a double
> > > > in the same location slot for example. Future versions of the spec will
> > > > be corrected to make this clear.
> > > 
> > > The spec has always been very clear for example that it is ok to pack a
> > > float with a dvec3:
> > > 
> > >   From section 4.4.1 (Input Layout Qualifiers) of the GLSL 4.6 spec:
> > > 
> > > "layout(location=8) in dvec3 h; // components 0,1,2 and 3 of location 8
> > >   // and components 0 and 1 of location 9
> > >layout(location=9, component=2) in float i; // okay, compts 2 and 3"
> > 
> > Mmmm ... I didn't notice that example before. I think it is just
> > incorrect or, rather, a typo. The inline comment says that the float
> > takes components 2 and 3. A float will count only for *1* component.
> > Hence, the intended type there is a double, in which case the aliasing
> > is allowed.
> > 
> > The spec is actually very clear just some paragraphs below:
> > 
> >  From Section 4.4.1 (Input Layout Qualifiers) of the GLSL 4.60 spec:
> > 
> >" Further, when location aliasing, the aliases sharing the location
> >  must have the same underlying numerical type and bit
> >  width (floating-point or integer, 32-bit versus 64-bit, etc.)
> >  and the same auxiliary storage and interpolation
> >  qualification. The one exception where component aliasing is
> >  permitted is for two input variables (not block members) to a
> >  vertex shader, which are allowed to have component aliasing. This
> >  vertex-variable component aliasing is intended only to support
> >  vertex shaders where each execution path accesses at most one
> >  input per each aliased component. Implementations are permitted,
> >  but not required, to generate link-time errors if they detect
> >  that every path through the vertex shader executable accesses
> >  multiple inputs aliased to any single component."
> 
> Yeah I noticed this when reviewing the piglit tests. It does seem we 
> need to fix this. However rather than a custom 
> get_numerical_sized_type() function we should be able to scrap it 
> completely and possibly future proof the code by using:
> 
> glsl_base_type_get_bit_size() and glsl_base_type_is_integer() instead 
> for the checks.
> 
> e.g.
> 
> info->is_integer = 
> glsl_base_type_is_integer(type->without_array()->base_type);
> info->bit_size = 
> glsl_base_type_get_bit_size(type->without_array()->base_type);
> 
> And compare these instead.

I don't think this is a better option. Some of the reasons, as
commented in my other mail in this thread, are explained in the
previous (unfinished) review process for this patch.

Additionally, glsl_base_type_is_integer is only true for 32bit
integers. Also, only with these 2 checks, we would be failing for
images and samplers. Finally, we would end with quite a big comparison
that will have to be scattered in several points of the
check_location_aliasing function, making more difficult to understand
the logic behind it. 

This is all addressed at single point, the (already pre-existent)
get_numerical_sized_type function, which makes this easier to
understand and, IMHO, more future proof.

> 
> > > Your going to need more information bug number etc to convince me this
> > > change is correct.
> > 
> > I'll file a bug against the specs.
> > 
> > In the meanwhile, this is the expected behavior also in the CTS tests,
> > which is also confirmed with the nVIDIA blob.
> > 
> > > > This patch amends our implementation to account for this restriction.
> > > > 
> > > > In the process of doing this, I also noticed that we would attempt
> > > > to check aliasing requirements for record variables (including the test
> > > > for the numerical type) which is not allowed, instead, we should be
> > > > producing a linker error as soon as we see any attempt to do location
> > > > aliasing on non-numerical variables. For the particular case of structs,
> > > > we were producing a linker error in this case, but only because we
> > > > assumed that struct fields use all components in each location, so
> > > > any attempt to alias locations consumed by struct fields would produce
> > > > a link error due to component aliasing, which is not accurate of the
> > > > actual problem. This patch would make it produce an error for 

Re: [Mesa-dev] [PATCH v4 1/6] glsl/linker: location aliasing requires types to have the same width

2019-03-20 Thread Andres Gomez
On Wed, 2019-03-20 at 20:46 +1100, Timothy Arceri wrote:
> On 2/2/19 5:05 am, Andres Gomez wrote:
> > From: Iago Toral Quiroga 
> > 
> > Regarding location aliasing requirements, the OpenGL spec says:
> > 
> >"Further, when location aliasing, the aliases sharing the location
> > must have the same underlying numerical type  (floating-point or
> > integer)."
> > 
> > Khronos has further clarified that this also requires the underlying
> > types to have the same width, so we can't put a float and a double
> > in the same location slot for example. Future versions of the spec will
> > be corrected to make this clear.
> 
> The spec has always been very clear for example that it is ok to pack a 
> float with a dvec3:
> 
>  From section 4.4.1 (Input Layout Qualifiers) of the GLSL 4.6 spec:
> 
> "layout(location=8) in dvec3 h; // components 0,1,2 and 3 of location 8
>  // and components 0 and 1 of location 9
>   layout(location=9, component=2) in float i; // okay, compts 2 and 3"
> 
> 
> Your going to need more information bug number etc to convince me this 
> change is correct.
> 
> > This patch amends our implementation to account for this restriction.
> > 
> > In the process of doing this, I also noticed that we would attempt
> > to check aliasing requirements for record variables (including the test
> > for the numerical type) which is not allowed, instead, we should be
> > producing a linker error as soon as we see any attempt to do location
> > aliasing on non-numerical variables. For the particular case of structs,
> > we were producing a linker error in this case, but only because we
> > assumed that struct fields use all components in each location, so
> > any attempt to alias locations consumed by struct fields would produce
> > a link error due to component aliasing, which is not accurate of the
> > actual problem. This patch would make it produce an error for attempting
> > to alias a non-numerical variable instead, which is always accurate.
> 
> None of this should be needed at all. It is an error to use 
> location/component layouts on struct members.
> 
> "It is a compile-time error to use a location qualifier on a member of a 
> structure."
> 
> "It is a compile-time error to use *component* without also specifying 
> *location*"
> 
> So depending on the component aliasing check is sufficient. If you 
> really want to add a better error message then see my comments below.

I believe this is already answered in the previous (unfinished) review
process through which this patch went through. Specifically, the 2nd
review that gave place to the v3 patch. You can check it here:

https://lists.freedesktop.org/archives/mesa-dev/2017-November/175366.html
https://lists.freedesktop.org/archives/mesa-dev/2017-November/175573.html
https://lists.freedesktop.org/archives/mesa-dev/2017-November/175705.html


> > v2:
> >- Do not assert if we see invalid numerical types. These come
> >  straight from shader code, so we should produce linker errors if
> >  shaders attempt to do location aliasing on variables that are not
> >  numerical such as records.
> >- While we are at it, improve error reporting for the case of
> >  numerical type mismatch to include the shader stage.
> > 
> > v3:
> >- Allow location aliasing of images and samplers. If we get these
> >  it means bindless support is active and they should be handled
> >  as 64-bit integers (Ilia)
> >- Make sure we produce link errors for any non-numerical type
> >  for which we attempt location aliasing, not just structs.
> > 
> > v4:
> >- Rebased with minor fixes (Andres).
> >- Added fixing tag to the commit log (Andres).
> > 
> > Fixes: 13652e7516a ("glsl/linker: Fix type checks for location aliasing")
> > Cc: Ilia Mirkin 
> > Signed-off-by: Andres Gomez 
> > ---
> >   src/compiler/glsl/link_varyings.cpp | 64 +
> >   1 file changed, 46 insertions(+), 18 deletions(-)
> > 
> > diff --git a/src/compiler/glsl/link_varyings.cpp 
> > b/src/compiler/glsl/link_varyings.cpp
> > index 3969c0120b3..3f41832ac93 100644
> > --- a/src/compiler/glsl/link_varyings.cpp
> > +++ b/src/compiler/glsl/link_varyings.cpp
> > @@ -424,15 +424,15 @@ compute_variable_location_slot(ir_variable *var, 
> > gl_shader_stage stage)
> >   
> >   struct explicit_location_info {
> >  ir_variable *var;
> > -   unsigned numerical_type;
> > +   int numerical_type;
> >  unsigned interpolation;
> >  bool centroid;
> >  bool sample;
> >  bool patch;
> >   };
> >   
> > -static inline unsigned
> > -get_numerical_type(const glsl_type *type)
> > +static inline int
> > +get_numerical_sized_type(const glsl_type *type)
> >   {
> >  /* From the OpenGL 4.6 spec, section 4.4.1 Input Layout Qualifiers, 
> > Page 68,
> >   * (Location aliasing):
> > @@ -440,10 +440,25 @@ get_numerical_type(const glsl_type *type)
> >   *"Further, when location aliasing, the 

Re: [Mesa-dev] [PATCH] nir: Constant values are per-column not per-component

2019-03-20 Thread Karol Herbst
Reviewed-by: Karol Herbst 

On Wed, Mar 20, 2019 at 1:22 PM Lionel Landwerlin
 wrote:
>
> Reviewed-by: Lionel Landwerlin 
>
> On 19/03/2019 19:15, Jason Ekstrand wrote:
> > ---
> >   src/compiler/nir/nir.h | 3 ++-
> >   1 file changed, 2 insertions(+), 1 deletion(-)
> >
> > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> > index 67304af1d64..e4f012809e5 100644
> > --- a/src/compiler/nir/nir.h
> > +++ b/src/compiler/nir/nir.h
> > @@ -59,6 +59,7 @@ extern "C" {
> >   #define NIR_FALSE 0u
> >   #define NIR_TRUE (~0u)
> >   #define NIR_MAX_VEC_COMPONENTS 4
> > +#define NIR_MAX_MATRIX_COLUMNS 4
> >   typedef uint8_t nir_component_mask_t;
> >
> >   /** Defines a cast function
> > @@ -141,7 +142,7 @@ typedef struct nir_constant {
> >   * by the type associated with the \c nir_variable.  Constants may be
> >   * scalars, vectors, or matrices.
> >   */
> > -   nir_const_value values[NIR_MAX_VEC_COMPONENTS];
> > +   nir_const_value values[NIR_MAX_MATRIX_COLUMNS];
> >
> >  /* we could get this from the var->type but makes clone *much* easier 
> > to
> >   * not have to care about the type.
>
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] gallium: Add PIPE_BARRIER_UPDATE_BUFFER and UPDATE_TEXTURE bits.

2019-03-20 Thread Ilia Mirkin
On Wed, Mar 6, 2019 at 3:32 AM Kenneth Graunke  wrote:
>
> The glMemoryBarrier() function makes shader memory stores ordered with
> respect to things specified by the given bits.  Until now, st/mesa has
> ignored GL_TEXTURE_UPDATE_BARRIER_BIT and GL_BUFFER_UPDATE_BARRIER_BIT,
> saying that drivers should implicitly perform the needed flushing.
>
> This seems like a pretty big assumption to make.  Instead, this commit
> opts to translate them to new PIPE_BARRIER bits, and adjusts existing
> drivers to continue ignoring them (preserving the current behavior).
>
> The i965 driver performs actions on these memory barriers.  Shader
> memory stores go through a "data cache" which is separate from the
> render cache and other read caches (like the texture cache).  All
> memory barriers need to flush the data cache (to ensure shader memory
> stores are visible), and possibly invalidate read caches (to ensure
> stale data is no longer visible).  The driver implicitly flushes for
> most caches, but not for data cache, since ARB_shader_image_load_store
> introduced MemoryBarrier() precisely to order these explicitly.
>
> I would like to follow i965's approach in iris, flushing the data cache
> on any MemoryBarrier() call, so I need st/mesa to actually call the
> pipe->memory_barrier() callback.
> ---
>  .../drivers/freedreno/freedreno_context.c |  3 ++
>  src/gallium/drivers/r600/r600_state_common.c  |  4 +++
>  src/gallium/drivers/radeonsi/si_state.c   |  3 ++
>  src/gallium/drivers/softpipe/sp_flush.c   |  3 ++
>  src/gallium/drivers/tegra/tegra_context.c |  3 ++
>  src/gallium/drivers/v3d/v3d_context.c |  3 ++
>  src/gallium/include/pipe/p_defines.h  |  7 +++-
>  src/mesa/state_tracker/st_cb_texturebarrier.c | 34 +++
>  8 files changed, 44 insertions(+), 16 deletions(-)
>
> I am curious to hear people's thoughts on this.  It seems useful for the
> driver to receive a memory_barrier() call...and adding a few bits seemed
> to be the cleanest way to make that happen.  But I'm open to ideas.
>
> There are no nouveau changes in this patch, but that's only because none
> appeared to be necessary.  Most drivers performed some kind of flush on
> any memory_barrier() call, regardless of the bits - but nouveau's hooks
> don't.  So the newly added case would already be a no-op.

I didn't go back to check the code earlier, but just saw the pushed
patch, forgot this comment, and decided to check. Looks like

https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nvc0_context.c#n90

would get executed, no?

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v4 1/6] glsl/linker: location aliasing requires types to have the same width

2019-03-20 Thread Timothy Arceri

On 20/3/19 9:31 pm, Andres Gomez wrote:

On Wed, 2019-03-20 at 20:46 +1100, Timothy Arceri wrote:

On 2/2/19 5:05 am, Andres Gomez wrote:

From: Iago Toral Quiroga 

Regarding location aliasing requirements, the OpenGL spec says:

"Further, when location aliasing, the aliases sharing the location
 must have the same underlying numerical type  (floating-point or
 integer)."

Khronos has further clarified that this also requires the underlying
types to have the same width, so we can't put a float and a double
in the same location slot for example. Future versions of the spec will
be corrected to make this clear.


The spec has always been very clear for example that it is ok to pack a
float with a dvec3:

  From section 4.4.1 (Input Layout Qualifiers) of the GLSL 4.6 spec:

"layout(location=8) in dvec3 h; // components 0,1,2 and 3 of location 8
  // and components 0 and 1 of location 9
   layout(location=9, component=2) in float i; // okay, compts 2 and 3"


Mmmm ... I didn't notice that example before. I think it is just
incorrect or, rather, a typo. The inline comment says that the float
takes components 2 and 3. A float will count only for *1* component.
Hence, the intended type there is a double, in which case the aliasing
is allowed.

The spec is actually very clear just some paragraphs below:

 From Section 4.4.1 (Input Layout Qualifiers) of the GLSL 4.60 spec:

   " Further, when location aliasing, the aliases sharing the location
 must have the same underlying numerical type and bit
 width (floating-point or integer, 32-bit versus 64-bit, etc.)
 and the same auxiliary storage and interpolation
 qualification. The one exception where component aliasing is
 permitted is for two input variables (not block members) to a
 vertex shader, which are allowed to have component aliasing. This
 vertex-variable component aliasing is intended only to support
 vertex shaders where each execution path accesses at most one
 input per each aliased component. Implementations are permitted,
 but not required, to generate link-time errors if they detect
 that every path through the vertex shader executable accesses
 multiple inputs aliased to any single component."


Yeah I noticed this when reviewing the piglit tests. It does seem we 
need to fix this. However rather than a custom 
get_numerical_sized_type() function we should be able to scrap it 
completely and possibly future proof the code by using:


glsl_base_type_get_bit_size() and glsl_base_type_is_integer() instead 
for the checks.


e.g.

info->is_integer = 
glsl_base_type_is_integer(type->without_array()->base_type);
info->bit_size = 
glsl_base_type_get_bit_size(type->without_array()->base_type);


And compare these instead.




Your going to need more information bug number etc to convince me this
change is correct.


I'll file a bug against the specs.

In the meanwhile, this is the expected behavior also in the CTS tests,
which is also confirmed with the nVIDIA blob.




This patch amends our implementation to account for this restriction.

In the process of doing this, I also noticed that we would attempt
to check aliasing requirements for record variables (including the test
for the numerical type) which is not allowed, instead, we should be
producing a linker error as soon as we see any attempt to do location
aliasing on non-numerical variables. For the particular case of structs,
we were producing a linker error in this case, but only because we
assumed that struct fields use all components in each location, so
any attempt to alias locations consumed by struct fields would produce
a link error due to component aliasing, which is not accurate of the
actual problem. This patch would make it produce an error for attempting
to alias a non-numerical variable instead, which is always accurate.


None of this should be needed at all. It is an error to use
location/component layouts on struct members.

"It is a compile-time error to use a location qualifier on a member of a
structure."

"It is a compile-time error to use *component* without also specifying
*location*"

So depending on the component aliasing check is sufficient. If you
really want to add a better error message then see my comments below.



v2:
- Do not assert if we see invalid numerical types. These come
  straight from shader code, so we should produce linker errors if
  shaders attempt to do location aliasing on variables that are not
  numerical such as records.
- While we are at it, improve error reporting for the case of
  numerical type mismatch to include the shader stage.

v3:
- Allow location aliasing of images and samplers. If we get these
  it means bindless support is active and they should be handled
  as 64-bit integers (Ilia)
- Make sure we produce link errors for any non-numerical type
  for which we attempt location aliasing, not just structs.

Re: [Mesa-dev] [PATCH 1/1] nir: add a pass that removes continue blocks

2019-03-20 Thread Timothy Arceri



On 20/3/19 9:41 pm, Samuel Pitoiset wrote:

28717 shaders in 14931 tests
Totals:
SGPRS: 1267317 -> 1267549 (0.02 %)
VGPRS: 896876 -> 895920 (-0.11 %)
Spilled SGPRs: 24701 -> 26367 (6.74 %)
Code Size: 48379452 -> 48507880 (0.27 %) bytes
Max Waves: 241159 -> 241190 (0.01 %)

Totals from affected shaders:
SGPRS: 23584 -> 23816 (0.98 %)
VGPRS: 25908 -> 24952 (-3.69 %)
Spilled SGPRs: 503 -> 2169 (331.21 %)
Code Size: 2471392 -> 2599820 (5.20 %) bytes
Max Waves: 586 -> 617 (5.29 %)

The codesize increases is related to Wolfenstein II, maybe it has
too many continue blocks and we should add an arbitrary limit.

This gives +10% FPS with Doom on my Vega56.

Signed-off-by: Samuel Pitoiset 
---
  src/compiler/nir/nir_opt_if.c | 87 +++
  1 file changed, 87 insertions(+)

diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
index bc128f79f3c..47a8a65aad3 100644
--- a/src/compiler/nir/nir_opt_if.c
+++ b/src/compiler/nir/nir_opt_if.c
@@ -890,6 +890,92 @@ opt_if_loop_last_continue(nir_loop *loop)
 return true;
  }
  
+/**

+ * This optimization allows to remove continue blocks by moving the rest of the
+ * loop to the other side. This is only applied if continue blocks are in the
+ * top-level control flow of the loop. This turns:
+ *
+ *if (cond) {
+ *   ... code ...
+ *   continue
+ *} else {
+ *}
+ *... code ...
+ *
+ * into:
+ *
+ *if (cond) {
+ *   ... code ...
+ *   continue
+ *} else {
+ *   ... code ...
+ *}
+ */
+static bool
+opt_if_loop_remove_continue(nir_loop *loop)
+{
+   nir_block *header_block = nir_loop_first_block(loop);
+   nir_block *bottom_block = nir_loop_last_block(loop);
+   nir_block *prev_block =
+  nir_cf_node_as_block(nir_cf_node_prev(>cf_node));
+
+   /* It would be insane if this were not true */
+   assert(_mesa_set_search(header_block->predecessors, prev_block));
+
+   /* The loop must have, at least, two continue blocks (including the normal
+* continue at the end of loop).
+*/
+   if (header_block->predecessors->entries < 3)
+  return false;
+
+   /* Scan the control flow of the loop from the last to the first node, and
+* optimize when an if block contains a continue.
+*/
+   nir_cf_node *last_node = _block->cf_node;
+   nir_cf_node *first_node = _block->cf_node;
+
+   while (last_node != first_node) {
+  last_node = nir_cf_node_prev(last_node);
+  if (last_node->type != nir_cf_node_if)
+ continue;
+
+  nir_if *nif = nir_cf_node_as_if(last_node);
+  nir_block *then_block = nir_if_last_then_block(nif);
+
+  /* Nothing to do if the block isn't a continue block. */
+  nir_instr *instr = nir_block_last_instr(then_block);
+  if (!instr ||
+  instr->type != nir_instr_type_jump ||
+  nir_instr_as_jump(instr)->type != nir_jump_continue)
+ continue;


We also must check the other branch for a jump. Otherwise we risk changing:

if (cond) {
   ... code ...
   continue
} else {
   break;
}
... code ...

 into:

if (cond) {
   ... code ...
   continue
} else {
   break;
   ... code ...
}

It would be good if you could test out this branch [1] to see how it 
compares with this patch, it does the check and like the existing 
opt_if_loop_last_continue() also handles the alternate case of:


if (cond) {
   ... code ...
} else {
   ... code ...
   continue
}
... code ...


[1] https://gitlab.freedesktop.org/tarceri/mesa/commits/continue_opt


+
+  /* Get the block immediately following the if statement. */
+  nir_block *after_if_block =
+ nir_cf_node_as_block(nir_cf_node_next(>cf_node));
+
+  /* Nothing to do if the block following the if statement is empty. */
+  if (is_block_empty(after_if_block))
+ continue;
+
+  nir_cf_node *if_first_node = _if_block->cf_node;
+  nir_cf_node *if_last_node = _block->cf_node;
+  nir_block *else_block = nir_if_last_else_block(nif);
+
+  /* Move all nodes following the if statement to the last else block, in
+   * case there is complicated control flow.
+   */
+  nir_cf_list tmp;
+  nir_cf_extract(, nir_before_cf_node(if_first_node),
+   nir_after_cf_node(if_last_node));
+  nir_cf_reinsert(, nir_after_block(else_block));
+  nir_cf_delete();
+
+  return true;
+   }
+
+   return false;
+}
+
  /* Walk all the phis in the block immediately following the if statement and
   * swap the blocks.
   */
@@ -1355,6 +1441,7 @@ opt_if_cf_list(nir_builder *b, struct exec_list *cf_list)
   progress |= opt_simplify_bcsel_of_phi(b, loop);
   progress |= opt_peel_loop_initial_if(loop);
   progress |= opt_if_loop_last_continue(loop);
+ progress |= opt_if_loop_remove_continue(loop);
   break;
}
  


___
mesa-dev mailing list

Re: [Mesa-dev] [PATCH] nir: Constant values are per-column not per-component

2019-03-20 Thread Lionel Landwerlin

Reviewed-by: Lionel Landwerlin 

On 19/03/2019 19:15, Jason Ekstrand wrote:

---
  src/compiler/nir/nir.h | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 67304af1d64..e4f012809e5 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -59,6 +59,7 @@ extern "C" {
  #define NIR_FALSE 0u
  #define NIR_TRUE (~0u)
  #define NIR_MAX_VEC_COMPONENTS 4
+#define NIR_MAX_MATRIX_COLUMNS 4
  typedef uint8_t nir_component_mask_t;
  
  /** Defines a cast function

@@ -141,7 +142,7 @@ typedef struct nir_constant {
  * by the type associated with the \c nir_variable.  Constants may be
  * scalars, vectors, or matrices.
  */
-   nir_const_value values[NIR_MAX_VEC_COMPONENTS];
+   nir_const_value values[NIR_MAX_MATRIX_COLUMNS];
  
 /* we could get this from the var->type but makes clone *much* easier to

  * not have to care about the type.



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/1] nir: add a pass that removes continue blocks

2019-03-20 Thread Samuel Pitoiset


On 3/20/19 11:47 AM, Timothy Arceri wrote:

On 20/3/19 9:41 pm, Samuel Pitoiset wrote:

28717 shaders in 14931 tests
Totals:
SGPRS: 1267317 -> 1267549 (0.02 %)
VGPRS: 896876 -> 895920 (-0.11 %)
Spilled SGPRs: 24701 -> 26367 (6.74 %)
Code Size: 48379452 -> 48507880 (0.27 %) bytes
Max Waves: 241159 -> 241190 (0.01 %)

Totals from affected shaders:
SGPRS: 23584 -> 23816 (0.98 %)
VGPRS: 25908 -> 24952 (-3.69 %)
Spilled SGPRs: 503 -> 2169 (331.21 %)
Code Size: 2471392 -> 2599820 (5.20 %) bytes
Max Waves: 586 -> 617 (5.29 %)

The codesize increases is related to Wolfenstein II, maybe it has
too many continue blocks and we should add an arbitrary limit.

This gives +10% FPS with Doom on my Vega56.


Nice find. However as discussed on IRC this is just an unrestricted 
version of opt_if_loop_last_continue(), we should merge them but I 
would also like time to test this again with shader-db. The reason I 
restricted opt_if_loop_last_continue() was because it was causing 
extra register pressure in shaders. Its late so I wont be able to test 
until tomorrow.


Yes, no rush.





Signed-off-by: Samuel Pitoiset 
---
  src/compiler/nir/nir_opt_if.c | 87 +++
  1 file changed, 87 insertions(+)

diff --git a/src/compiler/nir/nir_opt_if.c 
b/src/compiler/nir/nir_opt_if.c

index bc128f79f3c..47a8a65aad3 100644
--- a/src/compiler/nir/nir_opt_if.c
+++ b/src/compiler/nir/nir_opt_if.c
@@ -890,6 +890,92 @@ opt_if_loop_last_continue(nir_loop *loop)
 return true;
  }
  +/**
+ * This optimization allows to remove continue blocks by moving the 
rest of the
+ * loop to the other side. This is only applied if continue blocks 
are in the

+ * top-level control flow of the loop. This turns:
+ *
+ *    if (cond) {
+ *   ... code ...
+ *   continue
+ *    } else {
+ *    }
+ *    ... code ...
+ *
+ * into:
+ *
+ *    if (cond) {
+ *   ... code ...
+ *   continue
+ *    } else {
+ *   ... code ...
+ *    }
+ */
+static bool
+opt_if_loop_remove_continue(nir_loop *loop)
+{
+   nir_block *header_block = nir_loop_first_block(loop);
+   nir_block *bottom_block = nir_loop_last_block(loop);
+   nir_block *prev_block =
+ nir_cf_node_as_block(nir_cf_node_prev(>cf_node));
+
+   /* It would be insane if this were not true */
+   assert(_mesa_set_search(header_block->predecessors, prev_block));
+
+   /* The loop must have, at least, two continue blocks (including 
the normal

+    * continue at the end of loop).
+    */
+   if (header_block->predecessors->entries < 3)
+  return false;
+
+   /* Scan the control flow of the loop from the last to the first 
node, and

+    * optimize when an if block contains a continue.
+    */
+   nir_cf_node *last_node = _block->cf_node;
+   nir_cf_node *first_node = _block->cf_node;
+
+   while (last_node != first_node) {
+  last_node = nir_cf_node_prev(last_node);
+  if (last_node->type != nir_cf_node_if)
+ continue;
+
+  nir_if *nif = nir_cf_node_as_if(last_node);
+  nir_block *then_block = nir_if_last_then_block(nif);
+
+  /* Nothing to do if the block isn't a continue block. */
+  nir_instr *instr = nir_block_last_instr(then_block);
+  if (!instr ||
+  instr->type != nir_instr_type_jump ||
+  nir_instr_as_jump(instr)->type != nir_jump_continue)
+ continue;
+
+  /* Get the block immediately following the if statement. */
+  nir_block *after_if_block =
+ nir_cf_node_as_block(nir_cf_node_next(>cf_node));
+
+  /* Nothing to do if the block following the if statement is 
empty. */

+  if (is_block_empty(after_if_block))
+ continue;
+
+  nir_cf_node *if_first_node = _if_block->cf_node;
+  nir_cf_node *if_last_node = _block->cf_node;
+  nir_block *else_block = nir_if_last_else_block(nif);
+
+  /* Move all nodes following the if statement to the last else 
block, in

+   * case there is complicated control flow.
+   */
+  nir_cf_list tmp;
+  nir_cf_extract(, nir_before_cf_node(if_first_node),
+   nir_after_cf_node(if_last_node));
+  nir_cf_reinsert(, nir_after_block(else_block));
+  nir_cf_delete();
+
+  return true;
+   }
+
+   return false;
+}
+
  /* Walk all the phis in the block immediately following the if 
statement and

   * swap the blocks.
   */
@@ -1355,6 +1441,7 @@ opt_if_cf_list(nir_builder *b, struct exec_list 
*cf_list)

   progress |= opt_simplify_bcsel_of_phi(b, loop);
   progress |= opt_peel_loop_initial_if(loop);
   progress |= opt_if_loop_last_continue(loop);
+ progress |= opt_if_loop_remove_continue(loop);
   break;
    }


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/1] nir: add a pass that removes continue blocks

2019-03-20 Thread Timothy Arceri

On 20/3/19 9:41 pm, Samuel Pitoiset wrote:

28717 shaders in 14931 tests
Totals:
SGPRS: 1267317 -> 1267549 (0.02 %)
VGPRS: 896876 -> 895920 (-0.11 %)
Spilled SGPRs: 24701 -> 26367 (6.74 %)
Code Size: 48379452 -> 48507880 (0.27 %) bytes
Max Waves: 241159 -> 241190 (0.01 %)

Totals from affected shaders:
SGPRS: 23584 -> 23816 (0.98 %)
VGPRS: 25908 -> 24952 (-3.69 %)
Spilled SGPRs: 503 -> 2169 (331.21 %)
Code Size: 2471392 -> 2599820 (5.20 %) bytes
Max Waves: 586 -> 617 (5.29 %)

The codesize increases is related to Wolfenstein II, maybe it has
too many continue blocks and we should add an arbitrary limit.

This gives +10% FPS with Doom on my Vega56.


Nice find. However as discussed on IRC this is just an unrestricted 
version of opt_if_loop_last_continue(), we should merge them but I would 
also like time to test this again with shader-db. The reason I 
restricted opt_if_loop_last_continue() was because it was causing extra 
register pressure in shaders. Its late so I wont be able to test until 
tomorrow.




Signed-off-by: Samuel Pitoiset 
---
  src/compiler/nir/nir_opt_if.c | 87 +++
  1 file changed, 87 insertions(+)

diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
index bc128f79f3c..47a8a65aad3 100644
--- a/src/compiler/nir/nir_opt_if.c
+++ b/src/compiler/nir/nir_opt_if.c
@@ -890,6 +890,92 @@ opt_if_loop_last_continue(nir_loop *loop)
 return true;
  }
  
+/**

+ * This optimization allows to remove continue blocks by moving the rest of the
+ * loop to the other side. This is only applied if continue blocks are in the
+ * top-level control flow of the loop. This turns:
+ *
+ *if (cond) {
+ *   ... code ...
+ *   continue
+ *} else {
+ *}
+ *... code ...
+ *
+ * into:
+ *
+ *if (cond) {
+ *   ... code ...
+ *   continue
+ *} else {
+ *   ... code ...
+ *}
+ */
+static bool
+opt_if_loop_remove_continue(nir_loop *loop)
+{
+   nir_block *header_block = nir_loop_first_block(loop);
+   nir_block *bottom_block = nir_loop_last_block(loop);
+   nir_block *prev_block =
+  nir_cf_node_as_block(nir_cf_node_prev(>cf_node));
+
+   /* It would be insane if this were not true */
+   assert(_mesa_set_search(header_block->predecessors, prev_block));
+
+   /* The loop must have, at least, two continue blocks (including the normal
+* continue at the end of loop).
+*/
+   if (header_block->predecessors->entries < 3)
+  return false;
+
+   /* Scan the control flow of the loop from the last to the first node, and
+* optimize when an if block contains a continue.
+*/
+   nir_cf_node *last_node = _block->cf_node;
+   nir_cf_node *first_node = _block->cf_node;
+
+   while (last_node != first_node) {
+  last_node = nir_cf_node_prev(last_node);
+  if (last_node->type != nir_cf_node_if)
+ continue;
+
+  nir_if *nif = nir_cf_node_as_if(last_node);
+  nir_block *then_block = nir_if_last_then_block(nif);
+
+  /* Nothing to do if the block isn't a continue block. */
+  nir_instr *instr = nir_block_last_instr(then_block);
+  if (!instr ||
+  instr->type != nir_instr_type_jump ||
+  nir_instr_as_jump(instr)->type != nir_jump_continue)
+ continue;
+
+  /* Get the block immediately following the if statement. */
+  nir_block *after_if_block =
+ nir_cf_node_as_block(nir_cf_node_next(>cf_node));
+
+  /* Nothing to do if the block following the if statement is empty. */
+  if (is_block_empty(after_if_block))
+ continue;
+
+  nir_cf_node *if_first_node = _if_block->cf_node;
+  nir_cf_node *if_last_node = _block->cf_node;
+  nir_block *else_block = nir_if_last_else_block(nif);
+
+  /* Move all nodes following the if statement to the last else block, in
+   * case there is complicated control flow.
+   */
+  nir_cf_list tmp;
+  nir_cf_extract(, nir_before_cf_node(if_first_node),
+   nir_after_cf_node(if_last_node));
+  nir_cf_reinsert(, nir_after_block(else_block));
+  nir_cf_delete();
+
+  return true;
+   }
+
+   return false;
+}
+
  /* Walk all the phis in the block immediately following the if statement and
   * swap the blocks.
   */
@@ -1355,6 +1441,7 @@ opt_if_cf_list(nir_builder *b, struct exec_list *cf_list)
   progress |= opt_simplify_bcsel_of_phi(b, loop);
   progress |= opt_peel_loop_initial_if(loop);
   progress |= opt_if_loop_last_continue(loop);
+ progress |= opt_if_loop_remove_continue(loop);
   break;
}
  


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 0/1] nir: new pass that removes continue blocks (+10% FPS with Doom on RADV)

2019-03-20 Thread Samuel Pitoiset
Hi,

I wrote that NIR pass few weeks ago when I was trying to improve performance
with Shadow Of The Tomb Raider, which has a bunch of complex loops.

While this pass doesn't really improve SotTR, it gives a huge boost
with DOOM (and probably DOOM VFR), +10% FPS on my Vega 56.

I have no ideas why LLVM isn't able to improve that itself, but I think
it might good to have this optimization directly in NIR.

Please review,
Thanks!

Samuel Pitoiset (1):
  nir: add a pass that removes continue blocks

 src/compiler/nir/nir_opt_if.c | 87 +++
 1 file changed, 87 insertions(+)

-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/1] nir: add a pass that removes continue blocks

2019-03-20 Thread Samuel Pitoiset
28717 shaders in 14931 tests
Totals:
SGPRS: 1267317 -> 1267549 (0.02 %)
VGPRS: 896876 -> 895920 (-0.11 %)
Spilled SGPRs: 24701 -> 26367 (6.74 %)
Code Size: 48379452 -> 48507880 (0.27 %) bytes
Max Waves: 241159 -> 241190 (0.01 %)

Totals from affected shaders:
SGPRS: 23584 -> 23816 (0.98 %)
VGPRS: 25908 -> 24952 (-3.69 %)
Spilled SGPRs: 503 -> 2169 (331.21 %)
Code Size: 2471392 -> 2599820 (5.20 %) bytes
Max Waves: 586 -> 617 (5.29 %)

The codesize increases is related to Wolfenstein II, maybe it has
too many continue blocks and we should add an arbitrary limit.

This gives +10% FPS with Doom on my Vega56.

Signed-off-by: Samuel Pitoiset 
---
 src/compiler/nir/nir_opt_if.c | 87 +++
 1 file changed, 87 insertions(+)

diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
index bc128f79f3c..47a8a65aad3 100644
--- a/src/compiler/nir/nir_opt_if.c
+++ b/src/compiler/nir/nir_opt_if.c
@@ -890,6 +890,92 @@ opt_if_loop_last_continue(nir_loop *loop)
return true;
 }
 
+/**
+ * This optimization allows to remove continue blocks by moving the rest of the
+ * loop to the other side. This is only applied if continue blocks are in the
+ * top-level control flow of the loop. This turns:
+ *
+ *if (cond) {
+ *   ... code ...
+ *   continue
+ *} else {
+ *}
+ *... code ...
+ *
+ * into:
+ *
+ *if (cond) {
+ *   ... code ...
+ *   continue
+ *} else {
+ *   ... code ...
+ *}
+ */
+static bool
+opt_if_loop_remove_continue(nir_loop *loop)
+{
+   nir_block *header_block = nir_loop_first_block(loop);
+   nir_block *bottom_block = nir_loop_last_block(loop);
+   nir_block *prev_block =
+  nir_cf_node_as_block(nir_cf_node_prev(>cf_node));
+
+   /* It would be insane if this were not true */
+   assert(_mesa_set_search(header_block->predecessors, prev_block));
+
+   /* The loop must have, at least, two continue blocks (including the normal
+* continue at the end of loop).
+*/
+   if (header_block->predecessors->entries < 3)
+  return false;
+
+   /* Scan the control flow of the loop from the last to the first node, and
+* optimize when an if block contains a continue.
+*/
+   nir_cf_node *last_node = _block->cf_node;
+   nir_cf_node *first_node = _block->cf_node;
+
+   while (last_node != first_node) {
+  last_node = nir_cf_node_prev(last_node);
+  if (last_node->type != nir_cf_node_if)
+ continue;
+
+  nir_if *nif = nir_cf_node_as_if(last_node);
+  nir_block *then_block = nir_if_last_then_block(nif);
+
+  /* Nothing to do if the block isn't a continue block. */
+  nir_instr *instr = nir_block_last_instr(then_block);
+  if (!instr ||
+  instr->type != nir_instr_type_jump ||
+  nir_instr_as_jump(instr)->type != nir_jump_continue)
+ continue;
+
+  /* Get the block immediately following the if statement. */
+  nir_block *after_if_block =
+ nir_cf_node_as_block(nir_cf_node_next(>cf_node));
+
+  /* Nothing to do if the block following the if statement is empty. */
+  if (is_block_empty(after_if_block))
+ continue;
+
+  nir_cf_node *if_first_node = _if_block->cf_node;
+  nir_cf_node *if_last_node = _block->cf_node;
+  nir_block *else_block = nir_if_last_else_block(nif);
+
+  /* Move all nodes following the if statement to the last else block, in
+   * case there is complicated control flow.
+   */
+  nir_cf_list tmp;
+  nir_cf_extract(, nir_before_cf_node(if_first_node),
+   nir_after_cf_node(if_last_node));
+  nir_cf_reinsert(, nir_after_block(else_block));
+  nir_cf_delete();
+
+  return true;
+   }
+
+   return false;
+}
+
 /* Walk all the phis in the block immediately following the if statement and
  * swap the blocks.
  */
@@ -1355,6 +1441,7 @@ opt_if_cf_list(nir_builder *b, struct exec_list *cf_list)
  progress |= opt_simplify_bcsel_of_phi(b, loop);
  progress |= opt_peel_loop_initial_if(loop);
  progress |= opt_if_loop_last_continue(loop);
+ progress |= opt_if_loop_remove_continue(loop);
  break;
   }
 
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v4 1/6] glsl/linker: location aliasing requires types to have the same width

2019-03-20 Thread Andres Gomez
On Wed, 2019-03-20 at 20:46 +1100, Timothy Arceri wrote:
> On 2/2/19 5:05 am, Andres Gomez wrote:
> > From: Iago Toral Quiroga 
> > 
> > Regarding location aliasing requirements, the OpenGL spec says:
> > 
> >"Further, when location aliasing, the aliases sharing the location
> > must have the same underlying numerical type  (floating-point or
> > integer)."
> > 
> > Khronos has further clarified that this also requires the underlying
> > types to have the same width, so we can't put a float and a double
> > in the same location slot for example. Future versions of the spec will
> > be corrected to make this clear.
> 
> The spec has always been very clear for example that it is ok to pack a 
> float with a dvec3:
> 
>  From section 4.4.1 (Input Layout Qualifiers) of the GLSL 4.6 spec:
> 
> "layout(location=8) in dvec3 h; // components 0,1,2 and 3 of location 8
>  // and components 0 and 1 of location 9
>   layout(location=9, component=2) in float i; // okay, compts 2 and 3"

Mmmm ... I didn't notice that example before. I think it is just
incorrect or, rather, a typo. The inline comment says that the float
takes components 2 and 3. A float will count only for *1* component.
Hence, the intended type there is a double, in which case the aliasing
is allowed.

The spec is actually very clear just some paragraphs below:

From Section 4.4.1 (Input Layout Qualifiers) of the GLSL 4.60 spec:

  " Further, when location aliasing, the aliases sharing the location
must have the same underlying numerical type and bit
width (floating-point or integer, 32-bit versus 64-bit, etc.)
and the same auxiliary storage and interpolation
qualification. The one exception where component aliasing is
permitted is for two input variables (not block members) to a
vertex shader, which are allowed to have component aliasing. This
vertex-variable component aliasing is intended only to support
vertex shaders where each execution path accesses at most one
input per each aliased component. Implementations are permitted,
but not required, to generate link-time errors if they detect
that every path through the vertex shader executable accesses
multiple inputs aliased to any single component."

> Your going to need more information bug number etc to convince me this 
> change is correct.

I'll file a bug against the specs.

In the meanwhile, this is the expected behavior also in the CTS tests,
which is also confirmed with the nVIDIA blob.

> 
> > This patch amends our implementation to account for this restriction.
> > 
> > In the process of doing this, I also noticed that we would attempt
> > to check aliasing requirements for record variables (including the test
> > for the numerical type) which is not allowed, instead, we should be
> > producing a linker error as soon as we see any attempt to do location
> > aliasing on non-numerical variables. For the particular case of structs,
> > we were producing a linker error in this case, but only because we
> > assumed that struct fields use all components in each location, so
> > any attempt to alias locations consumed by struct fields would produce
> > a link error due to component aliasing, which is not accurate of the
> > actual problem. This patch would make it produce an error for attempting
> > to alias a non-numerical variable instead, which is always accurate.
> 
> None of this should be needed at all. It is an error to use 
> location/component layouts on struct members.
> 
> "It is a compile-time error to use a location qualifier on a member of a 
> structure."
> 
> "It is a compile-time error to use *component* without also specifying 
> *location*"
> 
> So depending on the component aliasing check is sufficient. If you 
> really want to add a better error message then see my comments below.
> 
> 
> > v2:
> >- Do not assert if we see invalid numerical types. These come
> >  straight from shader code, so we should produce linker errors if
> >  shaders attempt to do location aliasing on variables that are not
> >  numerical such as records.
> >- While we are at it, improve error reporting for the case of
> >  numerical type mismatch to include the shader stage.
> > 
> > v3:
> >- Allow location aliasing of images and samplers. If we get these
> >  it means bindless support is active and they should be handled
> >  as 64-bit integers (Ilia)
> >- Make sure we produce link errors for any non-numerical type
> >  for which we attempt location aliasing, not just structs.
> > 
> > v4:
> >- Rebased with minor fixes (Andres).
> >- Added fixing tag to the commit log (Andres).
> > 
> > Fixes: 13652e7516a ("glsl/linker: Fix type checks for location aliasing")
> > Cc: Ilia Mirkin 
> > Signed-off-by: Andres Gomez 
> > ---
> >   src/compiler/glsl/link_varyings.cpp | 64 +
> >   1 file changed, 46 insertions(+), 18 deletions(-)
> > 
> > 

Re: [Mesa-dev] [PATCH v4 1/6] glsl/linker: location aliasing requires types to have the same width

2019-03-20 Thread Timothy Arceri

On 2/2/19 5:05 am, Andres Gomez wrote:

From: Iago Toral Quiroga 

Regarding location aliasing requirements, the OpenGL spec says:

   "Further, when location aliasing, the aliases sharing the location
must have the same underlying numerical type  (floating-point or
integer)."

Khronos has further clarified that this also requires the underlying
types to have the same width, so we can't put a float and a double
in the same location slot for example. Future versions of the spec will
be corrected to make this clear.


The spec has always been very clear for example that it is ok to pack a 
float with a dvec3:


From section 4.4.1 (Input Layout Qualifiers) of the GLSL 4.6 spec:

"layout(location=8) in dvec3 h; // components 0,1,2 and 3 of location 8
// and components 0 and 1 of location 9
 layout(location=9, component=2) in float i; // okay, compts 2 and 3"


Your going to need more information bug number etc to convince me this 
change is correct.




This patch amends our implementation to account for this restriction.

In the process of doing this, I also noticed that we would attempt
to check aliasing requirements for record variables (including the test
for the numerical type) which is not allowed, instead, we should be
producing a linker error as soon as we see any attempt to do location
aliasing on non-numerical variables. For the particular case of structs,
we were producing a linker error in this case, but only because we
assumed that struct fields use all components in each location, so
any attempt to alias locations consumed by struct fields would produce
a link error due to component aliasing, which is not accurate of the
actual problem. This patch would make it produce an error for attempting
to alias a non-numerical variable instead, which is always accurate.


None of this should be needed at all. It is an error to use 
location/component layouts on struct members.


"It is a compile-time error to use a location qualifier on a member of a 
structure."


"It is a compile-time error to use *component* without also specifying 
*location*"


So depending on the component aliasing check is sufficient. If you 
really want to add a better error message then see my comments below.





v2:
   - Do not assert if we see invalid numerical types. These come
 straight from shader code, so we should produce linker errors if
 shaders attempt to do location aliasing on variables that are not
 numerical such as records.
   - While we are at it, improve error reporting for the case of
 numerical type mismatch to include the shader stage.

v3:
   - Allow location aliasing of images and samplers. If we get these
 it means bindless support is active and they should be handled
 as 64-bit integers (Ilia)
   - Make sure we produce link errors for any non-numerical type
 for which we attempt location aliasing, not just structs.

v4:
   - Rebased with minor fixes (Andres).
   - Added fixing tag to the commit log (Andres).

Fixes: 13652e7516a ("glsl/linker: Fix type checks for location aliasing")
Cc: Ilia Mirkin 
Signed-off-by: Andres Gomez 
---
  src/compiler/glsl/link_varyings.cpp | 64 +
  1 file changed, 46 insertions(+), 18 deletions(-)

diff --git a/src/compiler/glsl/link_varyings.cpp 
b/src/compiler/glsl/link_varyings.cpp
index 3969c0120b3..3f41832ac93 100644
--- a/src/compiler/glsl/link_varyings.cpp
+++ b/src/compiler/glsl/link_varyings.cpp
@@ -424,15 +424,15 @@ compute_variable_location_slot(ir_variable *var, 
gl_shader_stage stage)
  
  struct explicit_location_info {

 ir_variable *var;
-   unsigned numerical_type;
+   int numerical_type;
 unsigned interpolation;
 bool centroid;
 bool sample;
 bool patch;
  };
  
-static inline unsigned

-get_numerical_type(const glsl_type *type)
+static inline int
+get_numerical_sized_type(const glsl_type *type)
  {
 /* From the OpenGL 4.6 spec, section 4.4.1 Input Layout Qualifiers, Page 
68,
  * (Location aliasing):
@@ -440,10 +440,25 @@ get_numerical_type(const glsl_type *type)
  *"Further, when location aliasing, the aliases sharing the location
  * must have the same underlying numerical type  (floating-point or
  * integer)
+*
+* Khronos has further clarified that this also requires the underlying
+* types to have the same width, so we can't put a float and a double
+* in the same location slot for example. Future versions of the spec will
+* be corrected to make this clear.
+*
+* Notice that we allow location aliasing for bindless image/samplers too
+* since these are defined as 64-bit integers.
  */
-   if (type->is_float() || type->is_double())
+   if (type->is_float())
return GLSL_TYPE_FLOAT;
-   return GLSL_TYPE_INT;
+   else if (type->is_integer())
+  return GLSL_TYPE_INT;
+   else if (type->is_double())
+  return GLSL_TYPE_DOUBLE;
+   else if (type->is_integer_64()