[Mesa-dev] [PATCH] clover: Fix build against clang SVN >= r265359

2016-04-04 Thread Michel Dänzer
From: Michel Dänzer 

Signed-off-by: Michel Dänzer 
---
 src/gallium/state_trackers/clover/llvm/invocation.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp 
b/src/gallium/state_trackers/clover/llvm/invocation.cpp
index 4d11c24..3fb3596 100644
--- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
+++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
@@ -206,6 +206,9 @@ namespace {
   // http://www.llvm.org/bugs/show_bug.cgi?id=19735
   c.getDiagnosticOpts().ShowCarets = false;
   c.getInvocation().setLangDefaults(c.getLangOpts(), clang::IK_OpenCL,
+#if HAVE_LLVM >= 0x0309
+llvm::Triple(triple),
+#endif
 clang::LangStandard::lang_opencl11);
   c.createDiagnostics(
   new clang::TextDiagnosticPrinter(
-- 
2.8.0.rc3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: use has_shader_storage_buffer_objects helper

2016-04-04 Thread Iago Toral
Reviewed-by: Iago Toral Quiroga 

On Mon, 2016-04-04 at 11:55 -0400, Ilia Mirkin wrote:
> Replaces open-coded logic with existing helper.
> 
> Signed-off-by: Ilia Mirkin 
> ---
>  src/compiler/glsl/lower_ubo_reference.cpp | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/src/compiler/glsl/lower_ubo_reference.cpp 
> b/src/compiler/glsl/lower_ubo_reference.cpp
> index 3155ab6..1a0140f 100644
> --- a/src/compiler/glsl/lower_ubo_reference.cpp
> +++ b/src/compiler/glsl/lower_ubo_reference.cpp
> @@ -372,8 +372,7 @@ lower_ubo_reference_visitor::ubo_load(void *mem_ctx,
>  static bool
>  shader_storage_buffer_object(const _mesa_glsl_parse_state *state)
>  {
> -   return state->ARB_shader_storage_buffer_object_enable ||
> -  state->is_version(430, 310);
> +   return state->has_shader_storage_buffer_objects();
>  }
>  
>  uint32_t


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/4] i965/fs: Make the repclear shader support either a uniform or a flat input

2016-04-04 Thread Jordan Justen
On 2016-04-04 23:04:01, Jordan Justen wrote:
> On 2016-04-04 15:51:13, Jason Ekstrand wrote:
> > In the Vulkan driver we use a single flat input instead of a uniform
> > because setting up push constants is more disruptive to the pipeline than
> > setting up another vertex input.  This uses the number of uniforms as a key
> > to keep it working for the GL driver.
> 
> How about "This uses the number of uniforms to distinguish the uniform
> vs attribute methods."
> 
> > ---
> >  src/mesa/drivers/dri/i965/brw_fs.cpp | 25 -
> >  1 file changed, 20 insertions(+), 5 deletions(-)
> > 
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
> > b/src/mesa/drivers/dri/i965/brw_fs.cpp
> > index 1a6a229..b138c15 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> > @@ -2787,10 +2787,23 @@ fs_visitor::emit_repclear_shader()
> > brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
> > int base_mrf = 1;
> > int color_mrf = base_mrf + 2;
> > +   fs_inst *mov;
> >  
> > -   fs_inst *mov = bld.exec_all().group(4, 0)
> > - .MOV(brw_message_reg(color_mrf),
> > -  fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F));
> > +   if (uniforms > 0) {
> 
> On the vulkan branch, we have '== 1', and you do below as well. Should
> we move both to '> 0'?
> 

These were the only comments I had for the series.

Series Reviewed-by: Jordan Justen 

> 
> > +  mov = bld.exec_all().group(4, 0)
> > +   .MOV(brw_message_reg(color_mrf),
> > +fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F));
> > +   } else {
> > +  struct brw_reg reg =
> > + brw_reg(BRW_GENERAL_REGISTER_FILE,
> > + 2, 3, 0, 0, BRW_REGISTER_TYPE_F,
> > + BRW_VERTICAL_STRIDE_8,
> > + BRW_WIDTH_2,
> > + BRW_HORIZONTAL_STRIDE_4, BRW_SWIZZLE_XYZW, 
> > WRITEMASK_XYZW);
> > +
> > +  mov = bld.exec_all().group(4, 0)
> > +   .MOV(vec4(brw_message_reg(color_mrf)), fs_reg(reg));
> > +   }
> >  
> > fs_inst *write;
> > if (key->nr_color_regions == 1) {
> > @@ -2819,8 +2832,10 @@ fs_visitor::emit_repclear_shader()
> > assign_curb_setup();
> >  
> > /* Now that we have the uniform assigned, go ahead and force it to a 
> > vec4. */
> > -   assert(mov->src[0].file == FIXED_GRF);
> > -   mov->src[0] = brw_vec4_grf(mov->src[0].nr, 0);
> > +   if (uniforms == 1) {
> > +  assert(mov->src[0].file == FIXED_GRF);
> > +  mov->src[0] = brw_vec4_grf(mov->src[0].nr, 0);
> > +   }
> >  }
> >  
> >  /**
> > -- 
> > 2.5.0.400.gff86faf
> > 
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/4] i965/fs: Make the repclear shader support either a uniform or a flat input

2016-04-04 Thread Jordan Justen
On 2016-04-04 15:51:13, Jason Ekstrand wrote:
> In the Vulkan driver we use a single flat input instead of a uniform
> because setting up push constants is more disruptive to the pipeline than
> setting up another vertex input.  This uses the number of uniforms as a key
> to keep it working for the GL driver.

How about "This uses the number of uniforms to distinguish the uniform
vs attribute methods."

> ---
>  src/mesa/drivers/dri/i965/brw_fs.cpp | 25 -
>  1 file changed, 20 insertions(+), 5 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index 1a6a229..b138c15 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -2787,10 +2787,23 @@ fs_visitor::emit_repclear_shader()
> brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
> int base_mrf = 1;
> int color_mrf = base_mrf + 2;
> +   fs_inst *mov;
>  
> -   fs_inst *mov = bld.exec_all().group(4, 0)
> - .MOV(brw_message_reg(color_mrf),
> -  fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F));
> +   if (uniforms > 0) {

On the vulkan branch, we have '== 1', and you do below as well. Should
we move both to '> 0'?

-Jordan

> +  mov = bld.exec_all().group(4, 0)
> +   .MOV(brw_message_reg(color_mrf),
> +fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F));
> +   } else {
> +  struct brw_reg reg =
> + brw_reg(BRW_GENERAL_REGISTER_FILE,
> + 2, 3, 0, 0, BRW_REGISTER_TYPE_F,
> + BRW_VERTICAL_STRIDE_8,
> + BRW_WIDTH_2,
> + BRW_HORIZONTAL_STRIDE_4, BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
> +
> +  mov = bld.exec_all().group(4, 0)
> +   .MOV(vec4(brw_message_reg(color_mrf)), fs_reg(reg));
> +   }
>  
> fs_inst *write;
> if (key->nr_color_regions == 1) {
> @@ -2819,8 +2832,10 @@ fs_visitor::emit_repclear_shader()
> assign_curb_setup();
>  
> /* Now that we have the uniform assigned, go ahead and force it to a 
> vec4. */
> -   assert(mov->src[0].file == FIXED_GRF);
> -   mov->src[0] = brw_vec4_grf(mov->src[0].nr, 0);
> +   if (uniforms == 1) {
> +  assert(mov->src[0].file == FIXED_GRF);
> +  mov->src[0] = brw_vec4_grf(mov->src[0].nr, 0);
> +   }
>  }
>  
>  /**
> -- 
> 2.5.0.400.gff86faf
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium/radeon: allow multiple exports of the same texture with different usage

2016-04-04 Thread Michel Dänzer
On 01.04.2016 23:42, Marek Olšák wrote:
> The updated patch is attached.

Reviewed-by: Michel Dänzer 


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] doxygen: update glsl link

2016-04-04 Thread Rhys Kidd
On 4 April 2016 at 17:44, Emil Velikov  wrote:

> On 4 April 2016 at 21:38, Elie TOURNIER  wrote:
> > ---
> >  doxygen/glsl.doxy | 7 +++
> >  1 file changed, 3 insertions(+), 4 deletions(-)
> >
> > diff --git a/doxygen/glsl.doxy b/doxygen/glsl.doxy
> > index 9915ba2..0f4c822 100644
> > --- a/doxygen/glsl.doxy
> > +++ b/doxygen/glsl.doxy
> > @@ -9,11 +9,10 @@ PROJECT_NAME   = "Mesa GLSL module"
> >
> #---
> >  # configuration options related to the input files
> >
> #---
> > -INPUT  = ../src/glsl/
> > +INPUT  = ../src/compiler/glsl/
> > +FILE_PATTERNS  = *.c *.cpp *.h
> >  RECURSIVE  = NO
> > -EXCLUDE= ../src/glsl/glsl_lexer.cpp \
> > - ../src/glsl/glsl_parser.cpp \
> > - ../src/glsl/glsl_parser.h
> > +EXCLUDE=
> Speaking of doxygen, has anyone managed to get it in shape ? I've seen
> some efforts in https://fossies.org/dox/mesa-11.0.9/ although I'm not
> sure how much tinkering was required to get that far.
>

Emil,

I have a patchset to come which includes further clean ups and fixes to
doxygen.

Fixes some little oversights and copy-paste introduced bugs.

Regards,
Rhys


> For the patch itself - I second Matt's suggestion. Let's keep the
> generated sources out, for the time being at least.
>
> -Emil
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] i965/tiled_memcpy: Protect against wrong alignments

2016-04-04 Thread Jason Ekstrand
It's possible, when doing an x-tiled copy, to end up with a case where the
bytes parameter is equal to 16 but the pointer is not actually aligned.
This causes asserts in debug mode and segfaults in release builds due to
doing an aligned operation on an unaligned pointer.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93962
---
 src/mesa/drivers/dri/i965/intel_tiled_memcpy.c | 48 +-
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c 
b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
index 19079d0..823d8b0 100644
--- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
+++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
@@ -85,19 +85,19 @@ rgba8_copy_aligned_dst(void *dst, const void *src, size_t 
bytes)
uint8_t const *s = src;
 
 #ifdef __SSSE3__
-   if (bytes == 16) {
-  assert(!(((uintptr_t)dst) & 0xf));
-  rgba8_copy_16_aligned_dst(d+ 0, s+ 0);
-  return dst;
-   }
+   if ((((uintptr_t)dst) & 0xf) == 0) {
+  if (bytes == 16) {
+ rgba8_copy_16_aligned_dst(d+ 0, s+ 0);
+ return dst;
+  }
 
-   if (bytes == 64) {
-  assert(!(((uintptr_t)dst) & 0xf));
-  rgba8_copy_16_aligned_dst(d+ 0, s+ 0);
-  rgba8_copy_16_aligned_dst(d+16, s+16);
-  rgba8_copy_16_aligned_dst(d+32, s+32);
-  rgba8_copy_16_aligned_dst(d+48, s+48);
-  return dst;
+  if (bytes == 64) {
+ rgba8_copy_16_aligned_dst(d+ 0, s+ 0);
+ rgba8_copy_16_aligned_dst(d+16, s+16);
+ rgba8_copy_16_aligned_dst(d+32, s+32);
+ rgba8_copy_16_aligned_dst(d+48, s+48);
+ return dst;
+  }
}
 #endif
 
@@ -123,19 +123,19 @@ rgba8_copy_aligned_src(void *dst, const void *src, size_t 
bytes)
uint8_t const *s = src;
 
 #ifdef __SSSE3__
-   if (bytes == 16) {
-  assert(!(((uintptr_t)src) & 0xf));
-  rgba8_copy_16_aligned_src(d+ 0, s+ 0);
-  return dst;
-   }
+   if ((((uintptr_t)src) & 0xf) == 0) {
+  if (bytes == 16) {
+ rgba8_copy_16_aligned_src(d+ 0, s+ 0);
+ return dst;
+  }
 
-   if (bytes == 64) {
-  assert(!(((uintptr_t)src) & 0xf));
-  rgba8_copy_16_aligned_src(d+ 0, s+ 0);
-  rgba8_copy_16_aligned_src(d+16, s+16);
-  rgba8_copy_16_aligned_src(d+32, s+32);
-  rgba8_copy_16_aligned_src(d+48, s+48);
-  return dst;
+  if (bytes == 64) {
+ rgba8_copy_16_aligned_src(d+ 0, s+ 0);
+ rgba8_copy_16_aligned_src(d+16, s+16);
+ rgba8_copy_16_aligned_src(d+32, s+32);
+ rgba8_copy_16_aligned_src(d+48, s+48);
+ return dst;
+  }
}
 #endif
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] i965/tiled_memcpy: Add alignment assumption decorations

2016-04-04 Thread Jason Ekstrand
This should help GCC when inlining memcpy to be able to better choose an
aligned version.  It should also fix potential performance issues in the
next commit.
---
 src/mesa/drivers/dri/i965/intel_tiled_memcpy.c | 30 +++---
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c 
b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
index 3135458..19079d0 100644
--- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
+++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
@@ -56,6 +56,8 @@ static const uint32_t ytile_width = 128;
 static const uint32_t ytile_height = 32;
 static const uint32_t ytile_span = 16;
 
+#define assume_aligned(x, n) __builtin_assume_aligned(x, n)
+
 #ifdef __SSSE3__
 static const uint8_t rgba8_permutation[16] =
{ 2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15 };
@@ -200,10 +202,12 @@ linear_to_xtiled(uint32_t x0, uint32_t x1, uint32_t x2, 
uint32_t x3,
   mem_copy(dst + ((x0 + yo) ^ swizzle), src + x0, x1 - x0);
 
   for (xo = x1; xo < x2; xo += xtile_span) {
- mem_copy(dst + ((xo + yo) ^ swizzle), src + xo, xtile_span);
+ mem_copy(assume_aligned(dst + ((xo + yo) ^ swizzle), xtile_span),
+  src + xo, xtile_span);
   }
 
-  mem_copy(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2);
+  mem_copy(assume_aligned(dst + ((xo + yo) ^ swizzle), xtile_span),
+   src + x2, x3 - x2);
 
   src += src_pitch;
}
@@ -259,12 +263,14 @@ linear_to_ytiled(uint32_t x0, uint32_t x1, uint32_t x2, 
uint32_t x3,
* at each step so we don't need to calculate it explicitly.
*/
   for (x = x1; x < x2; x += ytile_span) {
- mem_copy(dst + ((xo + yo) ^ swizzle), src + x, ytile_span);
+ mem_copy(assume_aligned(dst + ((xo + yo) ^ swizzle), ytile_span),
+  src + x, ytile_span);
  xo += bytes_per_column;
  swizzle ^= swizzle_bit;
   }
 
-  mem_copy(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2);
+  mem_copy(assume_aligned(dst + ((xo + yo) ^ swizzle), ytile_span),
+   src + x2, x3 - x2);
 
   src += src_pitch;
}
@@ -302,10 +308,14 @@ xtiled_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, 
uint32_t x3,
   mem_copy(dst + x0, src + ((x0 + yo) ^ swizzle), x1 - x0);
 
   for (xo = x1; xo < x2; xo += xtile_span) {
- mem_copy(dst + xo, src + ((xo + yo) ^ swizzle), xtile_span);
+ mem_copy(dst + xo,
+  assume_aligned(src + ((xo + yo) ^ swizzle), xtile_span),
+  xtile_span);
   }
 
-  mem_copy(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2);
+  mem_copy(dst + x2,
+   assume_aligned(src + ((xo + yo) ^ swizzle), xtile_span),
+   x3 - x2);
 
   dst += dst_pitch;
}
@@ -361,12 +371,16 @@ ytiled_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, 
uint32_t x3,
* at each step so we don't need to calculate it explicitly.
*/
   for (x = x1; x < x2; x += ytile_span) {
- mem_copy(dst + x, src + ((xo + yo) ^ swizzle), ytile_span);
+ mem_copy(dst + x,
+  assume_aligned(src + ((xo + yo) ^ swizzle), ytile_span),
+  ytile_span);
  xo += bytes_per_column;
  swizzle ^= swizzle_bit;
   }
 
-  mem_copy(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2);
+  mem_copy(dst + x2,
+   assume_aligned(src + ((xo + yo) ^ swizzle), ytile_span),
+   x3 - x2);
 
   dst += dst_pitch;
}
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3] egl: add EGL_KHR_reusable_sync to egl_dri

2016-04-04 Thread Dongwon Kim
This patch enables an EGL extension, EGL_KHR_reusable_sync.
This new extension basically provides a way for multiple APIs or
threads to be executed synchronously via a "reusable sync"
primitive shared by those threads/API calls.

This was implemented based on the specification at

https://www.khronos.org/registry/egl/extensions/KHR/EGL_KHR_reusable_sync.txt

v2
- use thread functions defined in C11/threads.h instead of
  using direct pthread calls
- make the timeout set with reference to CLOCK_MONOTONIC
- cleaned up the way expiration time is calculated
- (bug fix) in dri2_client_wait_sync, case EGL_SYNC_CL_EVENT_KHR
  has been added.
- (bug fix) in dri2_destroy_sync, return from cond_broadcast
  call is now stored in 'err' instead of 'ret' to prevent 'ret'
  from being reset to 'EGL_FALSE' even in successful case
- corrected minor syntax problems

v3
- dri2_egl_unref_sync now became 'void' type. No more error check
  is needed for this function call as a result.
- (bug fix) resolved issue with duplicated unlocking of display in
  eglClientWaitSync when type of sync is "EGL_KHR_REUSABLE_SYNC"

Signed-off-by: Dongwon Kim 
---
 src/egl/drivers/dri2/egl_dri2.c | 192 ++--
 src/egl/drivers/dri2/egl_dri2.h |   2 +
 src/egl/main/eglapi.c   |  17 +++-
 src/egl/main/eglsync.c  |   3 +-
 4 files changed, 206 insertions(+), 8 deletions(-)

diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index 8f50f0c..490b040 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -38,6 +38,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #ifdef HAVE_LIBDRM
 #include 
 #include 
@@ -623,6 +625,8 @@ dri2_setup_screen(_EGLDisplay *disp)
  disp->Extensions.KHR_cl_event2 = EGL_TRUE;
}
 
+   disp->Extensions.KHR_reusable_sync = EGL_TRUE;
+
if (dri2_dpy->image) {
   if (dri2_dpy->image->base.version >= 10 &&
   dri2_dpy->image->getCapabilities != NULL) {
@@ -2394,7 +2398,12 @@ dri2_egl_unref_sync(struct dri2_egl_display *dri2_dpy,
 struct dri2_egl_sync *dri2_sync)
 {
if (p_atomic_dec_zero(&dri2_sync->refcount)) {
-  dri2_dpy->fence->destroy_fence(dri2_dpy->dri_screen, dri2_sync->fence);
+  if (dri2_sync->base.Type == EGL_SYNC_REUSABLE_KHR)
+ cnd_destroy(&dri2_sync->cond);
+
+  if (dri2_sync->fence)
+ dri2_dpy->fence->destroy_fence(dri2_dpy->dri_screen, 
dri2_sync->fence);
+
   free(dri2_sync);
}
 }
@@ -2408,6 +2417,8 @@ dri2_create_sync(_EGLDriver *drv, _EGLDisplay *dpy,
struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy);
struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx);
struct dri2_egl_sync *dri2_sync;
+   EGLint ret;
+   pthread_condattr_t attr;
 
dri2_sync = calloc(1, sizeof(struct dri2_egl_sync));
if (!dri2_sync) {
@@ -2450,6 +2461,37 @@ dri2_create_sync(_EGLDriver *drv, _EGLDisplay *dpy,
 dri2_sync->fence, 0, 0))
  dri2_sync->base.SyncStatus = EGL_SIGNALED_KHR;
   break;
+
+   case EGL_SYNC_REUSABLE_KHR:
+  /* intialize attr */
+  ret = pthread_condattr_init(&attr);
+
+  if (ret) {
+ _eglError(EGL_BAD_ACCESS, "eglCreateSyncKHR");
+ free(dri2_sync);
+ return NULL;
+  }
+
+  /* change clock attribute to CLOCK_MONOTONIC */
+  ret = pthread_condattr_setclock(&attr, CLOCK_MONOTONIC);
+
+  if (ret) {
+ _eglError(EGL_BAD_ACCESS, "eglCreateSyncKHR");
+ free(dri2_sync);
+ return NULL;
+  }
+
+  ret = pthread_cond_init(&dri2_sync->cond, &attr);
+
+  if (ret) {
+ _eglError(EGL_BAD_ACCESS, "eglCreateSyncKHR");
+ free(dri2_sync);
+ return NULL;
+  }
+
+  /* initial status of reusable sync must be "unsignaled" */
+  dri2_sync->base.SyncStatus = EGL_UNSIGNALED_KHR;
+  break;
}
 
p_atomic_set(&dri2_sync->refcount, 1);
@@ -2461,9 +2503,27 @@ dri2_destroy_sync(_EGLDriver *drv, _EGLDisplay *dpy, 
_EGLSync *sync)
 {
struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy);
struct dri2_egl_sync *dri2_sync = dri2_egl_sync(sync);
+   EGLint ret = EGL_TRUE;
+   EGLint err;
+
+   /* if type of sync is EGL_SYNC_REUSABLE_KHR and it is not signaled yet,
+* then unlock all threads possibly blocked by the reusable sync before
+* destroying it.
+*/
+   if (dri2_sync->base.Type == EGL_SYNC_REUSABLE_KHR &&
+   dri2_sync->base.SyncStatus == EGL_UNSIGNALED_KHR) {
+  dri2_sync->base.SyncStatus = EGL_SIGNALED_KHR;
+  /* unblock all threads currently blocked by sync */
+  err = cnd_broadcast(&dri2_sync->cond);
 
+  if (err) {
+ _eglError(EGL_BAD_ACCESS, "eglDestroySyncKHR");
+ ret = EGL_FALSE;
+  }
+   }
dri2_egl_unref_sync(dri2_dpy, dri2_sync);
-   return EGL_TRUE;
+
+   return ret;
 }
 
 static EGLint
@@ -2471,10 +2531,16 @@ dri2_client_wait_sync(_EGLDriver *drv, _EGLDisplay 

Re: [Mesa-dev] [PATCH 17/20] radeonsi: do not do two full flushes on every compute dispatch

2016-04-04 Thread Marek Olšák
On Tue, Apr 5, 2016 at 1:30 AM, Bas Nieuwenhuizen
 wrote:
> On Tue, Apr 5, 2016 at 1:18 AM, Marek Olšák  wrote:
>> On Sat, Apr 2, 2016 at 3:11 PM, Bas Nieuwenhuizen
>>  wrote:
>>> Signed-off-by: Bas Nieuwenhuizen 
>>> ---
>>>  src/gallium/drivers/radeonsi/si_compute.c | 17 ++---
>>>  src/gallium/drivers/radeonsi/si_state.c   |  6 --
>>>  2 files changed, 6 insertions(+), 17 deletions(-)
>>>
>>> diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
>>> b/src/gallium/drivers/radeonsi/si_compute.c
>>> index 6549396..8fe4fe7 100644
>>> --- a/src/gallium/drivers/radeonsi/si_compute.c
>>> +++ b/src/gallium/drivers/radeonsi/si_compute.c
>>> @@ -433,13 +433,8 @@ static void si_launch_grid(
>>> if (!sctx->cs_shader_state.initialized)
>>> si_initialize_compute(sctx);
>>>
>>> -   sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
>>> -SI_CONTEXT_INV_GLOBAL_L2 |
>>> -SI_CONTEXT_INV_ICACHE |
>>> -SI_CONTEXT_INV_SMEM_L1 |
>>> -SI_CONTEXT_FLUSH_WITH_INV_L2 |
>>> -SI_CONTEXT_FLAG_COMPUTE;
>>> -   si_emit_cache_flush(sctx, NULL);
>>> +   if(sctx->b.flags)
>>
>> "if (" please
>>
>>> +   si_emit_cache_flush(sctx, NULL);
>>>
>>> if (!si_switch_compute_shader(sctx, program, &program->shader, 
>>> info->pc))
>>> return;
>>> @@ -466,14 +461,6 @@ static void si_launch_grid(
>>> si_setup_tgsi_grid(sctx, info);
>>>
>>> si_emit_dispatch_packets(sctx, info);
>>> -
>>> -   sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |
>>> -SI_CONTEXT_INV_VMEM_L1 |
>>> -SI_CONTEXT_INV_GLOBAL_L2 |
>>> -SI_CONTEXT_INV_ICACHE |
>>> -SI_CONTEXT_INV_SMEM_L1 |
>>> -SI_CONTEXT_FLAG_COMPUTE;
>>> -   si_emit_cache_flush(sctx, NULL);
>>>  }
>>>
>>>
>>> diff --git a/src/gallium/drivers/radeonsi/si_state.c 
>>> b/src/gallium/drivers/radeonsi/si_state.c
>>> index 10d691a..95d2589 100644
>>> --- a/src/gallium/drivers/radeonsi/si_state.c
>>> +++ b/src/gallium/drivers/radeonsi/si_state.c
>>> @@ -3533,7 +3533,8 @@ static void si_memory_barrier(struct pipe_context 
>>> *ctx, unsigned flags)
>>>
>>> /* Subsequent commands must wait for all shader invocations to
>>>  * complete. */
>>> -   sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
>>> +   sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
>>> +SI_CONTEXT_CS_PARTIAL_FLUSH;
>>>
>>> if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
>>> sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 |
>>> @@ -3543,7 +3544,8 @@ static void si_memory_barrier(struct pipe_context 
>>> *ctx, unsigned flags)
>>>  PIPE_BARRIER_SHADER_BUFFER |
>>>  PIPE_BARRIER_TEXTURE |
>>>  PIPE_BARRIER_IMAGE |
>>> -PIPE_BARRIER_STREAMOUT_BUFFER)) {
>>> +PIPE_BARRIER_STREAMOUT_BUFFER |
>>> +PIPE_BARRIER_GLOBAL_BUFFER)) {
>>> /* As far as I can tell, L1 contents are written back to L2
>>>  * automatically at end of shader, but the contents of other
>>>  * L1 caches might still be stale. */
>>
>> Do we have a test that writes to a buffer via a PS and then read from
>> it via a CS? And the same with a CS followed by a VS.
>>
>> I'm not sure if the hardware can execute compute and draw jobs in
>> parallel within one IB. (it can certainly do that with draw calls)
>
> Nothing in piglit as far as I know. I tested with a local test with
> two compute dispatches: without CS_PARTIAL_FLUSH the second dispatch
> missed writes from the first, and with CS_PARTIAL_FLUSH that
> synchronization was fixed.
>
> So I think the hardware is able to execute multiple compute dispatches
> at the same time in a single IB and that this adds proper
> synchronization. I have not tried any compute<->graphics interaction.

Thanks. It looks like we do need partial flushes between dependent
draw and dispatch calls. We also need piglit tests for this. Streamout
does VS_PARTIAL_FLUSH so that written buffers are immediately visible
by the next draw call. If there are no dependencies, no partial
flushes are needed. If we can't track the dependencies, we should do a
partial flush always. Partial flushes just wait until the given shader
stage is idle. Without those, the hardware launches draw and dispatch
calls without waiting for previous ones.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: use has_shader_storage_buffer_objects helper

2016-04-04 Thread Timothy Arceri
On Mon, 2016-04-04 at 11:55 -0400, Ilia Mirkin wrote:
> Replaces open-coded logic with existing helper.
> 
> Signed-off-by: Ilia Mirkin 

Reviewed-by: Timothy Arceri 

> ---
>  src/compiler/glsl/lower_ubo_reference.cpp | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/src/compiler/glsl/lower_ubo_reference.cpp
> b/src/compiler/glsl/lower_ubo_reference.cpp
> index 3155ab6..1a0140f 100644
> --- a/src/compiler/glsl/lower_ubo_reference.cpp
> +++ b/src/compiler/glsl/lower_ubo_reference.cpp
> @@ -372,8 +372,7 @@ lower_ubo_reference_visitor::ubo_load(void
> *mem_ctx,
>  static bool
>  shader_storage_buffer_object(const _mesa_glsl_parse_state *state)
>  {
> -   return state->ARB_shader_storage_buffer_object_enable ||
> -  state->is_version(430, 310);
> +   return state->has_shader_storage_buffer_objects();
>  }
>  
>  uint32_t
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] dri/i965: fix incorrect rgbFormat in intelCreateBuffer().

2016-04-04 Thread Haixia Shi
It is incorrect to assume that pixel format is always in BGR byte order.
We need to check bitmask parameters (such as |redMask|) to determine whether
the RGB or BGR byte order is requested.

Furthermore when parameter |sRGBCapable| is set to false, we should be using
UNORM format by default.

Signed-off-by: Haixia Shi 
Reviewed-by: Stéphane Marchesin 
Cc: kenneth.w.grau...@intel.com

Change-Id: Ib75087aef1fbfb51baa72517207fed410dcd7b1e
---
 src/mesa/drivers/dri/i965/intel_screen.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index c6eb50a..a5ec3bf 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -1001,14 +1001,14 @@ intelCreateBuffer(__DRIscreen * driScrnPriv,
}
 
if (mesaVis->redBits == 5)
-  rgbFormat = MESA_FORMAT_B5G6R5_UNORM;
+  rgbFormat = mesaVis->redMask == 0x1f ? MESA_FORMAT_R5G6B5_UNORM : 
MESA_FORMAT_B5G6R5_UNORM;
else if (mesaVis->sRGBCapable)
-  rgbFormat = MESA_FORMAT_B8G8R8A8_SRGB;
+  rgbFormat = mesaVis->redMask == 0xff ? MESA_FORMAT_R8G8B8A8_SRGB : 
MESA_FORMAT_B8G8R8A8_SRGB;
else if (mesaVis->alphaBits == 0)
-  rgbFormat = MESA_FORMAT_B8G8R8X8_UNORM;
+  rgbFormat = mesaVis->redMask == 0xff ? MESA_FORMAT_R8G8B8X8_UNORM : 
MESA_FORMAT_B8G8R8X8_UNORM;
else {
-  rgbFormat = MESA_FORMAT_B8G8R8A8_SRGB;
-  fb->Visual.sRGBCapable = true;
+  rgbFormat = mesaVis->redMask == 0xff ? MESA_FORMAT_R8G8B8A8_UNORM : 
MESA_FORMAT_B8G8R8A8_UNORM;
+  fb->Visual.sRGBCapable = false;
}
 
/* setup the hardware-based renderbuffers */
-- 
2.8.0.rc3.226.g39d4020

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 07/12] glsl: Add arb_cull_distance support

2016-04-04 Thread Timothy Arceri
On Mon, 2016-04-04 at 15:07 +0200, Tobias Klausmann wrote:
> 
> On 04.04.2016 04:48, Timothy Arceri wrote:
> > 
> > On Mon, 2016-04-04 at 12:15 +1000, Dave Airlie wrote:
> > > 
> > > From: Tobias Klausmann 
> > > 
> > > Signed-off-by: Tobias Klausmann  > > m.de
> > > ---
> > >   src/compiler/glsl/ast_to_hir.cpp |  14 +++
> > >   src/compiler/glsl/builtin_variables.cpp  |  11 ++-
> > >   src/compiler/glsl/glcpp/glcpp-parse.y|   3 +
> > >   src/compiler/glsl/glsl_parser_extras.cpp |   1 +
> > >   src/compiler/glsl/glsl_parser_extras.h   |   2 +
> > >   src/compiler/glsl/link_varyings.cpp  |  10 +++
> > >   src/compiler/glsl/link_varyings.h|   1 +
> > >   src/compiler/glsl/linker.cpp | 122
> > > +--
> > >   src/compiler/glsl/standalone_scaffolding.cpp |   1 +
> > >   src/compiler/glsl/tests/varyings_test.cpp|  27 ++
> > >   src/compiler/shader_enums.h  |   4 +
> > >   11 files changed, 169 insertions(+), 27 deletions(-)
> > > 
> > > diff --git a/src/compiler/glsl/ast_to_hir.cpp
> > > b/src/compiler/glsl/ast_to_hir.cpp
> > > index 7c9be81..47db841 100644
> > > --- a/src/compiler/glsl/ast_to_hir.cpp
> > > +++ b/src/compiler/glsl/ast_to_hir.cpp
> > > @@ -1210,6 +1210,20 @@ check_builtin_array_max_size(const char
> > > *name,
> > > unsigned size,
> > > _mesa_glsl_error(&loc, state, "`gl_ClipDistance' array
> > > size
> > > cannot "
> > >  "be larger than gl_MaxClipDistances
> > > (%u)",
> > >  state->Const.MaxClipPlanes);
> > > +   } else if (strcmp("gl_CullDistance", name) == 0
> > > +  && size > state->Const.MaxClipPlanes) {
> > > +  /* From the ARB_cull_distance spec:
> > > +   *
> > > +   *   "The gl_CullDistance array is predeclared as unsized
> > > and
> > > +   *must be sized by the shader either redeclaring it
> > > with
> > > +   *a size or indexing it only with integral constant
> > > +   *expressions. The size determines the number and set
> > > of
> > > +   *enabled cull distances and can be at most
> > > +   *gl_MaxCullDistances."
> > > +   */
> > > +  _mesa_glsl_error(&loc, state, "`gl_CullDistance' array
> > > size
> > > cannot "
> > > +   "be larger than gl_MaxCullDistances
> > > (%u)",
> > > +   state->Const.MaxClipPlanes);
> > >  }
> > >   }
> > >   
> > > diff --git a/src/compiler/glsl/builtin_variables.cpp
> > > b/src/compiler/glsl/builtin_variables.cpp
> > > index f31f9f6..8d049c8 100644
> > > --- a/src/compiler/glsl/builtin_variables.cpp
> > > +++ b/src/compiler/glsl/builtin_variables.cpp
> > > @@ -302,7 +302,7 @@ public:
> > >  const glsl_type *construct_interface_instance() const;
> > >   
> > >   private:
> > > -   glsl_struct_field fields[10];
> > > +   glsl_struct_field fields[11];
> > >  unsigned num_fields;
> > >   };
> > >   
> > > @@ -675,6 +675,11 @@
> > > builtin_variable_generator::generate_constants()
> > > add_const("gl_MaxClipDistances", state-
> > > >Const.MaxClipPlanes);
> > > add_const("gl_MaxVaryingComponents", state->ctx-
> > > > 
> > > > Const.MaxVarying * 4);
> > >  }
> > > +   if (state->is_version(450, 0) || state-
> > > >ARB_cull_distance_enable)
> > > {
> > > +  add_const("gl_MaxCullDistances", state-
> > > >Const.MaxClipPlanes);
> > > +  add_const("gl_MaxCombinedClipAndCullDistances",
> > > +state->Const.MaxClipPlanes);
> > > +   }
> > >   
> > >  if (state->has_geometry_shader()) {
> > > add_const("gl_MaxVertexOutputComponents",
> > > @@ -1246,6 +1251,10 @@
> > > builtin_variable_generator::generate_varyings()
> > >  add_varying(VARYING_SLOT_CLIP_DIST0, array(float_t, 0),
> > >  "gl_ClipDistance");
> > >  }
> > > +   if (state->is_version(450, 0) || state-
> > > >ARB_cull_distance_enable)
> > > {
> > > +  add_varying(VARYING_SLOT_CULL_DIST0, array(float_t, 0),
> > > +   "gl_CullDistance");
> > > +   }
> > >   
> > >  if (compatibility) {
> > > add_varying(VARYING_SLOT_TEX0, array(vec4_t, 0),
> > > "gl_TexCoord");
> > > diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y
> > > b/src/compiler/glsl/glcpp/glcpp-parse.y
> > > index a48266c..e44f074 100644
> > > --- a/src/compiler/glsl/glcpp/glcpp-parse.y
> > > +++ b/src/compiler/glsl/glcpp/glcpp-parse.y
> > > @@ -2457,6 +2457,9 @@
> > > _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser,
> > > intmax_t versio
> > >   
> > >    if (extensions->ARB_shader_draw_parameters)
> > >   add_builtin_define(parser,
> > > "GL_ARB_shader_draw_parameters", 1);
> > > +
> > > + if (extensions->ARB_cull_distance)
> > > +add_builtin_define(parser, "GL_ARB_cull_distance",
> > > 1);
> > > }
> > >  }
> > >   
> > > diff --git a/src/compiler/glsl/glsl_parser_extras.cpp

Re: [Mesa-dev] [PATCH 17/20] radeonsi: do not do two full flushes on every compute dispatch

2016-04-04 Thread Bas Nieuwenhuizen
On Tue, Apr 5, 2016 at 1:18 AM, Marek Olšák  wrote:
> On Sat, Apr 2, 2016 at 3:11 PM, Bas Nieuwenhuizen
>  wrote:
>> Signed-off-by: Bas Nieuwenhuizen 
>> ---
>>  src/gallium/drivers/radeonsi/si_compute.c | 17 ++---
>>  src/gallium/drivers/radeonsi/si_state.c   |  6 --
>>  2 files changed, 6 insertions(+), 17 deletions(-)
>>
>> diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
>> b/src/gallium/drivers/radeonsi/si_compute.c
>> index 6549396..8fe4fe7 100644
>> --- a/src/gallium/drivers/radeonsi/si_compute.c
>> +++ b/src/gallium/drivers/radeonsi/si_compute.c
>> @@ -433,13 +433,8 @@ static void si_launch_grid(
>> if (!sctx->cs_shader_state.initialized)
>> si_initialize_compute(sctx);
>>
>> -   sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
>> -SI_CONTEXT_INV_GLOBAL_L2 |
>> -SI_CONTEXT_INV_ICACHE |
>> -SI_CONTEXT_INV_SMEM_L1 |
>> -SI_CONTEXT_FLUSH_WITH_INV_L2 |
>> -SI_CONTEXT_FLAG_COMPUTE;
>> -   si_emit_cache_flush(sctx, NULL);
>> +   if(sctx->b.flags)
>
> "if (" please
>
>> +   si_emit_cache_flush(sctx, NULL);
>>
>> if (!si_switch_compute_shader(sctx, program, &program->shader, 
>> info->pc))
>> return;
>> @@ -466,14 +461,6 @@ static void si_launch_grid(
>> si_setup_tgsi_grid(sctx, info);
>>
>> si_emit_dispatch_packets(sctx, info);
>> -
>> -   sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |
>> -SI_CONTEXT_INV_VMEM_L1 |
>> -SI_CONTEXT_INV_GLOBAL_L2 |
>> -SI_CONTEXT_INV_ICACHE |
>> -SI_CONTEXT_INV_SMEM_L1 |
>> -SI_CONTEXT_FLAG_COMPUTE;
>> -   si_emit_cache_flush(sctx, NULL);
>>  }
>>
>>
>> diff --git a/src/gallium/drivers/radeonsi/si_state.c 
>> b/src/gallium/drivers/radeonsi/si_state.c
>> index 10d691a..95d2589 100644
>> --- a/src/gallium/drivers/radeonsi/si_state.c
>> +++ b/src/gallium/drivers/radeonsi/si_state.c
>> @@ -3533,7 +3533,8 @@ static void si_memory_barrier(struct pipe_context 
>> *ctx, unsigned flags)
>>
>> /* Subsequent commands must wait for all shader invocations to
>>  * complete. */
>> -   sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
>> +   sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
>> +SI_CONTEXT_CS_PARTIAL_FLUSH;
>>
>> if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
>> sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 |
>> @@ -3543,7 +3544,8 @@ static void si_memory_barrier(struct pipe_context 
>> *ctx, unsigned flags)
>>  PIPE_BARRIER_SHADER_BUFFER |
>>  PIPE_BARRIER_TEXTURE |
>>  PIPE_BARRIER_IMAGE |
>> -PIPE_BARRIER_STREAMOUT_BUFFER)) {
>> +PIPE_BARRIER_STREAMOUT_BUFFER |
>> +PIPE_BARRIER_GLOBAL_BUFFER)) {
>> /* As far as I can tell, L1 contents are written back to L2
>>  * automatically at end of shader, but the contents of other
>>  * L1 caches might still be stale. */
>
> Do we have a test that writes to a buffer via a PS and then read from
> it via a CS? And the same with a CS followed by a VS.
>
> I'm not sure if the hardware can execute compute and draw jobs in
> parallel within one IB. (it can certainly do that with draw calls)

Nothing in piglit as far as I know. I tested with a local test with
two compute dispatches: without CS_PARTIAL_FLUSH the second dispatch
missed writes from the first, and with CS_PARTIAL_FLUSH that
synchronization was fixed.

So I think the hardware is able to execute multiple compute dispatches
at the same time in a single IB and that this adds proper
synchronization. I have not tried any compute<->graphics interaction.

- Bas
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 17/20] radeonsi: do not do two full flushes on every compute dispatch

2016-04-04 Thread Marek Olšák
On Sat, Apr 2, 2016 at 3:11 PM, Bas Nieuwenhuizen
 wrote:
> Signed-off-by: Bas Nieuwenhuizen 
> ---
>  src/gallium/drivers/radeonsi/si_compute.c | 17 ++---
>  src/gallium/drivers/radeonsi/si_state.c   |  6 --
>  2 files changed, 6 insertions(+), 17 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
> b/src/gallium/drivers/radeonsi/si_compute.c
> index 6549396..8fe4fe7 100644
> --- a/src/gallium/drivers/radeonsi/si_compute.c
> +++ b/src/gallium/drivers/radeonsi/si_compute.c
> @@ -433,13 +433,8 @@ static void si_launch_grid(
> if (!sctx->cs_shader_state.initialized)
> si_initialize_compute(sctx);
>
> -   sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
> -SI_CONTEXT_INV_GLOBAL_L2 |
> -SI_CONTEXT_INV_ICACHE |
> -SI_CONTEXT_INV_SMEM_L1 |
> -SI_CONTEXT_FLUSH_WITH_INV_L2 |
> -SI_CONTEXT_FLAG_COMPUTE;
> -   si_emit_cache_flush(sctx, NULL);
> +   if(sctx->b.flags)

"if (" please

> +   si_emit_cache_flush(sctx, NULL);
>
> if (!si_switch_compute_shader(sctx, program, &program->shader, 
> info->pc))
> return;
> @@ -466,14 +461,6 @@ static void si_launch_grid(
> si_setup_tgsi_grid(sctx, info);
>
> si_emit_dispatch_packets(sctx, info);
> -
> -   sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |
> -SI_CONTEXT_INV_VMEM_L1 |
> -SI_CONTEXT_INV_GLOBAL_L2 |
> -SI_CONTEXT_INV_ICACHE |
> -SI_CONTEXT_INV_SMEM_L1 |
> -SI_CONTEXT_FLAG_COMPUTE;
> -   si_emit_cache_flush(sctx, NULL);
>  }
>
>
> diff --git a/src/gallium/drivers/radeonsi/si_state.c 
> b/src/gallium/drivers/radeonsi/si_state.c
> index 10d691a..95d2589 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -3533,7 +3533,8 @@ static void si_memory_barrier(struct pipe_context *ctx, 
> unsigned flags)
>
> /* Subsequent commands must wait for all shader invocations to
>  * complete. */
> -   sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
> +   sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
> +SI_CONTEXT_CS_PARTIAL_FLUSH;
>
> if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
> sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 |
> @@ -3543,7 +3544,8 @@ static void si_memory_barrier(struct pipe_context *ctx, 
> unsigned flags)
>  PIPE_BARRIER_SHADER_BUFFER |
>  PIPE_BARRIER_TEXTURE |
>  PIPE_BARRIER_IMAGE |
> -PIPE_BARRIER_STREAMOUT_BUFFER)) {
> +PIPE_BARRIER_STREAMOUT_BUFFER |
> +PIPE_BARRIER_GLOBAL_BUFFER)) {
> /* As far as I can tell, L1 contents are written back to L2
>  * automatically at end of shader, but the contents of other
>  * L1 caches might still be stale. */

Do we have a test that writes to a buffer via a PS and then reads from
it via a CS? And the same with a CS followed by a VS.

I'm not sure if the hardware can execute compute and draw jobs in
parallel within one IB. (it can certainly do that with draw calls)

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/4] i965/fs_surface_builder: Mask signed integers after conversion

2016-04-04 Thread Francisco Jerez
Jason Ekstrand  writes:

> ---
>  src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp | 18 ++
>  1 file changed, 18 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
> index 75734d2..4adffdd 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
> @@ -717,6 +717,15 @@ namespace {
>bld.emit_minmax(offset(dst, bld, c), offset(dst, bld, c),
>brw_imm_d(-(int)scale(widths[c] - s) - 1),
>BRW_CONDITIONAL_GE);
> +
> +   /* Mask off all but the bits we actually want.  Otherwise, if
> +* we pass a negative number into the hardware when it's
> +* expecting something like UINT8, it will happily clamp it to
> +* +255 for us.
> +*/
> +   if (is_signed && widths[c] < 32)
> +  bld.AND(offset(dst, bld, c), offset(dst, bld, c),
> +  brw_imm_d((1 << widths[c]) - 1));
>  }
>   }
>  
> @@ -787,6 +796,15 @@ namespace {
> /* Convert to integer. */
> bld.RNDE(offset(fdst, bld, c), offset(fdst, bld, c));
> bld.MOV(offset(dst, bld, c), offset(fdst, bld, c));
> +
> +   /* Mask off all but the bits we actually want.  Otherwise, if
> +* we pass a negative number into the hardware when it's
> +* expecting something like UINT8, it will happily clamp it to
> +* +255 for us.
> +*/
> +   if (is_signed && widths[c] < 32)
> +  bld.AND(offset(dst, bld, c), offset(dst, bld, c),
> +  brw_imm_d((1 << widths[c]) - 1));

Can you use the scale() helper to compute the bitmask like the code
above does?  Otherwise looks good:

Reviewed-by: Francisco Jerez 

Thanks!

>  }
>   }
>  
> -- 
> 2.5.0.400.gff86faf
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/4] brw/device_info: Add a helper for getting a device name

2016-04-04 Thread Jason Ekstrand
This is needed by the Vulkan driver
---
 src/mesa/drivers/dri/i965/brw_device_info.c | 12 
 src/mesa/drivers/dri/i965/brw_device_info.h |  1 +
 2 files changed, 13 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c 
b/src/mesa/drivers/dri/i965/brw_device_info.c
index c703fb5..3666190 100644
--- a/src/mesa/drivers/dri/i965/brw_device_info.c
+++ b/src/mesa/drivers/dri/i965/brw_device_info.c
@@ -482,3 +482,15 @@ brw_get_device_info(int devid)
 
return devinfo;
 }
+
+const char *
+brw_get_device_name(int devid)
+{
+   switch (devid) {
+#undef CHIPSET
+#define CHIPSET(id, family, name) case id: return name;
+#include "pci_ids/i965_pci_ids.h"
+   default:
+  return NULL;
+   }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_device_info.h 
b/src/mesa/drivers/dri/i965/brw_device_info.h
index c641ffc..4e7f313 100644
--- a/src/mesa/drivers/dri/i965/brw_device_info.h
+++ b/src/mesa/drivers/dri/i965/brw_device_info.h
@@ -144,3 +144,4 @@ struct brw_device_info
 };
 
 const struct brw_device_info *brw_get_device_info(int devid);
+const char *brw_get_device_name(int devid);
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] i965: Move get_hw_prim_for_gl_prim to brw_util.c

2016-04-04 Thread Jason Ekstrand
It's used by brw_compile_gs in brw_vec4_gs_visitor.cpp so it needs to be in
a file that's linked into libi965_compiler.la.
---
 src/mesa/drivers/dri/i965/brw_draw.c | 29 -
 src/mesa/drivers/dri/i965/brw_util.c | 28 
 2 files changed, 28 insertions(+), 29 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_draw.c 
b/src/mesa/drivers/dri/i965/brw_draw.c
index c295d91..afa8a4e 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -54,23 +54,6 @@
 
 #define FILE_DEBUG_FLAG DEBUG_PRIMS
 
-static const GLuint prim_to_hw_prim[GL_TRIANGLE_STRIP_ADJACENCY+1] = {
-   [GL_POINTS] =_3DPRIM_POINTLIST,
-   [GL_LINES] = _3DPRIM_LINELIST,
-   [GL_LINE_LOOP] = _3DPRIM_LINELOOP,
-   [GL_LINE_STRIP] = _3DPRIM_LINESTRIP,
-   [GL_TRIANGLES] = _3DPRIM_TRILIST,
-   [GL_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
-   [GL_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
-   [GL_QUADS] = _3DPRIM_QUADLIST,
-   [GL_QUAD_STRIP] = _3DPRIM_QUADSTRIP,
-   [GL_POLYGON] = _3DPRIM_POLYGON,
-   [GL_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
-   [GL_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
-   [GL_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
-   [GL_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
-};
-
 
 static const GLenum reduced_prim[GL_POLYGON+1] = {
[GL_POINTS] = GL_POINTS,
@@ -85,18 +68,6 @@ static const GLenum reduced_prim[GL_POLYGON+1] = {
[GL_POLYGON] = GL_TRIANGLES
 };
 
-uint32_t
-get_hw_prim_for_gl_prim(int mode)
-{
-   if (mode >= BRW_PRIM_OFFSET)
-  return mode - BRW_PRIM_OFFSET;
-   else {
-  assert(mode < ARRAY_SIZE(prim_to_hw_prim));
-  return prim_to_hw_prim[mode];
-   }
-}
-
-
 /* When the primitive changes, set a state bit and re-validate.  Not
  * the nicest and would rather deal with this by having all the
  * programs be immune to the active primitive (ie. cope with all
diff --git a/src/mesa/drivers/dri/i965/brw_util.c 
b/src/mesa/drivers/dri/i965/brw_util.c
index bf7f9c6..934b6b8 100644
--- a/src/mesa/drivers/dri/i965/brw_util.c
+++ b/src/mesa/drivers/dri/i965/brw_util.c
@@ -98,3 +98,31 @@ GLuint brw_translate_blend_factor( GLenum factor )
   unreachable("not reached");
}
 }
+
+static const GLuint prim_to_hw_prim[GL_TRIANGLE_STRIP_ADJACENCY+1] = {
+   [GL_POINTS] =_3DPRIM_POINTLIST,
+   [GL_LINES] = _3DPRIM_LINELIST,
+   [GL_LINE_LOOP] = _3DPRIM_LINELOOP,
+   [GL_LINE_STRIP] = _3DPRIM_LINESTRIP,
+   [GL_TRIANGLES] = _3DPRIM_TRILIST,
+   [GL_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
+   [GL_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
+   [GL_QUADS] = _3DPRIM_QUADLIST,
+   [GL_QUAD_STRIP] = _3DPRIM_QUADSTRIP,
+   [GL_POLYGON] = _3DPRIM_POLYGON,
+   [GL_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
+   [GL_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
+   [GL_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
+   [GL_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
+};
+
+uint32_t
+get_hw_prim_for_gl_prim(int mode)
+{
+   if (mode >= BRW_PRIM_OFFSET)
+  return mode - BRW_PRIM_OFFSET;
+   else {
+  assert(mode < ARRAY_SIZE(prim_to_hw_prim));
+  return prim_to_hw_prim[mode];
+   }
+}
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/4] i965/fs_surface_builder: Mask signed integers after conversion

2016-04-04 Thread Jason Ekstrand
---
 src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
index 75734d2..4adffdd 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
@@ -717,6 +717,15 @@ namespace {
   bld.emit_minmax(offset(dst, bld, c), offset(dst, bld, c),
   brw_imm_d(-(int)scale(widths[c] - s) - 1),
   BRW_CONDITIONAL_GE);
+
+   /* Mask off all but the bits we actually want.  Otherwise, if
+* we pass a negative number into the hardware when it's
+* expecting something like UINT8, it will happily clamp it to
+* +255 for us.
+*/
+   if (is_signed && widths[c] < 32)
+  bld.AND(offset(dst, bld, c), offset(dst, bld, c),
+  brw_imm_d((1 << widths[c]) - 1));
 }
  }
 
@@ -787,6 +796,15 @@ namespace {
/* Convert to integer. */
bld.RNDE(offset(fdst, bld, c), offset(fdst, bld, c));
bld.MOV(offset(dst, bld, c), offset(fdst, bld, c));
+
+   /* Mask off all but the bits we actually want.  Otherwise, if
+* we pass a negative number into the hardware when it's
+* expecting something like UINT8, it will happily clamp it to
+* +255 for us.
+*/
+   if (is_signed && widths[c] < 32)
+  bld.AND(offset(dst, bld, c), offset(dst, bld, c),
+  brw_imm_d((1 << widths[c]) - 1));
 }
  }
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] i965: Move get_hw_prim_for_gl_prim to brw_util.c

2016-04-04 Thread Jason Ekstrand
It's used by brw_compile_gs in brw_vec4_gs_visitor.cpp so it needs to be in
a file that's linked into libi965_compiler.la.
---
 src/mesa/drivers/dri/i965/brw_draw.c | 29 -
 src/mesa/drivers/dri/i965/brw_util.c | 28 
 2 files changed, 28 insertions(+), 29 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_draw.c 
b/src/mesa/drivers/dri/i965/brw_draw.c
index c295d91..afa8a4e 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -54,23 +54,6 @@
 
 #define FILE_DEBUG_FLAG DEBUG_PRIMS
 
-static const GLuint prim_to_hw_prim[GL_TRIANGLE_STRIP_ADJACENCY+1] = {
-   [GL_POINTS] =_3DPRIM_POINTLIST,
-   [GL_LINES] = _3DPRIM_LINELIST,
-   [GL_LINE_LOOP] = _3DPRIM_LINELOOP,
-   [GL_LINE_STRIP] = _3DPRIM_LINESTRIP,
-   [GL_TRIANGLES] = _3DPRIM_TRILIST,
-   [GL_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
-   [GL_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
-   [GL_QUADS] = _3DPRIM_QUADLIST,
-   [GL_QUAD_STRIP] = _3DPRIM_QUADSTRIP,
-   [GL_POLYGON] = _3DPRIM_POLYGON,
-   [GL_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
-   [GL_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
-   [GL_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
-   [GL_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
-};
-
 
 static const GLenum reduced_prim[GL_POLYGON+1] = {
[GL_POINTS] = GL_POINTS,
@@ -85,18 +68,6 @@ static const GLenum reduced_prim[GL_POLYGON+1] = {
[GL_POLYGON] = GL_TRIANGLES
 };
 
-uint32_t
-get_hw_prim_for_gl_prim(int mode)
-{
-   if (mode >= BRW_PRIM_OFFSET)
-  return mode - BRW_PRIM_OFFSET;
-   else {
-  assert(mode < ARRAY_SIZE(prim_to_hw_prim));
-  return prim_to_hw_prim[mode];
-   }
-}
-
-
 /* When the primitive changes, set a state bit and re-validate.  Not
  * the nicest and would rather deal with this by having all the
  * programs be immune to the active primitive (ie. cope with all
diff --git a/src/mesa/drivers/dri/i965/brw_util.c 
b/src/mesa/drivers/dri/i965/brw_util.c
index bf7f9c6..934b6b8 100644
--- a/src/mesa/drivers/dri/i965/brw_util.c
+++ b/src/mesa/drivers/dri/i965/brw_util.c
@@ -98,3 +98,31 @@ GLuint brw_translate_blend_factor( GLenum factor )
   unreachable("not reached");
}
 }
+
+static const GLuint prim_to_hw_prim[GL_TRIANGLE_STRIP_ADJACENCY+1] = {
+   [GL_POINTS] =_3DPRIM_POINTLIST,
+   [GL_LINES] = _3DPRIM_LINELIST,
+   [GL_LINE_LOOP] = _3DPRIM_LINELOOP,
+   [GL_LINE_STRIP] = _3DPRIM_LINESTRIP,
+   [GL_TRIANGLES] = _3DPRIM_TRILIST,
+   [GL_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
+   [GL_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
+   [GL_QUADS] = _3DPRIM_QUADLIST,
+   [GL_QUAD_STRIP] = _3DPRIM_QUADSTRIP,
+   [GL_POLYGON] = _3DPRIM_POLYGON,
+   [GL_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
+   [GL_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
+   [GL_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
+   [GL_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
+};
+
+uint32_t
+get_hw_prim_for_gl_prim(int mode)
+{
+   if (mode >= BRW_PRIM_OFFSET)
+  return mode - BRW_PRIM_OFFSET;
+   else {
+  assert(mode < ARRAY_SIZE(prim_to_hw_prim));
+  return prim_to_hw_prim[mode];
+   }
+}
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/4] i965/fs: Make the repclear shader support either a uniform or a flat input

2016-04-04 Thread Jason Ekstrand
In the Vulkan driver we use a single flat input instead of a uniform
because setting up push constants is more disruptive to the pipeline than
setting up another vertex input.  This uses the number of uniforms as a key
to keep it working for the GL driver.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 25 -
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 1a6a229..b138c15 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2787,10 +2787,23 @@ fs_visitor::emit_repclear_shader()
brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
int base_mrf = 1;
int color_mrf = base_mrf + 2;
+   fs_inst *mov;
 
-   fs_inst *mov = bld.exec_all().group(4, 0)
- .MOV(brw_message_reg(color_mrf),
-  fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F));
+   if (uniforms > 0) {
+  mov = bld.exec_all().group(4, 0)
+   .MOV(brw_message_reg(color_mrf),
+fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F));
+   } else {
+  struct brw_reg reg =
+ brw_reg(BRW_GENERAL_REGISTER_FILE,
+ 2, 3, 0, 0, BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_8,
+ BRW_WIDTH_2,
+ BRW_HORIZONTAL_STRIDE_4, BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+
+  mov = bld.exec_all().group(4, 0)
+   .MOV(vec4(brw_message_reg(color_mrf)), fs_reg(reg));
+   }
 
fs_inst *write;
if (key->nr_color_regions == 1) {
@@ -2819,8 +2832,10 @@ fs_visitor::emit_repclear_shader()
assign_curb_setup();
 
/* Now that we have the uniform assigned, go ahead and force it to a vec4. 
*/
-   assert(mov->src[0].file == FIXED_GRF);
-   mov->src[0] = brw_vec4_grf(mov->src[0].nr, 0);
+   if (uniforms == 1) {
+  assert(mov->src[0].file == FIXED_GRF);
+  mov->src[0] = brw_vec4_grf(mov->src[0].nr, 0);
+   }
 }
 
 /**
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 07/12] glsl: Add arb_cull_distance support

2016-04-04 Thread Tobias Klausmann



On 05.04.2016 00:06, Dave Airlie wrote:

On 4 April 2016 at 23:07, Tobias Klausmann
 wrote:


On 04.04.2016 04:48, Timothy Arceri wrote:

On Mon, 2016-04-04 at 12:15 +1000, Dave Airlie wrote:

From: Tobias Klausmann 

Signed-off-by: Tobias Klausmann Const.MaxClipPlanes);
+   } else if (strcmp("gl_CullDistance", name) == 0
+  && size > state->Const.MaxClipPlanes) {
+  /* From the ARB_cull_distance spec:
+   *
+   *   "The gl_CullDistance array is predeclared as unsized and
+   *must be sized by the shader either redeclaring it with
+   *a size or indexing it only with integral constant
+   *expressions. The size determines the number and set of
+   *enabled cull distances and can be at most
+   *gl_MaxCullDistances."
+   */
+  _mesa_glsl_error(&loc, state, "`gl_CullDistance' array size
cannot "
+   "be larger than gl_MaxCullDistances (%u)",
+   state->Const.MaxClipPlanes);
  }
   }
   diff --git a/src/compiler/glsl/builtin_variables.cpp
b/src/compiler/glsl/builtin_variables.cpp
index f31f9f6..8d049c8 100644
--- a/src/compiler/glsl/builtin_variables.cpp
+++ b/src/compiler/glsl/builtin_variables.cpp
@@ -302,7 +302,7 @@ public:
  const glsl_type *construct_interface_instance() const;
 private:
-   glsl_struct_field fields[10];
+   glsl_struct_field fields[11];
  unsigned num_fields;
   };
   @@ -675,6 +675,11 @@ builtin_variable_generator::generate_constants()
 add_const("gl_MaxClipDistances", state->Const.MaxClipPlanes);
 add_const("gl_MaxVaryingComponents", state->ctx-

Const.MaxVarying * 4);

  }
+   if (state->is_version(450, 0) || state->ARB_cull_distance_enable)
{
+  add_const("gl_MaxCullDistances", state->Const.MaxClipPlanes);
+  add_const("gl_MaxCombinedClipAndCullDistances",
+state->Const.MaxClipPlanes);
+   }
if (state->has_geometry_shader()) {
 add_const("gl_MaxVertexOutputComponents",
@@ -1246,6 +1251,10 @@
builtin_variable_generator::generate_varyings()
  add_varying(VARYING_SLOT_CLIP_DIST0, array(float_t, 0),
  "gl_ClipDistance");
  }
+   if (state->is_version(450, 0) || state->ARB_cull_distance_enable)
{
+  add_varying(VARYING_SLOT_CULL_DIST0, array(float_t, 0),
+   "gl_CullDistance");
+   }
if (compatibility) {
 add_varying(VARYING_SLOT_TEX0, array(vec4_t, 0),
"gl_TexCoord");
diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y
b/src/compiler/glsl/glcpp/glcpp-parse.y
index a48266c..e44f074 100644
--- a/src/compiler/glsl/glcpp/glcpp-parse.y
+++ b/src/compiler/glsl/glcpp/glcpp-parse.y
@@ -2457,6 +2457,9 @@
_glcpp_parser_handle_version_declaration(glcpp_parser_t *parser,
intmax_t versio
  if (extensions->ARB_shader_draw_parameters)
   add_builtin_define(parser,
"GL_ARB_shader_draw_parameters", 1);
+
+ if (extensions->ARB_cull_distance)
+add_builtin_define(parser, "GL_ARB_cull_distance", 1);
 }
  }
   diff --git a/src/compiler/glsl/glsl_parser_extras.cpp
b/src/compiler/glsl/glsl_parser_extras.cpp
index 76321aa..9b1d53f 100644
--- a/src/compiler/glsl/glsl_parser_extras.cpp
+++ b/src/compiler/glsl/glsl_parser_extras.cpp
@@ -569,6 +569,7 @@ static const _mesa_glsl_extension
_mesa_glsl_supported_extensions[] = {
  EXT(ARB_arrays_of_arrays, true,  false, ARB_array
s_of_arrays),
  EXT(ARB_compute_shader,   true,  false, ARB_compu
te_shader),
  EXT(ARB_conservative_depth,   true,  false, ARB_conse
rvative_depth),
+   EXT(ARB_cull_distance,true,  false, ARB_cull_
distance),
  EXT(ARB_derivative_control,   true,  false, ARB_deriv
ative_control),
  EXT(ARB_draw_buffers, true,  false, dummy_tru
e),
  EXT(ARB_draw_instanced,   true,  false, ARB_draw_
instanced),
diff --git a/src/compiler/glsl/glsl_parser_extras.h
b/src/compiler/glsl/glsl_parser_extras.h
index c774fbe..85a8ebf 100644
--- a/src/compiler/glsl/glsl_parser_extras.h
+++ b/src/compiler/glsl/glsl_parser_extras.h
@@ -518,6 +518,8 @@ struct _mesa_glsl_parse_state {
  bool ARB_compute_shader_warn;
  bool ARB_conservative_depth_enable;
  bool ARB_conservative_depth_warn;
+   bool ARB_cull_distance_enable;
+   bool ARB_cull_distance_warn;
  bool ARB_derivative_control_enable;
  bool ARB_derivative_control_warn;
  bool ARB_draw_buffers_enable;
diff --git a/src/compiler/glsl/link_varyings.cpp
b/src/compiler/glsl/link_varyings.cpp
index 8e74981..d4cc68f 100644
--- a/src/compiler/glsl/link_varyings.cpp
+++ b/src/compiler/glsl/link_varyings.cpp
@@ -573,6 +573,10 @@ tfeedback_decl::init(struct gl_context *ctx,
const void *mem_ctx,
  strcmp(this->var_name, "gl_ClipDistance") == 0) {
 this->lowered_builtin_array_variable = clip_distance;
  }
+   if (ctx->Const.ShaderCompilerOptio

Re: [Mesa-dev] [PATCH 07/12] glsl: Add arb_cull_distance support

2016-04-04 Thread Dave Airlie
On 4 April 2016 at 23:07, Tobias Klausmann
 wrote:
>
>
> On 04.04.2016 04:48, Timothy Arceri wrote:
>>
>> On Mon, 2016-04-04 at 12:15 +1000, Dave Airlie wrote:
>>>
>>> From: Tobias Klausmann 
>>>
>>> Signed-off-by: Tobias Klausmann
>>> ---
>>>   src/compiler/glsl/ast_to_hir.cpp |  14 +++
>>>   src/compiler/glsl/builtin_variables.cpp  |  11 ++-
>>>   src/compiler/glsl/glcpp/glcpp-parse.y|   3 +
>>>   src/compiler/glsl/glsl_parser_extras.cpp |   1 +
>>>   src/compiler/glsl/glsl_parser_extras.h   |   2 +
>>>   src/compiler/glsl/link_varyings.cpp  |  10 +++
>>>   src/compiler/glsl/link_varyings.h|   1 +
>>>   src/compiler/glsl/linker.cpp | 122
>>> +--
>>>   src/compiler/glsl/standalone_scaffolding.cpp |   1 +
>>>   src/compiler/glsl/tests/varyings_test.cpp|  27 ++
>>>   src/compiler/shader_enums.h  |   4 +
>>>   11 files changed, 169 insertions(+), 27 deletions(-)
>>>
>>> diff --git a/src/compiler/glsl/ast_to_hir.cpp
>>> b/src/compiler/glsl/ast_to_hir.cpp
>>> index 7c9be81..47db841 100644
>>> --- a/src/compiler/glsl/ast_to_hir.cpp
>>> +++ b/src/compiler/glsl/ast_to_hir.cpp
>>> @@ -1210,6 +1210,20 @@ check_builtin_array_max_size(const char *name,
>>> unsigned size,
>>> _mesa_glsl_error(&loc, state, "`gl_ClipDistance' array size
>>> cannot "
>>>  "be larger than gl_MaxClipDistances (%u)",
>>>  state->Const.MaxClipPlanes);
>>> +   } else if (strcmp("gl_CullDistance", name) == 0
>>> +  && size > state->Const.MaxClipPlanes) {
>>> +  /* From the ARB_cull_distance spec:
>>> +   *
>>> +   *   "The gl_CullDistance array is predeclared as unsized and
>>> +   *must be sized by the shader either redeclaring it with
>>> +   *a size or indexing it only with integral constant
>>> +   *expressions. The size determines the number and set of
>>> +   *enabled cull distances and can be at most
>>> +   *gl_MaxCullDistances."
>>> +   */
>>> +  _mesa_glsl_error(&loc, state, "`gl_CullDistance' array size
>>> cannot "
>>> +   "be larger than gl_MaxCullDistances (%u)",
>>> +   state->Const.MaxClipPlanes);
>>>  }
>>>   }
>>>   diff --git a/src/compiler/glsl/builtin_variables.cpp
>>> b/src/compiler/glsl/builtin_variables.cpp
>>> index f31f9f6..8d049c8 100644
>>> --- a/src/compiler/glsl/builtin_variables.cpp
>>> +++ b/src/compiler/glsl/builtin_variables.cpp
>>> @@ -302,7 +302,7 @@ public:
>>>  const glsl_type *construct_interface_instance() const;
>>> private:
>>> -   glsl_struct_field fields[10];
>>> +   glsl_struct_field fields[11];
>>>  unsigned num_fields;
>>>   };
>>>   @@ -675,6 +675,11 @@ builtin_variable_generator::generate_constants()
>>> add_const("gl_MaxClipDistances", state->Const.MaxClipPlanes);
>>> add_const("gl_MaxVaryingComponents", state->ctx->Const.MaxVarying * 4);
>>>
>>>  }
>>> +   if (state->is_version(450, 0) || state->ARB_cull_distance_enable)
>>> {
>>> +  add_const("gl_MaxCullDistances", state->Const.MaxClipPlanes);
>>> +  add_const("gl_MaxCombinedClipAndCullDistances",
>>> +state->Const.MaxClipPlanes);
>>> +   }
>>>if (state->has_geometry_shader()) {
>>> add_const("gl_MaxVertexOutputComponents",
>>> @@ -1246,6 +1251,10 @@
>>> builtin_variable_generator::generate_varyings()
>>>  add_varying(VARYING_SLOT_CLIP_DIST0, array(float_t, 0),
>>>  "gl_ClipDistance");
>>>  }
>>> +   if (state->is_version(450, 0) || state->ARB_cull_distance_enable)
>>> {
>>> +  add_varying(VARYING_SLOT_CULL_DIST0, array(float_t, 0),
>>> +   "gl_CullDistance");
>>> +   }
>>>if (compatibility) {
>>> add_varying(VARYING_SLOT_TEX0, array(vec4_t, 0),
>>> "gl_TexCoord");
>>> diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y
>>> b/src/compiler/glsl/glcpp/glcpp-parse.y
>>> index a48266c..e44f074 100644
>>> --- a/src/compiler/glsl/glcpp/glcpp-parse.y
>>> +++ b/src/compiler/glsl/glcpp/glcpp-parse.y
>>> @@ -2457,6 +2457,9 @@
>>> _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser,
>>> intmax_t versio
>>>  if (extensions->ARB_shader_draw_parameters)
>>>   add_builtin_define(parser,
>>> "GL_ARB_shader_draw_parameters", 1);
>>> +
>>> + if (extensions->ARB_cull_distance)
>>> +add_builtin_define(parser, "GL_ARB_cull_distance", 1);
>>> }
>>>  }
>>>   diff --git a/src/compiler/glsl/glsl_parser_extras.cpp
>>> b/src/compiler/glsl/glsl_parser_extras.cpp
>>> index 76321aa..9b1d53f 100644
>>> --- a/src/compiler/glsl/glsl_parser_extras.cpp
>>> +++ b/src/compiler/glsl/glsl_parser_extras.cpp
>>> @@ -569,6 +569,7 @@ static const _mesa_glsl_extension
>>> _mesa_glsl_supported_extensions[] = {
>>>  EXT(ARB_arrays_of_arrays, true,  false, ARB_array
>>> s_of

Re: [Mesa-dev] [PATCH] doxygen: update glsl link

2016-04-04 Thread Emil Velikov
On 4 April 2016 at 21:38, Elie TOURNIER  wrote:
> ---
>  doxygen/glsl.doxy | 7 +++
>  1 file changed, 3 insertions(+), 4 deletions(-)
>
> diff --git a/doxygen/glsl.doxy b/doxygen/glsl.doxy
> index 9915ba2..0f4c822 100644
> --- a/doxygen/glsl.doxy
> +++ b/doxygen/glsl.doxy
> @@ -9,11 +9,10 @@ PROJECT_NAME   = "Mesa GLSL module"
>  #---
>  # configuration options related to the input files
>  #---
> -INPUT  = ../src/glsl/
> +INPUT  = ../src/compiler/glsl/
> +FILE_PATTERNS  = *.c *.cpp *.h
>  RECURSIVE  = NO
> -EXCLUDE= ../src/glsl/glsl_lexer.cpp \
> - ../src/glsl/glsl_parser.cpp \
> - ../src/glsl/glsl_parser.h
> +EXCLUDE=
Speaking of doxygen, has anyone managed to get it in shape ? I've seen
some efforts in https://fossies.org/dox/mesa-11.0.9/ although I'm not
sure how much tinkering was required to get that far.

For the patch itself - I second Matt's suggestion. Let's keep the
generated sources out, for the time being at least.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] doxygen: update glsl link

2016-04-04 Thread Matt Turner
On Mon, Apr 4, 2016 at 1:38 PM, Elie TOURNIER  wrote:
> ---
>  doxygen/glsl.doxy | 7 +++
>  1 file changed, 3 insertions(+), 4 deletions(-)
>
> diff --git a/doxygen/glsl.doxy b/doxygen/glsl.doxy
> index 9915ba2..0f4c822 100644
> --- a/doxygen/glsl.doxy
> +++ b/doxygen/glsl.doxy
> @@ -9,11 +9,10 @@ PROJECT_NAME   = "Mesa GLSL module"
>  #---
>  # configuration options related to the input files
>  #---
> -INPUT  = ../src/glsl/
> +INPUT  = ../src/compiler/glsl/
> +FILE_PATTERNS  = *.c *.cpp *.h
>  RECURSIVE  = NO
> -EXCLUDE= ../src/glsl/glsl_lexer.cpp \
> - ../src/glsl/glsl_parser.cpp \
> - ../src/glsl/glsl_parser.h
> +EXCLUDE=

Do we actually want to run doxygen on the generated lexer and parser?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/4] expose ARB_robust_buffer_access_behavior

2016-04-04 Thread Nicolai Hähnle

Capitalize the commit title.

It might be nice to split this patch up into mesa/main and Gallium parts.

Also...

On 04.04.2016 06:41, Bas Nieuwenhuizen wrote:

Signed-off-by: Bas Nieuwenhuizen 
---
  src/gallium/docs/source/screen.rst   | 4 +++-
  src/gallium/drivers/freedreno/freedreno_screen.c | 1 +
  src/gallium/drivers/i915/i915_screen.c   | 1 +
  src/gallium/drivers/ilo/ilo_screen.c | 1 +
  src/gallium/drivers/llvmpipe/lp_screen.c | 1 +
  src/gallium/drivers/nouveau/nv30/nv30_screen.c   | 1 +
  src/gallium/drivers/nouveau/nv50/nv50_screen.c   | 1 +
  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   | 1 +
  src/gallium/drivers/r300/r300_screen.c   | 1 +
  src/gallium/drivers/r600/r600_pipe.c | 1 +
  src/gallium/drivers/radeonsi/si_pipe.c   | 1 +
  src/gallium/drivers/softpipe/sp_screen.c | 1 +
  src/gallium/drivers/svga/svga_screen.c   | 1 +
  src/gallium/drivers/swr/swr_screen.cpp   | 1 +
  src/gallium/drivers/vc4/vc4_screen.c | 1 +
  src/gallium/drivers/virgl/virgl_screen.c | 1 +
  src/gallium/include/pipe/p_defines.h | 1 +
  src/mesa/main/extensions_table.h | 1 +
  src/mesa/main/mtypes.h   | 1 +
  src/mesa/main/version.c  | 2 +-
  src/mesa/state_tracker/st_extensions.c   | 1 +
  21 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/src/gallium/docs/source/screen.rst 
b/src/gallium/docs/source/screen.rst
index 47a19de..02564bd 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -323,7 +323,9 @@ The integer capabilities:
  * ``PIPE_CAP_PCI_BUS``: Return the PCI bus number.
  * ``PIPE_CAP_PCI_DEVICE``: Return the PCI device number.
  * ``PIPE_CAP_PCI_FUNCTION``: Return the PCI function number.
-
+* ``PIPE_CAP_ROBUST_BUFFER_ACCESS``: Implementation uses bounds checking on
+  resource accesses by shader if the context is create with


*created.

Also, I think this comment should say slightly more about how the bounds 
checking works, because ARB_robust_buffer_access_behavior says more, 
i.e. stores being dropped and loads returning 0 or some value from 
inside the buffer. Maybe just state that the intended out-of-bounds 
behavior is that of the extension.


Cheers,
Nicolai


+  PIPE_CONTEXT_ROBUST_BUFFER_ACCESS.

  .. _pipe_capf:

diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c 
b/src/gallium/drivers/freedreno/freedreno_screen.c
index d47cb07..6bd3d81 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -255,6 +255,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+   case PIPE_CAP_ROBUST_BUFFER_ACCESS:
return 0;

case PIPE_CAP_MAX_VIEWPORTS:
diff --git a/src/gallium/drivers/i915/i915_screen.c 
b/src/gallium/drivers/i915/i915_screen.c
index f4aa310..b8ad3c8 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -269,6 +269,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap 
cap)
 case PIPE_CAP_PCI_BUS:
 case PIPE_CAP_PCI_DEVICE:
 case PIPE_CAP_PCI_FUNCTION:
+   case PIPE_CAP_ROBUST_BUFFER_ACCESS:
return 0;

 case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
diff --git a/src/gallium/drivers/ilo/ilo_screen.c 
b/src/gallium/drivers/ilo/ilo_screen.c
index 7812c82..ebe17a0 100644
--- a/src/gallium/drivers/ilo/ilo_screen.c
+++ b/src/gallium/drivers/ilo/ilo_screen.c
@@ -498,6 +498,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap 
param)
 case PIPE_CAP_PCI_BUS:
 case PIPE_CAP_PCI_DEVICE:
 case PIPE_CAP_PCI_FUNCTION:
+   case PIPE_CAP_ROBUST_BUFFER_ACCESS:
return 0;

 case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c 
b/src/gallium/drivers/llvmpipe/lp_screen.c
index 2529b54..6d54685 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -319,6 +319,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum 
pipe_cap param)
 case PIPE_CAP_PCI_BUS:
 case PIPE_CAP_PCI_DEVICE:
 case PIPE_CAP_PCI_FUNCTION:
+   case PIPE_CAP_ROBUST_BUFFER_ACCESS:
return 0;
 }
 /* should only get here on unhandled cases */
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c 
b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index b105c6a..5e11c90 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -192,6 +192,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
 case PIPE_CAP_PCI_BUS:
 case PIPE_CAP_PCI_DEVICE:
 case PIPE_CAP_PCI_FUNCTION:
+   case PIPE_CAP_ROBUST_BUFFER_ACCESS:
return 0;

 case PIPE_CA

Re: [Mesa-dev] [PATCH 2/4] radeonsi: use bounded indexing for samplers

2016-04-04 Thread Nicolai Hähnle

Patches 1 & 2 are

Reviewed-by: Nicolai Hähnle 

On 04.04.2016 06:41, Bas Nieuwenhuizen wrote:

Signed-off-by: Bas Nieuwenhuizen 
---
  src/gallium/drivers/radeonsi/si_shader.c | 5 -
  1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index dd04748..392f439 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3781,7 +3781,10 @@ static void tex_fetch_ptrs(
const struct tgsi_full_src_register *reg = 
&emit_data->inst->Src[sampler_src];
LLVMValueRef ind_index;

-   ind_index = get_indirect_index(ctx, &reg->Indirect, 
reg->Register.Index);
+   ind_index = get_bounded_indirect_index(ctx,
+  &reg->Indirect,
+  reg->Register.Index,
+  SI_NUM_USER_SAMPLERS);

*res_ptr = get_sampler_desc(ctx, ind_index, DESC_IMAGE);



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH v2] glsl: Lower variable indexing of system value arrays unconditionally.

2016-04-04 Thread Ian Romanick
It might be worth having an assertion in case we either add another such 
variable in the future or the tess lowering pass somehow fails.


Either way is

Reviewed-by: Ian Romanick


On April 4, 2016 11:09:44 AM Kenneth Graunke  wrote:


lower_variable_index_to_cond_assign() did not handle system values.
gl_SampleMaskIn[] is a system value, and also an array.  Accessing it
with a variable index would trigger an unreachable() assert.

Rather than adding a new EmitNoIndirectSystemValues flag, we simply
lower unconditionally.  There is exactly one case where this occurs,
and for all current drivers, lowering produces optimal code.  Even
for future drivers with 32x MSAA, it produces reasonable code.

Fixes Piglit's new samplemaskin-indirect test.  Also fixes many ES31-CTS
tests when OES_sample_variables is enabled.

Cc: mesa-sta...@lists.freedesktop.org
Cc: Ilia Mirkin 
Signed-off-by: Kenneth Graunke 
---
 .../glsl/lower_variable_index_to_cond_assign.cpp | 20 
 1 file changed, 20 insertions(+)

diff --git a/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp 
b/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp

index 278d545..057de31 100644
--- a/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp
+++ b/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp
@@ -385,6 +385,26 @@ public:
   case ir_var_const_in:
  return this->lower_temps;

+  case ir_var_system_value:
+ /* There are only a few system values that have array types:
+  *
+  *gl_TessLevelInner[]
+  *gl_TessLevelOuter[]
+  *gl_SampleMaskIn[]
+  *
+  * The tessellation factor arrays are lowered to vec4/vec2s
+  * by lower_tess_level() before this pass occurs, so we'll
+  * never see them here.
+  *
+  * The only remaining case is gl_SampleMaskIn[], which has
+  * a length of ceil(ctx->Const.MaxSamples, 32).  Most hardware
+  * supports fewer than 32 samples, at which point our lowering
+  * produces a single read of gl_SampleMaskIn[0].  Even with 32x
+  * MSAA, the array length is only 2, so the lowering is fairly
+  * efficient.  Therefore, lower unconditionally.
+  */
+ return true;
+
   case ir_var_shader_in:
  /* The input array size is unknown at compiler time for non-patch
   * inputs in TCS and TES. The arrays are sized to
--
2.7.4

___
mesa-stable mailing list
mesa-sta...@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-stable



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] doxygen: update glsl link

2016-04-04 Thread Elie TOURNIER
---
 doxygen/glsl.doxy | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/doxygen/glsl.doxy b/doxygen/glsl.doxy
index 9915ba2..0f4c822 100644
--- a/doxygen/glsl.doxy
+++ b/doxygen/glsl.doxy
@@ -9,11 +9,10 @@ PROJECT_NAME   = "Mesa GLSL module"
 #---
 # configuration options related to the input files
 #---
-INPUT  = ../src/glsl/
+INPUT  = ../src/compiler/glsl/
+FILE_PATTERNS  = *.c *.cpp *.h
 RECURSIVE  = NO
-EXCLUDE= ../src/glsl/glsl_lexer.cpp \
- ../src/glsl/glsl_parser.cpp \
- ../src/glsl/glsl_parser.h
+EXCLUDE=
 EXCLUDE_PATTERNS   =
 #---
 # configuration options related to the HTML output
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] Build completed: mesa 803

2016-04-04 Thread AppVeyor


Build mesa 803 completed



Commit 65fbc43d54 by Kenneth Graunke on 1/27/2016 8:21 PM:

i965: Add an INTEL_PRECISE_TRIG=1 option to fix SIN/COS output range.\n\nThe SIN and COS instructions on Intel hardware can produce values\nslightly outside of the [-1.0, 1.0] range for a small set of values.\nObviously, this can break everyone's expectations about trig functions.\n\nAccording to an internal presentation, the COS instruction can produce\na value up to 1.27 for inputs in the range (0.08296, 0.09888).  One\nsuggested workaround is to multiply by 0.7, scaling down the\namplitude slightly.  Apparently this also minimizes the error function,\nreducing the maximum error from 0.6 to about 0.3.\n\nWhen enabled, fixes 16 dEQP precision tests\n\n   dEQP-GLES31.functional.shaders.builtin_functions.precision.\n   {cos,sin}.{highp,mediump}_compute.{scalar,vec2,vec4,vec4}.\n\nat the cost of making every sin and cos call more expensive (about\ntwice the number of cycles on recent hardware).  Enabling this\noption has been shown to reduce GPUTest Volplosion performance by\nabout 10%.\n\nSigned-off-by: Kenneth Graunke \nReviewed-by: Matt Turner \nReviewed-by: Jason Ekstrand 


Configure your notification preferences

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 16/26] i965: Implement the new imod and irem opcodes

2016-04-04 Thread Jason Ekstrand
On Fri, Apr 1, 2016 at 9:56 PM, Jason Ekstrand  wrote:

>
>
> On Tue, Mar 29, 2016 at 8:49 PM, Matt Turner  wrote:
>
>> On Fri, Mar 25, 2016 at 4:12 PM, Jason Ekstrand 
>> wrote:
>> > ---
>> >  src/mesa/drivers/dri/i965/brw_fs_nir.cpp   | 32
>> ++
>> >  src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 32
>> ++
>> >  2 files changed, 64 insertions(+)
>> >
>> > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> > index 14480fb..131f50e 100644
>> > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> > @@ -844,8 +844,40 @@ fs_visitor::nir_emit_alu(const fs_builder &bld,
>> nir_alu_instr *instr)
>> >unreachable("Should have been lowered by borrow_to_arith().");
>> >
>> > case nir_op_umod:
>> > +   case nir_op_irem:
>> > +  /* According to the sign table for INT DIV in the Ivy Bridge
>> PRM, it
>> > +   * appears that our hardware just does the right thing for signed
>> > +   * remainder.
>> > +   */
>> > +  bld.emit(SHADER_OPCODE_INT_REMAINDER, result, op[0], op[1]);
>> > +  break;
>> > +
>> > +   case nir_op_imod: {
>> > +  /* Get a regular C-style remainder.  If a % b == 0, set the
>> predicate. */
>> >bld.emit(SHADER_OPCODE_INT_REMAINDER, result, op[0], op[1]);
>> > +
>> > +  /* Math instructions don't support conditional mod */
>> > +  inst = bld.MOV(bld.null_reg_d(), result);
>> > +  inst->conditional_mod = BRW_CONDITIONAL_NZ;
>> > +
>> > +  /* Now, we need to determine if signs of the sources are
>> different.
>> > +   * When we XOR the sources, the top bit is 0 if they are the
>> same and 1
>> > +   * if they are different.  We can then use a conditional
>> modifier to
>> > +   * turn that into a predicate.  This leads us to an XOR.l
>> instruction.
>> > +   */
>> > +  fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_D);
>> > +  inst = bld.XOR(tmp, op[0], op[1]);
>> > +  inst->predicate = BRW_PREDICATE_NORMAL;
>> > +  inst->conditional_mod = BRW_CONDITIONAL_L;
>>
>> This goes against the PRM:
>>
>> "This operation does not produce sign or overflow conditions. Only the
>> .e/.z or .ne/.nz conditional modifiers should be used."
>>
>
> So, interesting news: I wrote a Vulkan CTS test for both imod and irem
> (they weren't tested by the CTS before) to see what the hardware does.
> And, contrary to what the PRM might lead you to believe, it seems to work
> just fine.  I've only tested on SKL so far but I have tested in both FS and
> vec4.  I'll try it on BDW and HSW before declaring victory, but it looks
> like XOR.l might be a well-defined thing after all.
>

More data: It appears to also work on Ivy Bridge.


> At the very least, we should add a comment with the PRM citation and the
> empirical results.
> --Jason
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: expose EXT_base_instance in ES3 contexts

2016-04-04 Thread Matt Turner
Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: expose EXT_polygon_offset_clamp in ES contexts

2016-04-04 Thread Matt Turner
On Sat, Apr 2, 2016 at 11:29 PM, Ilia Mirkin  wrote:
> The extension spec was extended to also support ES. This functionality
> is provided all the way back to ES 1.0.

I was going to ask if we have ES tests, and I see that you've already sent one!

Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: make *sampler2DMSArray available in ESSL 3.20

2016-04-04 Thread Matt Turner
Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] Build failed: mesa 802

2016-04-04 Thread AppVeyor



Build mesa 802 failed


Commit 8c8157bf6f by Jason Ekstrand on 4/4/2016 6:24 PM:

Remove more spirv2nir remnants


Configure your notification preferences

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Add an INTEL_PRECISE_TRIG=1 option to fix SIN/COS output range.

2016-04-04 Thread Jason Ekstrand
On Sun, Apr 3, 2016 at 5:34 PM, Kenneth Graunke 
wrote:

> The SIN and COS instructions on Intel hardware can produce values
> slightly outside of the [-1.0, 1.0] range for a small set of values.
> Obviously, this can break everyone's expectations about trig functions.
>
> According to an internal presentation, the COS instruction can produce
> a value up to 1.27 for inputs in the range (0.08296, 0.09888).  One
> suggested workaround is to multiply by 0.7, scaling down the
> amplitude slightly.  Apparently this also minimizes the error function,
> reducing the maximum error from 0.6 to about 0.3.
>
> When enabled, fixes 16 dEQP precision tests
>
>dEQP-GLES31.functional.shaders.builtin_functions.precision.
>{cos,sin}.{highp,mediump}_compute.{scalar,vec2,vec4,vec4}.
>
> at the cost of making every sin and cos call more expensive (about
> twice the number of cycles on recent hardware).  Enabling this
> option has been shown to reduce GPUTest Volplosion performance by
> about 10%.
>
> Signed-off-by: Kenneth Graunke 
> ---
>  src/mesa/drivers/dri/i965/brw_compiler.c   |  2 ++
>  src/mesa/drivers/dri/i965/brw_compiler.h   |  6 ++
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp   | 16 ++--
>  src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 16 ++--
>  4 files changed, 36 insertions(+), 4 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_compiler.c
> b/src/mesa/drivers/dri/i965/brw_compiler.c
> index 3da6aac..6509267 100644
> --- a/src/mesa/drivers/dri/i965/brw_compiler.c
> +++ b/src/mesa/drivers/dri/i965/brw_compiler.c
> @@ -147,6 +147,8 @@ brw_compiler_create(void *mem_ctx, const struct
> brw_device_info *devinfo)
> brw_fs_alloc_reg_sets(compiler);
> brw_vec4_alloc_reg_set(compiler);
>
> +   compiler->precise_trig = env_var_as_boolean("INTEL_PRECISE_TRIG",
> false);
> +
> compiler->scalar_stage[MESA_SHADER_VERTEX] =
>devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS);
> compiler->scalar_stage[MESA_SHADER_TESS_CTRL] = false;
> diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h
> b/src/mesa/drivers/dri/i965/brw_compiler.h
> index 27a95a3..231e000 100644
> --- a/src/mesa/drivers/dri/i965/brw_compiler.h
> +++ b/src/mesa/drivers/dri/i965/brw_compiler.h
> @@ -92,6 +92,12 @@ struct brw_compiler {
>
> bool scalar_stage[MESA_SHADER_STAGES];
> struct gl_shader_compiler_options
> glsl_compiler_options[MESA_SHADER_STAGES];
> +
> +   /**
> +* Apply workarounds for SIN and COS output range problems.
> +* This can negatively impact performance.
> +*/
> +   bool precise_trig;
>

This seems like the most reasonable thing to do

Reviewed-by: Jason Ekstrand 


>  };
>
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index 7839428..5cca91e 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -775,12 +775,24 @@ fs_visitor::nir_emit_alu(const fs_builder &bld,
> nir_alu_instr *instr)
>break;
>
> case nir_op_fsin:
> -  inst = bld.emit(SHADER_OPCODE_SIN, result, op[0]);
> +  if (!compiler->precise_trig) {
> + inst = bld.emit(SHADER_OPCODE_SIN, result, op[0]);
> +  } else {
> + fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F);
> + inst = bld.emit(SHADER_OPCODE_SIN, tmp, op[0]);
> + inst = bld.MUL(result, tmp, brw_imm_f(0.7));
> +  }
>inst->saturate = instr->dest.saturate;
>break;
>
> case nir_op_fcos:
> -  inst = bld.emit(SHADER_OPCODE_COS, result, op[0]);
> +  if (!compiler->precise_trig) {
> + inst = bld.emit(SHADER_OPCODE_COS, result, op[0]);
> +  } else {
> + fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F);
> + inst = bld.emit(SHADER_OPCODE_COS, tmp, op[0]);
> + inst = bld.MUL(result, tmp, brw_imm_f(0.7));
> +  }
>inst->saturate = instr->dest.saturate;
>break;
>
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> index ee6929b..6c8fd06 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> @@ -1101,12 +1101,24 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
>break;
>
> case nir_op_fsin:
> -  inst = emit_math(SHADER_OPCODE_SIN, dst, op[0]);
> +  if (!compiler->precise_trig) {
> + inst = emit_math(SHADER_OPCODE_SIN, dst, op[0]);
> +  } else {
> + src_reg tmp = src_reg(this, glsl_type::vec4_type);
> + inst = emit_math(SHADER_OPCODE_SIN, dst_reg(tmp), op[0]);
> + inst = emit(MUL(dst, tmp, brw_imm_f(0.7)));
> +  }
>inst->saturate = instr->dest.saturate;
>break;
>
> case nir_op_fcos:
> -  inst = emit_math(SHADER_OPCODE_COS, dst, op[0]);
> +  if (!compiler->precise_trig) {
> + inst = emit_math(SHADER_OPCODE_COS, dst, op[0]);
> +  } else {
> + src_reg tmp = src_reg(this,

Re: [Mesa-dev] [PATCH] glsl: allow conservative depth qualifiers in GLSL 420

2016-04-04 Thread Matt Turner
On Sat, Apr 2, 2016 at 6:12 PM, Ilia Mirkin  wrote:
> Signed-off-by: Ilia Mirkin 
> ---
>
> Noticed by inspection.
>
>  src/compiler/glsl/ast_to_hir.cpp | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/src/compiler/glsl/ast_to_hir.cpp 
> b/src/compiler/glsl/ast_to_hir.cpp
> index a609ff9..259bec9 100644
> --- a/src/compiler/glsl/ast_to_hir.cpp
> +++ b/src/compiler/glsl/ast_to_hir.cpp
> @@ -3326,6 +3326,7 @@ apply_layout_qualifier_to_variable(const struct 
> ast_type_qualifier *qual,
>+ qual->flags.q.depth_less
>+ qual->flags.q.depth_unchanged;
> if (depth_layout_count > 0
> +   && !state->is_version(420, 0)
> && !state->AMD_conservative_depth_enable
> && !state->ARB_conservative_depth_enable) {
> _mesa_glsl_error(loc, state,

There's an error message starting on the next line that should be updated.

I checked around and didn't see any other places that should be
updated. Looks like 4b15cb6da already handled one, while missing these
two.

With the error message updated:

Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Add an INTEL_PRECISE_TRIG=1 option to fix SIN/COS output range.

2016-04-04 Thread Matt Turner
Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] i965: Make bblock_t::next and friends return NULL at sentinels.

2016-04-04 Thread Matt Turner
On Sun, Apr 3, 2016 at 10:56 PM, Kenneth Graunke  wrote:
> The bblock_t::prev/prev_const/next/next_const API returns bblock_t
> pointers, rather than exec_nodes.  So it's a bit surprising.
>
> Signed-off-by: Kenneth Graunke 
> ---

Both are

Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] glsl: Lower variable indexing of system value arrays unconditionally.

2016-04-04 Thread Ilia Mirkin
On Mon, Apr 4, 2016 at 2:09 PM, Kenneth Graunke  wrote:
> lower_variable_index_to_cond_assign() did not handle system values.
> gl_SampleMaskIn[] is a system value, and also an array.  Accessing it
> with a variable index would trigger an unreachable() assert.
>
> Rather than adding a new EmitNoIndirectSystemValues flag, we simply
> lower unconditionally.  There is exactly one case where this occurs,
> and for all current drivers, lowering produces optimal code.  Even
> for future drivers with 32x MSAA, it produces reasonable code.
>
> Fixes Piglit's new samplemaskin-indirect test.  Also fixes many ES31-CTS
> tests when OES_sample_variables is enabled.
>
> Cc: mesa-sta...@lists.freedesktop.org
> Cc: Ilia Mirkin 
> Signed-off-by: Kenneth Graunke 
> ---
>  .../glsl/lower_variable_index_to_cond_assign.cpp | 20 
> 
>  1 file changed, 20 insertions(+)
>
> diff --git a/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp 
> b/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp
> index 278d545..057de31 100644
> --- a/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp
> +++ b/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp
> @@ -385,6 +385,26 @@ public:
>case ir_var_const_in:
>   return this->lower_temps;
>
> +  case ir_var_system_value:
> + /* There are only a few system values that have array types:
> +  *
> +  *gl_TessLevelInner[]
> +  *gl_TessLevelOuter[]
> +  *gl_SampleMaskIn[]
> +  *
> +  * The tessellation factor arrays are lowered to vec4/vec2s
> +  * by lower_tess_level() before this pass occurs, so we'll
> +  * never see them here.
> +  *
> +  * The only remaining case is gl_SampleMaskIn[], which has
> +  * a length of ceil(ctx->Const.MaxSamples, 32).  Most hardware

DIV_ROUND_UP(ctx->Const.MaxSamples, 32) hopefully. [Unless ceil does
something I don't know about.]

> +  * supports fewer than 32 samples, at which point our lowering
> +  * produces a single read of gl_SampleMaskIn[0].  Even with 32x
> +  * MSAA, the array length is only 2, so the lowering is fairly

Still 1 at 32x MSAA. Would be 2 at a (hypothetical) 33x MSAA or more
probable 64x MSAA.

With the comment adjusted to better reflect reality,

Reviewed-by: Ilia Mirkin 

> +  * efficient.  Therefore, lower unconditionally.
> +  */
> + return true;
> +
>case ir_var_shader_in:
>   /* The input array size is unknown at compiler time for non-patch
>* inputs in TCS and TES. The arrays are sized to
> --
> 2.7.4
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] glsl: Lower variable indexing of system value arrays unconditionally.

2016-04-04 Thread Kenneth Graunke
lower_variable_index_to_cond_assign() did not handle system values.
gl_SampleMaskIn[] is a system value, and also an array.  Accessing it
with a variable index would trigger an unreachable() assert.

Rather than adding a new EmitNoIndirectSystemValues flag, we simply
lower unconditionally.  There is exactly one case where this occurs,
and for all current drivers, lowering produces optimal code.  Even
for future drivers with 32x MSAA, it produces reasonable code.

Fixes Piglit's new samplemaskin-indirect test.  Also fixes many ES31-CTS
tests when OES_sample_variables is enabled.

Cc: mesa-sta...@lists.freedesktop.org
Cc: Ilia Mirkin 
Signed-off-by: Kenneth Graunke 
---
 .../glsl/lower_variable_index_to_cond_assign.cpp | 20 
 1 file changed, 20 insertions(+)

diff --git a/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp 
b/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp
index 278d545..057de31 100644
--- a/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp
+++ b/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp
@@ -385,6 +385,26 @@ public:
   case ir_var_const_in:
  return this->lower_temps;
 
+  case ir_var_system_value:
+ /* There are only a few system values that have array types:
+  *
+  *gl_TessLevelInner[]
+  *gl_TessLevelOuter[]
+  *gl_SampleMaskIn[]
+  *
+  * The tessellation factor arrays are lowered to vec4/vec2s
+  * by lower_tess_level() before this pass occurs, so we'll
+  * never see them here.
+  *
+  * The only remaining case is gl_SampleMaskIn[], which has
+  * a length of ceil(ctx->Const.MaxSamples, 32).  Most hardware
+  * supports fewer than 32 samples, at which point our lowering
+  * produces a single read of gl_SampleMaskIn[0].  Even with 32x
+  * MSAA, the array length is only 2, so the lowering is fairly
+  * efficient.  Therefore, lower unconditionally.
+  */
+ return true;
+
   case ir_var_shader_in:
  /* The input array size is unknown at compiler time for non-patch
   * inputs in TCS and TES. The arrays are sized to
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 13/20] radeonsi: only emit compute shader state when switching shaders

2016-04-04 Thread Bas Nieuwenhuizen
On Mon, Apr 4, 2016 at 7:53 PM, Bas Nieuwenhuizen
 wrote:
> On Mon, Apr 4, 2016 at 7:29 PM, Marek Olšák  wrote:
>> On Sat, Apr 2, 2016 at 3:10 PM, Bas Nieuwenhuizen
>>  wrote:
>>> Signed-off-by: Bas Nieuwenhuizen 
>>> ---
>>>  src/gallium/drivers/radeonsi/si_compute.c | 142 
>>> +-
>>>  src/gallium/drivers/radeonsi/si_pipe.h|   2 +
>>>  2 files changed, 85 insertions(+), 59 deletions(-)
>>>
>>> diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
>>> b/src/gallium/drivers/radeonsi/si_compute.c
>>> index e712b46..74db8d4 100644
>>> --- a/src/gallium/drivers/radeonsi/si_compute.c
>>> +++ b/src/gallium/drivers/radeonsi/si_compute.c
>>> @@ -173,6 +173,7 @@ static void si_initialize_compute(struct si_context 
>>> *sctx)
>>> radeon_emit(cs, 0x190 /* Default value */);
>>> }
>>>
>>> +   sctx->cs_shader_state.emitted_bo = NULL;
>>> sctx->cs_shader_state.initialized = true;
>>>  }
>>>
>>> @@ -213,6 +214,87 @@ static bool si_setup_compute_scratch_buffer(struct 
>>> si_context *sctx,
>>> return true;
>>>  }
>>>
>>> +static bool si_switch_compute_shader(struct si_context *sctx,
>>> + struct si_compute *program,
>>> + struct si_shader *shader, unsigned 
>>> offset)
>>> +{
>>> +   struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
>>> +   struct si_shader_config inline_config = {0};
>>> +   struct si_shader_config *config;
>>> +   uint64_t shader_va;
>>> +
>>> +
>>> +   if (program->ir_type == PIPE_SHADER_IR_TGSI) {
>>> +   config = &shader->config;
>>> +   } else {
>>> +   unsigned lds_blocks;
>>> +
>>> +   config = &inline_config;
>>> +   si_shader_binary_read_config(&shader->binary, config, 
>>> offset);
>>> +
>>> +   lds_blocks = config->lds_size;
>>> +   /* XXX: We are over allocating LDS.  For SI, the shader 
>>> reports
>>> +   * LDS in blocks of 256 bytes, so if there are 4 bytes lds
>>> +   * allocated in the shader and 4 bytes allocated by the state
>>> +   * tracker, then we will set LDS_SIZE to 512 bytes rather 
>>> than 256.
>>> +   */
>>> +   if (sctx->b.chip_class <= SI) {
>>> +   lds_blocks += align(program->local_size, 256) >> 8;
>>> +   } else {
>>> +   lds_blocks += align(program->local_size, 512) >> 9;
>>> +   }
>>> +
>>> +   assert(lds_blocks <= 0xFF);
>>> +
>>> +   config->rsrc2 &= C_00B84C_LDS_SIZE;
>>> +   config->rsrc2 |=  S_00B84C_LDS_SIZE(lds_blocks);
>>> +   }
>>> +
>>> +   if (!si_setup_compute_scratch_buffer(sctx, shader, config))
>>> +   return false;
>>> +
>>> +   if (sctx->cs_shader_state.emitted_bo == shader->bo &&
>>> +   sctx->cs_shader_state.offset == offset)
>>> +   return true;
>>
>> Can this conditional, by any chance, be moved to the beginning of the 
>> function?
>
> si_setup_compute_scratch_buffer may need to reapply the relocations and
> then shader->bo changes.

And I just realized that can't happen when we use the same shader,
because it is then necessarily up to date from the last dispatch and we
don't create a new scratch buffer.

So, yes it can be moved to the beginning.

Thanks,
Bas

>>
>>> +
>>> +   if (shader->scratch_bo) {
>>> +   COMPUTE_DBG(sctx->screen, "Waves: %u; Scratch per wave: %u 
>>> bytes; "
>>> +   "Total Scratch: %u bytes\n", 
>>> sctx->scratch_waves,
>>> +   config->scratch_bytes_per_wave,
>>> +   config->scratch_bytes_per_wave *
>>> +   sctx->scratch_waves);
>>> +
>>> +   radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
>>> + shader->scratch_bo, RADEON_USAGE_READWRITE,
>>> + RADEON_PRIO_SCRATCH_BUFFER);
>>> +   }
>>> +
>>> +   shader_va = shader->bo->gpu_address + offset;
>>> +
>>> +   radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, shader->bo,
>>> + RADEON_USAGE_READ, 
>>> RADEON_PRIO_USER_SHADER);
>>> +
>>> +   radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 2, 0));
>>> +   radeon_emit(cs, (R_00B830_COMPUTE_PGM_LO - SI_SH_REG_OFFSET) >> 2);
>>> +   radeon_emit(cs, shader_va >> 8);
>>> +   radeon_emit(cs, shader_va >> 40);
>>> +
>>> +   radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 2, 0));
>>> +   radeon_emit(cs, (R_00B848_COMPUTE_PGM_RSRC1 - SI_SH_REG_OFFSET) >> 
>>> 2);
>>> +   radeon_emit(cs, config->rsrc1);
>>> +   radeon_emit(cs, config->rsrc2);
>>> +
>>> +   radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 1, 0));
>>> +   radeon_emit(cs, (R_00B860_COMPUTE_TMPRING_SIZE - SI_SH_REG_OFFSET) 
>>> >> 2);
>>> +   radeon_emit(cs, S_00B860_WAVES(sctx->scratch_waves)
>>> + 

Re: [Mesa-dev] [PATCH v2 2/2] i965: Fix invalid pointer read in dead_control_flow_eliminate().

2016-04-04 Thread Jason Ekstrand
On Mon, Apr 4, 2016 at 10:23 AM, Kenneth Graunke 
wrote:

> On Monday, April 4, 2016 12:19:57 AM PDT Jason Ekstrand wrote:
> > On Sun, Apr 3, 2016 at 10:56 PM, Kenneth Graunke 
> > wrote:
> >
> > > There may not be a previous block.  In this case, there's no real work
> > > to do, so just continue on to the next one.
> > >
> > > v2: Update for bblock->prev() API change.
> > >
> > > Signed-off-by: Kenneth Graunke 
> > > ---
> > >  src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp | 4 
> > >  1 file changed, 4 insertions(+)
> > >
> > > diff --git a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
> > > b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
> > > index 2c1abaf..114dc6c 100644
> > > --- a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
> > > +++ b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
> > > @@ -42,6 +42,10 @@ dead_control_flow_eliminate(backend_shader *s)
> > >
> > > foreach_block_safe (block, s->cfg) {
> > >bblock_t *prev_block = block->prev();
> > > +
> > > +  if (!prev_block)
> > > + continue;
> > > +
> > >backend_instruction *const inst = block->start();
> > >backend_instruction *const prev_inst = prev_block->end();
> > >
> >
> > Yeah, that guy right there will be a problem on the first loop iteration
> > every time.  I've seen this in valgrind quite a bit lately.  Thanks for
> > fixing it!
> >
> > Reviewed-by: Jason Ekstrand 
> >
> > Maybe Cc stable?
> > --Jason
>
> I checked 10.2 on the first spin of this series, and I didn't see the
> valgrind errors there.
>

Sounds good
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 13/20] radeonsi: only emit compute shader state when switching shaders

2016-04-04 Thread Bas Nieuwenhuizen
On Mon, Apr 4, 2016 at 7:29 PM, Marek Olšák  wrote:
> On Sat, Apr 2, 2016 at 3:10 PM, Bas Nieuwenhuizen
>  wrote:
>> Signed-off-by: Bas Nieuwenhuizen 
>> ---
>>  src/gallium/drivers/radeonsi/si_compute.c | 142 
>> +-
>>  src/gallium/drivers/radeonsi/si_pipe.h|   2 +
>>  2 files changed, 85 insertions(+), 59 deletions(-)
>>
>> diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
>> b/src/gallium/drivers/radeonsi/si_compute.c
>> index e712b46..74db8d4 100644
>> --- a/src/gallium/drivers/radeonsi/si_compute.c
>> +++ b/src/gallium/drivers/radeonsi/si_compute.c
>> @@ -173,6 +173,7 @@ static void si_initialize_compute(struct si_context 
>> *sctx)
>> radeon_emit(cs, 0x190 /* Default value */);
>> }
>>
>> +   sctx->cs_shader_state.emitted_bo = NULL;
>> sctx->cs_shader_state.initialized = true;
>>  }
>>
>> @@ -213,6 +214,87 @@ static bool si_setup_compute_scratch_buffer(struct 
>> si_context *sctx,
>> return true;
>>  }
>>
>> +static bool si_switch_compute_shader(struct si_context *sctx,
>> + struct si_compute *program,
>> + struct si_shader *shader, unsigned 
>> offset)
>> +{
>> +   struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
>> +   struct si_shader_config inline_config = {0};
>> +   struct si_shader_config *config;
>> +   uint64_t shader_va;
>> +
>> +
>> +   if (program->ir_type == PIPE_SHADER_IR_TGSI) {
>> +   config = &shader->config;
>> +   } else {
>> +   unsigned lds_blocks;
>> +
>> +   config = &inline_config;
>> +   si_shader_binary_read_config(&shader->binary, config, 
>> offset);
>> +
>> +   lds_blocks = config->lds_size;
>> +   /* XXX: We are over allocating LDS.  For SI, the shader 
>> reports
>> +   * LDS in blocks of 256 bytes, so if there are 4 bytes lds
>> +   * allocated in the shader and 4 bytes allocated by the state
>> +   * tracker, then we will set LDS_SIZE to 512 bytes rather 
>> than 256.
>> +   */
>> +   if (sctx->b.chip_class <= SI) {
>> +   lds_blocks += align(program->local_size, 256) >> 8;
>> +   } else {
>> +   lds_blocks += align(program->local_size, 512) >> 9;
>> +   }
>> +
>> +   assert(lds_blocks <= 0xFF);
>> +
>> +   config->rsrc2 &= C_00B84C_LDS_SIZE;
>> +   config->rsrc2 |=  S_00B84C_LDS_SIZE(lds_blocks);
>> +   }
>> +
>> +   if (!si_setup_compute_scratch_buffer(sctx, shader, config))
>> +   return false;
>> +
>> +   if (sctx->cs_shader_state.emitted_bo == shader->bo &&
>> +   sctx->cs_shader_state.offset == offset)
>> +   return true;
>
> Can this conditional, by any chance, be moved to the beginning of the 
> function?

si_setup_compute_scratch_buffer may need to reapply the relocations and
then shader->bo changes.

- Bas

>
>> +
>> +   if (shader->scratch_bo) {
>> +   COMPUTE_DBG(sctx->screen, "Waves: %u; Scratch per wave: %u 
>> bytes; "
>> +   "Total Scratch: %u bytes\n", sctx->scratch_waves,
>> +   config->scratch_bytes_per_wave,
>> +   config->scratch_bytes_per_wave *
>> +   sctx->scratch_waves);
>> +
>> +   radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
>> + shader->scratch_bo, RADEON_USAGE_READWRITE,
>> + RADEON_PRIO_SCRATCH_BUFFER);
>> +   }
>> +
>> +   shader_va = shader->bo->gpu_address + offset;
>> +
>> +   radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, shader->bo,
>> + RADEON_USAGE_READ, 
>> RADEON_PRIO_USER_SHADER);
>> +
>> +   radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 2, 0));
>> +   radeon_emit(cs, (R_00B830_COMPUTE_PGM_LO - SI_SH_REG_OFFSET) >> 2);
>> +   radeon_emit(cs, shader_va >> 8);
>> +   radeon_emit(cs, shader_va >> 40);
>> +
>> +   radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 2, 0));
>> +   radeon_emit(cs, (R_00B848_COMPUTE_PGM_RSRC1 - SI_SH_REG_OFFSET) >> 
>> 2);
>> +   radeon_emit(cs, config->rsrc1);
>> +   radeon_emit(cs, config->rsrc2);
>> +
>> +   radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 1, 0));
>> +   radeon_emit(cs, (R_00B860_COMPUTE_TMPRING_SIZE - SI_SH_REG_OFFSET) 
>> >> 2);
>> +   radeon_emit(cs, S_00B860_WAVES(sctx->scratch_waves)
>> +   | S_00B860_WAVESIZE(config->scratch_bytes_per_wave >> 10));
>
> Please use radeon_set_sh_reg_seq and radeon_set_sh_reg above.
>
> Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 14/20] radeonsi: implement TGSI compute dispatch

2016-04-04 Thread Marek Olšák
This should use radeon_set_sh_reg... like other patches.

Marek

On Sat, Apr 2, 2016 at 3:10 PM, Bas Nieuwenhuizen
 wrote:
> Signed-off-by: Bas Nieuwenhuizen 
> ---
>  src/gallium/drivers/radeonsi/si_compute.c | 104 
> ++
>  1 file changed, 77 insertions(+), 27 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
> b/src/gallium/drivers/radeonsi/si_compute.c
> index 74db8d4..64ad2f3 100644
> --- a/src/gallium/drivers/radeonsi/si_compute.c
> +++ b/src/gallium/drivers/radeonsi/si_compute.c
> @@ -346,13 +346,85 @@ static void si_upload_compute_input(struct si_context 
> *sctx,
> pipe_resource_reference((struct pipe_resource**)&input_buffer, NULL);
>  }
>
> +static void si_setup_tgsi_grid(struct si_context *sctx,
> +const struct pipe_grid_info *info)
> +{
> +   struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
> +   unsigned grid_size_reg = R_00B900_COMPUTE_USER_DATA_0 +
> + 4 * SI_SGPR_GRID_SIZE;
> +
> +   if (info->indirect) {
> +   uint64_t base_va = r600_resource(info->indirect)->gpu_address;
> +   uint64_t va = base_va + info->indirect_offset;
> +   int i;
> +
> +   radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
> +(struct r600_resource *)info->indirect,
> +RADEON_USAGE_READ, 
> RADEON_PRIO_DRAW_INDIRECT);
> +
> +   for (i = 0; i < 3; ++i) {
> +   radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
> +   radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
> +   COPY_DATA_DST_SEL(COPY_DATA_REG));
> +   radeon_emit(cs, (va +  4 * i));
> +   radeon_emit(cs, (va + 4 * i) >> 32);
> +   radeon_emit(cs, (grid_size_reg >> 2) + i);
> +   radeon_emit(cs, 0);
> +   }
> +   } else {
> +
> +   radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 3, 0));
> +   radeon_emit(cs, (grid_size_reg - SI_SH_REG_OFFSET) >> 2);
> +   radeon_emit(cs, info->grid[0]);
> +   radeon_emit(cs, info->grid[1]);
> +   radeon_emit(cs, info->grid[2]);
> +   }
> +}
> +
> +static void si_emit_dispatch_packets(struct si_context *sctx,
> + const struct pipe_grid_info *info)
> +{
> +   struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
> +
> +   radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 3, 0));
> +   radeon_emit(cs, (R_00B81C_COMPUTE_NUM_THREAD_X - SI_SH_REG_OFFSET) >> 
> 2);
> +   radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(info->block[0]));
> +   radeon_emit(cs, S_00B820_NUM_THREAD_FULL(info->block[1]));
> +   radeon_emit(cs, S_00B824_NUM_THREAD_FULL(info->block[2]));
> +
> +   if (info->indirect) {
> +   uint64_t base_va = r600_resource(info->indirect)->gpu_address;
> +
> +   radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
> +(struct r600_resource *)info->indirect,
> +RADEON_USAGE_READ, 
> RADEON_PRIO_DRAW_INDIRECT);
> +
> +   radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0) |
> +   PKT3_SHADER_TYPE_S(1));
> +   radeon_emit(cs, 1);
> +   radeon_emit(cs, base_va);
> +   radeon_emit(cs, base_va >> 32);
> +
> +   radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 1, 0) |
> +   PKT3_SHADER_TYPE_S(1));
> +   radeon_emit(cs, info->indirect_offset);
> +   radeon_emit(cs, 1);
> +   } else {
> +   radeon_emit(cs, PKT3(PKT3_DISPATCH_DIRECT, 3, 0) |
> +   PKT3_SHADER_TYPE_S(1));
> +   radeon_emit(cs, info->grid[0]);
> +   radeon_emit(cs, info->grid[1]);
> +   radeon_emit(cs, info->grid[2]);
> +   radeon_emit(cs, 1);
> +   }
> +}
> +
>  static void si_launch_grid(
> struct pipe_context *ctx, const struct pipe_grid_info *info)
>  {
> struct si_context *sctx = (struct si_context*)ctx;
> struct si_compute *program = sctx->cs_shader_state.program;
> -   struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
> -   unsigned i;
> +   int i;
>
> si_need_cs_space(sctx);
>
> @@ -367,21 +439,12 @@ static void si_launch_grid(
>  SI_CONTEXT_FLAG_COMPUTE;
> si_emit_cache_flush(sctx, NULL);
>
> -   pm4->compute_pkt = true;
> -
> if (!si_switch_compute_shader(sctx, program, &program->shader, 
> info->pc))
> return;
>
> if (program->input_size)
> si_upload_compute_input(sctx, info);
>
> -   si_pm4_set_reg(pm4, R_00B81C_COMPUTE_NUM_THREAD_X,
> -   S_00B81C_NUM_THREAD_FULL(info->block[0]));

Re: [Mesa-dev] [PATCH v2] egl: add EGL_KHR_reusable_sync to egl_dri

2016-04-04 Thread dw kim
On Mon, Apr 04, 2016 at 01:19:13PM +0200, Marek Olšák wrote:
> This looks good in general. Just some small nitpicks below.
> 
> On Sat, Apr 2, 2016 at 1:46 AM, Dongwon Kim  wrote:
> > This patch enables an EGL extension, EGL_KHR_reusable_sync.
> > This new extension basically provides a way for multiple APIs or
> > threads to be executed synchronously via a "reusable sync"
> > primitive shared by those threads/API calls.
> >
> > This was implemented based on the specification at
> >
> > https://www.khronos.org/registry/egl/extensions/KHR/EGL_KHR_reusable_sync.txt
> >
> > v2
> > - use thread functions defined in C11/threads.h instead of
> >   using direct pthread calls
> > - make the timeout set with reference to CLOCK_MONOTONIC
> > - cleaned up the way expiration time is calculated
> > - (bug fix) in dri2_client_wait_sync, case EGL_SYNC_CL_EVENT_KHR
> >   has been added.
> > - (bug fix) in dri2_destroy_sync, return from cond_broadcast
> >   call is now stored in 'err' instead of 'ret' to prevent 'ret'
> >   from being reset to 'EGL_FALSE' even in successful case
> > - corrected minor syntax problems
> >
> > Signed-off-by: Dongwon Kim 
> > ---
> >  src/egl/drivers/dri2/egl_dri2.c | 210 
> > ++--
> >  src/egl/drivers/dri2/egl_dri2.h |   2 +
> >  src/egl/main/eglapi.c   |   8 ++
> >  src/egl/main/eglsync.c  |   3 +-
> >  4 files changed, 213 insertions(+), 10 deletions(-)
> >
> > diff --git a/src/egl/drivers/dri2/egl_dri2.c 
> > b/src/egl/drivers/dri2/egl_dri2.c
> > index 8f50f0c..843cd53 100644
> > --- a/src/egl/drivers/dri2/egl_dri2.c
> > +++ b/src/egl/drivers/dri2/egl_dri2.c
> > @@ -38,6 +38,8 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> > +#include 
> >  #ifdef HAVE_LIBDRM
> >  #include 
> >  #include 
> > @@ -623,6 +625,8 @@ dri2_setup_screen(_EGLDisplay *disp)
> >   disp->Extensions.KHR_cl_event2 = EGL_TRUE;
> > }
> >
> > +   disp->Extensions.KHR_reusable_sync = EGL_TRUE;
> > +
> > if (dri2_dpy->image) {
> >if (dri2_dpy->image->base.version >= 10 &&
> >dri2_dpy->image->getCapabilities != NULL) {
> > @@ -2389,14 +2393,22 @@ dri2_egl_ref_sync(struct dri2_egl_sync *sync)
> > p_atomic_inc(&sync->refcount);
> >  }
> >
> > -static void
> > +static EGLint
> 
> Since this function only returns EGL_TRUE, the return type can just be void.

Thanks for pointing this out. I missed the point that cnd_wait is not returning
any error. I will fix this in v3. 

> 
> >  dri2_egl_unref_sync(struct dri2_egl_display *dri2_dpy,
> >  struct dri2_egl_sync *dri2_sync)
> >  {
> > if (p_atomic_dec_zero(&dri2_sync->refcount)) {
> > -  dri2_dpy->fence->destroy_fence(dri2_dpy->dri_screen, 
> > dri2_sync->fence);
> > +  if (dri2_sync->base.Type == EGL_SYNC_REUSABLE_KHR) {
> > + cnd_destroy(&dri2_sync->cond);
> > +  }
> > +
> > +  if (dri2_sync->fence)
> > + dri2_dpy->fence->destroy_fence(dri2_dpy->dri_screen, 
> > dri2_sync->fence);
> > +
> >free(dri2_sync);
> > }
> > +
> > +   return EGL_TRUE;
> >  }
> >
> >  static _EGLSync *
> > @@ -2408,6 +2420,8 @@ dri2_create_sync(_EGLDriver *drv, _EGLDisplay *dpy,
> > struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy);
> > struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx);
> > struct dri2_egl_sync *dri2_sync;
> > +   EGLint ret;
> > +   pthread_condattr_t attr;
> >
> > dri2_sync = calloc(1, sizeof(struct dri2_egl_sync));
> > if (!dri2_sync) {
> > @@ -2450,6 +2464,37 @@ dri2_create_sync(_EGLDriver *drv, _EGLDisplay *dpy,
> >  dri2_sync->fence, 0, 0))
> >   dri2_sync->base.SyncStatus = EGL_SIGNALED_KHR;
> >break;
> > +
> > +   case EGL_SYNC_REUSABLE_KHR:
> > +  /* intialize attr */
> > +  ret = pthread_condattr_init(&attr);
> > +
> > +  if (ret) {
> > + _eglError(EGL_BAD_ACCESS, "eglCreateSyncKHR");
> > + free(dri2_sync);
> > + return NULL;
> > +  }
> > +
> > +  /* change clock attribute to CLOCK_MONOTONIC */
> > +  ret = pthread_condattr_setclock(&attr, CLOCK_MONOTONIC);
> > +
> > +  if (ret) {
> > + _eglError(EGL_BAD_ACCESS, "eglCreateSyncKHR");
> > + free(dri2_sync);
> > + return NULL;
> > +  }
> > +
> > +  ret = pthread_cond_init(&dri2_sync->cond, &attr);
> > +
> > +  if (ret) {
> > + _eglError(EGL_BAD_ACCESS, "eglCreateSyncKHR");
> > + free(dri2_sync);
> > + return NULL;
> > +  }
> > +
> > +  /* initial status of reusable sync must be "unsignaled" */
> > +  dri2_sync->base.SyncStatus = EGL_UNSIGNALED_KHR;
> > +  break;
> > }
> >
> > p_atomic_set(&dri2_sync->refcount, 1);
> > @@ -2461,9 +2506,33 @@ dri2_destroy_sync(_EGLDriver *drv, _EGLDisplay *dpy, 
> > _EGLSync *sync)
> >  {
> > struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy);
> > struct dri2_egl_sync *dri2_sync = dri2_e

Re: [Mesa-dev] [PATCH] glsl: Lower variable indexing of system value arrays; treat like inputs.

2016-04-04 Thread Ilia Mirkin
On Mon, Apr 4, 2016 at 1:22 PM, Kenneth Graunke  wrote:
> On Monday, April 4, 2016 12:05:20 PM PDT Ilia Mirkin wrote:
>> For those (few, I'm sure) of us who are exceedingly lazy, what [glsl
>> ir] code ends up getting generated as a result of this?
>>
>> int temp;
>> if (zero == 0) temp = gl_SampleMaskIn[0]
>> else leave temp undefined?
>
> Running my new samplemaskin-indirect Piglit test, it appears to create:
>
> (assign (x) (var_ref dereference_array_value)
> (array_ref (var_ref gl_SampleMaskIn) (constant uint (0))))
>
> a.k.a.
>
>temp = gl_SampleMaskIn[0];
>
> For arrays of size <= 4, lower_variable_index_to_cond_assign() creates
> if-ladders that try array indexes sequentially.  It also unconditionally
> reads the first element.
>
> So, if there were 64 samples, the array size would be [2], and it would
> generate:
>
> temp = gl_SampleMaskIn[0]
> if (index == 1)
> temp = gl_SampleMaskIn[1];
>
> This seems optimal.

I like it! Esp since I'm fairly sure > 32x MSAA is not supported on
many levels in mesa (not to mention hw).

It appears that you're reusing EmitNoIndirectInput, however I'm fairly
sure that both nvc0 and radeonsi end up with that set to false:

  options->EmitNoIndirectInput =
 !screen->get_shader_param(screen, sh,
   PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR);

src/gallium/drivers/nouveau/nvc0/nvc0_screen.c:   case
PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
src/gallium/drivers/nouveau/nvc0/nvc0_screen.c-  return shader !=
PIPE_SHADER_FRAGMENT || class_3d < GM107_3D_CLASS;

[and it's only false for GM107 frag shaders because I need to spend
some time figuring out how the new IPA instructions behave differently
wrt this]

src/gallium/drivers/radeonsi/si_pipe.c: case
PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
src/gallium/drivers/radeonsi/si_pipe.c- return shader !=
PIPE_SHADER_GEOMETRY;

etc.

I would recommend just always returning true for system values, or
perhaps only for the gl_SampleMaskIn system value.

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 13/20] radeonsi: only emit compute shader state when switching shaders

2016-04-04 Thread Marek Olšák
On Sat, Apr 2, 2016 at 3:10 PM, Bas Nieuwenhuizen
 wrote:
> Signed-off-by: Bas Nieuwenhuizen 
> ---
>  src/gallium/drivers/radeonsi/si_compute.c | 142 
> +-
>  src/gallium/drivers/radeonsi/si_pipe.h|   2 +
>  2 files changed, 85 insertions(+), 59 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
> b/src/gallium/drivers/radeonsi/si_compute.c
> index e712b46..74db8d4 100644
> --- a/src/gallium/drivers/radeonsi/si_compute.c
> +++ b/src/gallium/drivers/radeonsi/si_compute.c
> @@ -173,6 +173,7 @@ static void si_initialize_compute(struct si_context *sctx)
> radeon_emit(cs, 0x190 /* Default value */);
> }
>
> +   sctx->cs_shader_state.emitted_bo = NULL;
> sctx->cs_shader_state.initialized = true;
>  }
>
> @@ -213,6 +214,87 @@ static bool si_setup_compute_scratch_buffer(struct 
> si_context *sctx,
> return true;
>  }
>
> +static bool si_switch_compute_shader(struct si_context *sctx,
> + struct si_compute *program,
> + struct si_shader *shader, unsigned 
> offset)
> +{
> +   struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
> +   struct si_shader_config inline_config = {0};
> +   struct si_shader_config *config;
> +   uint64_t shader_va;
> +
> +
> +   if (program->ir_type == PIPE_SHADER_IR_TGSI) {
> +   config = &shader->config;
> +   } else {
> +   unsigned lds_blocks;
> +
> +   config = &inline_config;
> +   si_shader_binary_read_config(&shader->binary, config, offset);
> +
> +   lds_blocks = config->lds_size;
> +   /* XXX: We are over allocating LDS.  For SI, the shader 
> reports
> +   * LDS in blocks of 256 bytes, so if there are 4 bytes lds
> +   * allocated in the shader and 4 bytes allocated by the state
> +   * tracker, then we will set LDS_SIZE to 512 bytes rather than 
> 256.
> +   */
> +   if (sctx->b.chip_class <= SI) {
> +   lds_blocks += align(program->local_size, 256) >> 8;
> +   } else {
> +   lds_blocks += align(program->local_size, 512) >> 9;
> +   }
> +
> +   assert(lds_blocks <= 0xFF);
> +
> +   config->rsrc2 &= C_00B84C_LDS_SIZE;
> +   config->rsrc2 |=  S_00B84C_LDS_SIZE(lds_blocks);
> +   }
> +
> +   if (!si_setup_compute_scratch_buffer(sctx, shader, config))
> +   return false;
> +
> +   if (sctx->cs_shader_state.emitted_bo == shader->bo &&
> +   sctx->cs_shader_state.offset == offset)
> +   return true;

Can this conditional, by any chance, be moved to the beginning of the function?

> +
> +   if (shader->scratch_bo) {
> +   COMPUTE_DBG(sctx->screen, "Waves: %u; Scratch per wave: %u 
> bytes; "
> +   "Total Scratch: %u bytes\n", sctx->scratch_waves,
> +   config->scratch_bytes_per_wave,
> +   config->scratch_bytes_per_wave *
> +   sctx->scratch_waves);
> +
> +   radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
> + shader->scratch_bo, RADEON_USAGE_READWRITE,
> + RADEON_PRIO_SCRATCH_BUFFER);
> +   }
> +
> +   shader_va = shader->bo->gpu_address + offset;
> +
> +   radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, shader->bo,
> + RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
> +
> +   radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 2, 0));
> +   radeon_emit(cs, (R_00B830_COMPUTE_PGM_LO - SI_SH_REG_OFFSET) >> 2);
> +   radeon_emit(cs, shader_va >> 8);
> +   radeon_emit(cs, shader_va >> 40);
> +
> +   radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 2, 0));
> +   radeon_emit(cs, (R_00B848_COMPUTE_PGM_RSRC1 - SI_SH_REG_OFFSET) >> 2);
> +   radeon_emit(cs, config->rsrc1);
> +   radeon_emit(cs, config->rsrc2);
> +
> +   radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 1, 0));
> +   radeon_emit(cs, (R_00B860_COMPUTE_TMPRING_SIZE - SI_SH_REG_OFFSET) >> 
> 2);
> +   radeon_emit(cs, S_00B860_WAVES(sctx->scratch_waves)
> +   | S_00B860_WAVESIZE(config->scratch_bytes_per_wave >> 10));

Please use radeon_set_sh_reg_seq and radeon_set_sh_reg above.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 2/2] i965: Fix invalid pointer read in dead_control_flow_eliminate().

2016-04-04 Thread Kenneth Graunke
On Monday, April 4, 2016 12:19:57 AM PDT Jason Ekstrand wrote:
> On Sun, Apr 3, 2016 at 10:56 PM, Kenneth Graunke 
> wrote:
> 
> > There may not be a previous block.  In this case, there's no real work
> > to do, so just continue on to the next one.
> >
> > v2: Update for bblock->prev() API change.
> >
> > Signed-off-by: Kenneth Graunke 
> > ---
> >  src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp | 4 
> >  1 file changed, 4 insertions(+)
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
> > b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
> > index 2c1abaf..114dc6c 100644
> > --- a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
> > @@ -42,6 +42,10 @@ dead_control_flow_eliminate(backend_shader *s)
> >
> > foreach_block_safe (block, s->cfg) {
> >bblock_t *prev_block = block->prev();
> > +
> > +  if (!prev_block)
> > + continue;
> > +
> >backend_instruction *const inst = block->start();
> >backend_instruction *const prev_inst = prev_block->end();
> >
> 
> Yeah, that guy right there will be a problem on the first loop iteration
> every time.  I've seen this in valgrind quite a bit lately.  Thanks for
> fixing it!
> 
> Reviewed-by: Jason Ekstrand 
> 
> Maybe Cc stable?
> --Jason

I checked 10.2 on the first spin of this series, and I didn't see the
valgrind errors there.


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: Lower variable indexing of system value arrays; treat like inputs.

2016-04-04 Thread Kenneth Graunke
On Monday, April 4, 2016 12:05:20 PM PDT Ilia Mirkin wrote:
> For those (few, I'm sure) of us who are exceedingly lazy, what [glsl
> ir] code ends up getting generated as a result of this?
> 
> int temp;
> if (zero == 0) temp = gl_SampleMaskIn[0]
> else leave temp undefined?

Running my new samplemaskin-indirect Piglit test, it appears to create:

(assign (x) (var_ref dereference_array_value)
(array_ref (var_ref gl_SampleMaskIn) (constant uint (0))))

a.k.a.

   temp = gl_SampleMaskIn[0];

For arrays of size <= 4, lower_variable_index_to_cond_assign() creates
if-ladders that try array indexes sequentially.  It also unconditionally
reads the first element.

So, if there were 64 samples, the array size would be [2], and it would
generate:

temp = gl_SampleMaskIn[0]
if (index == 1)
temp = gl_SampleMaskIn[1];

This seems optimal.


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] r600: Change default behaviour for undefined COLOR0

2016-04-04 Thread Axel Davy

On 04/04/2016 18:48, Roland Scheidegger wrote:

Am 04.04.2016 um 17:27 schrieb Axel Davy:



So is it OK with you to update the radeon behaviour for now?

As I said, you can do what you want there as far as I'm concerned. Just
saying it's going to be a constant battle to fix other drivers...


Also another thing to consider is that we plan to propose a context
creation flag
to indicate the state tracker is gallium nine. That would enable the hw
to -optionally-
change some gallium undefined things depending on whether you're gl or
nine.

For example radeon hw has a hw bit for rasterization rounding gl vs d3d.
This makes some differences when looking closely at the output results
for some draw calls.
This is an 'optional' feature in the sense that it's not too bad if you
don't use that hw bit,
but it is better if you do. Also some rasterizer behaviours can be
adapted, like NaN handling, etc.

I don't think a bit indicating that it's a nine context makes sense. There's other
bits already doing similar things, like half_pixel_center, clip_halfz -
which when introduced first strictly were for d3d9-like state trackers
(even if nine wasn't around then). So, so far we've broken down the "do
d3d9 behavior" to individual bits, and I don't think it makes sense to
change that. So, if you need another such rasterization bit, that should
be added separately (though I'm not quite sure what the difference
really is?).

I'm not sure other hw could set this rasterization setting as easily,
perhaps it needs to set a register that affects other things.
I don't know much about nouveau, but I have been told there
are d3d flags to switch the behaviour of parts of the pipeline.



Shader behavior is another matter. d3d9 generally really dislikes NaNs
(as does old GL btw albeit theoretically old or new it's still mostly
optional with glsl, argh). Some instructions are also of course
different between d3d9 and gl.
This comes up from time to time, but we didn't do anything yet. Could be
handled with shader property (so, per-shader) or even with additional
instruction flags (or additional instructions). Personally I'd be in
favor of shader property if it really helps.


The (1, 1, 1, 1) for color0 and (0,0,0,0) or (0,0,0,1) for the others is
less 'optional' in the sense games rely
on it, but as there are very very few games that do, I guess it could be
sort of okay to put it optional.

Err wait so it has to be (1,1,1,1) for color0 but (0,0,0,0) or (0,0,0,1)
are both ok for other inputs? Weird.
I think though (for this particular difference) it would make sense if
you'd just bite the bullet, check if the currently bound shaders are
affected by this and if so make a new variant translating that mess
away. gallium should make it easy to handle api differences relatively
well, but not to the point of handling every last bit of crazy stuff
apis might have.

Roland



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] About the usage of the "flat" interpolation qualifier with input (unsigned) integers and doubles in fragment shaders

2016-04-04 Thread Andres Gomez
Hi,

Thanks, Jason and Ian, for the feedback. See the patch sent at:
https://lists.freedesktop.org/archives/mesa-dev/2016-April/111842.html

Br.

On Wed, 2016-03-23 at 14:51 -0700, Ian Romanick wrote:
> On 03/18/2016 09:52 PM, Jason Ekstrand wrote:
> > 
> > 
> > 
> > On Fri, Mar 4, 2016 at 5:43 AM, Andres Gomez wrote:
> > 
> > Hi,
> > 
> > as complementary work to the one done to "Add FP64 support to
> > the i965
> > shader backends" at:
> > https://bugs.freedesktop.org/show_bug.cgi?id=92760
> > 
> > I've been working to add piglit tests that would check the new
> > features
> > added by this addition.
> > 
> > Checking the specification of the extension, when talking about
> > inputs,
> > the extension instructs that any fragment input being a double
> > or
> > derived type must be qualified with the interpolation qualifier
> > "flat".
> > 
> > This extends the restriction that already existed for signed
> > and
> > unsigned integers to doubles too.
> > 
> > In following versions of the spec, when doubles are already
> > part of it
> > we can read:
> > 
> > "
> > Fragment shader inputs that are signed or unsigned integers,
> > integer
> > vectors, or any double-precision floating-point type must be
> > qualified
> > with the interpolation qualifier flat.
> > "
> > 
> > The specification doesn't explicitly say it, but as far as I
> > understand,
> > it is reasonable to think that the restriction applies to input
> > variables in the form of arrays or members of structures of
> > these types.
> > 
> > 
> > Yes, I think that's a reasonable assumption.  The reason it's
> > required
> > for integers is because you can't interpolate them at all.  Some
> > hardware may also not support interpolating doubles so they are
> > included
> > in the statement.  Incidentally, we could implement it in i965
> > because
> > Intel hardware does its interpolation in the shader.  That does
> > not,
> > however, mean that we should :-)
> That might be an interesting extension, should anyone ever care.
> *shrug*
> 
> > 
> > In fact, current implementation already fails on compilation if
> > a
> > non-flat integer is in a fragment input as an array or a struct
> > member.
> > The same happens with doubles (llvmpipe).
> > 
> > It is also reasonable to think that the restriction would also
> > apply to
> > input interface blocks holding variables of these types.
> > 
> > However, the compilation doesn't fail when using a non-flat
> > integer or a
> > double. On execution, the integer version just gives bogus
> > values while,
> > with doubles, it crashes.
> > 
> > --
> > 
> > The situation with the glslangValidator is (it doesn't support
> > doubles):
> >   * When using a non-flat integer as a fragment input in
> > the form of
> > an array or as a member of a struct it fails with an
> > error of
> > the type:
> > 
> > 'int' : must be qualified as flat in
> > 
> >   * When using a non-flat integer as a fragment input in
> > the form of
> > a member of an interface block it succeeds on
> > compilation.
> > 
> > 
> > Glslang is an ok compiler, but I wouldn't give it too much
> > weight.  Just
> > because it's billed as a "reference compiler" doesn't mean you
> > should
> > trust it to get all the edge cases right.  I've personally fixed
> > enough
> > bugs in it to know better ;-)
> I would suggest submitting a bug against glslang as well.
> 
> > 
> > The situation with nvidia proprietary driver is:
> >   * When using a non-flat integer as a fragment input in
> > the form of
> > a scalar, an array, a member of a struct or a member of
> > an
> > interface block it doesn't fail on compilation but on
> > linkage
> > with an error of the type:
> > 
> > error C5215: Integer varying  must be flat
> > 
> >   * When using a double as a fragment input in the form of
> > a scalar
> > or an array it fails on compilation with an error of
> > the type:
> > 
> > error C7570: 64 bit input '' should be flat
> > 
> >   * When using a double as a fragment input in the form of
> > a
> > member of a struct or a member of an interface block it
> > doesn't
> > fail. Actually, it seems to do some kind of
> > interpolation.
> > 
> > --
> > 
> > Hence, based on this, my proposal would be:
> >   * To enforce the usage of the "flat" qualifier when in a
> > member of
> > an input block interface in a fragment shader for
> > (unsigned)
> > integers and doubles.
> > 
> > 
> > Agreed.  That seems like the right thing to do.
> >  
> > 
> >   * Open a bug in Khronos specs bugzilla about the
> > ambiguity of
> > this.
> > 
> > 
> > Done.  Khronos bug #15671.
> > 
> > Opinions?
> > 
> > -

Re: [Mesa-dev] [PATCH 12/20] radeonsi: rework compute scratch buffer

2016-04-04 Thread Marek Olšák
On Sat, Apr 2, 2016 at 3:10 PM, Bas Nieuwenhuizen
 wrote:
> Instead of having a scratch buffer per program, have one per
> context.
>
> Also removed the per kernel wave count calculations, but
> that only helped if the total number of waves in the dispatch
> was smaller than sctx->scratch_waves.
>
> Signed-off-by: Bas Nieuwenhuizen 
> ---
>  src/gallium/drivers/radeonsi/si_compute.c | 136 
> ++
>  src/gallium/drivers/radeonsi/si_pipe.c|   1 +
>  src/gallium/drivers/radeonsi/si_pipe.h|   2 +
>  3 files changed, 46 insertions(+), 93 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
> b/src/gallium/drivers/radeonsi/si_compute.c
> index 7320ace..e712b46 100644
> --- a/src/gallium/drivers/radeonsi/si_compute.c
> +++ b/src/gallium/drivers/radeonsi/si_compute.c
> @@ -46,47 +46,6 @@ struct si_compute {
> struct pipe_resource *global_buffers[MAX_GLOBAL_BUFFERS];
>  };
>
> -static void init_scratch_buffer(struct si_context *sctx, struct si_compute 
> *program)
> -{
> -   unsigned scratch_bytes = 0;
> -   uint64_t scratch_buffer_va;
> -   unsigned i;
> -
> -   /* Compute the scratch buffer size using the maximum number of waves.
> -* This way we don't need to recompute it for each kernel launch. */
> -   unsigned scratch_waves = 32 * 
> sctx->screen->b.info.num_good_compute_units;
> -   for (i = 0; i < program->shader.binary.global_symbol_count; i++) {
> -   unsigned offset =
> -   
> program->shader.binary.global_symbol_offsets[i];
> -   unsigned scratch_bytes_needed;
> -
> -   si_shader_binary_read_config(&program->shader.binary,
> -&program->shader.config, offset);
> -   scratch_bytes_needed = 
> program->shader.config.scratch_bytes_per_wave;
> -   scratch_bytes = MAX2(scratch_bytes, scratch_bytes_needed);
> -   }
> -
> -   if (scratch_bytes == 0)
> -   return;
> -
> -   program->shader.scratch_bo =
> -   si_resource_create_custom(sctx->b.b.screen,
> -   PIPE_USAGE_DEFAULT,
> -   scratch_bytes * scratch_waves);
> -
> -   scratch_buffer_va = program->shader.scratch_bo->gpu_address;
> -
> -   /* apply_scratch_relocs needs scratch_bytes_per_wave to be set
> -* to the maximum bytes needed, so it can compute the stride
> -* correctly.
> -*/
> -   program->shader.config.scratch_bytes_per_wave = scratch_bytes;
> -
> -   /* Patch the shader with the scratch buffer address. */
> -   si_shader_apply_scratch_relocs(sctx,
> -   &program->shader, scratch_buffer_va);
> -}
> -
>  static void *si_create_compute_state(
> struct pipe_context *ctx,
> const struct pipe_compute_state *cso)
> @@ -140,11 +99,6 @@ static void *si_create_compute_state(
> code = cso->prog + sizeof(struct pipe_llvm_program_header);
>
> radeon_elf_read(code, header->num_bytes, 
> &program->shader.binary);
> -   /* init_scratch_buffer patches the shader code with the 
> scratch address,
> -   * so we need to call it before si_shader_binary_read() which 
> uploads
> -   * the shader code to the GPU.
> -   */
> -   init_scratch_buffer(sctx, program);
> si_shader_binary_read_config(&program->shader.binary,
>  &program->shader.config, 0);
> }
> @@ -189,43 +143,6 @@ static void si_set_global_binding(
> }
>  }
>
> -/**
> - * This function computes the value for R_00B860_COMPUTE_TMPRING_SIZE.WAVES
> - * /p block_layout is the number of threads in each work group.
> - * /p grid layout is the number of work groups.
> - */
> -static unsigned compute_num_waves_for_scratch(
> -   const struct radeon_info *info,
> -   const uint *block_layout,
> -   const uint *grid_layout)
> -{
> -   unsigned num_sh = MAX2(info->max_sh_per_se, 1);
> -   unsigned num_se = MAX2(info->max_se, 1);
> -   unsigned num_blocks = 1;
> -   unsigned threads_per_block = 1;
> -   unsigned waves_per_block;
> -   unsigned waves_per_sh;
> -   unsigned waves;
> -   unsigned scratch_waves;
> -   unsigned i;
> -
> -   for (i = 0; i < 3; i++) {
> -   threads_per_block *= block_layout[i];
> -   num_blocks *= grid_layout[i];
> -   }
> -
> -   waves_per_block = align(threads_per_block, 64) / 64;
> -   waves = waves_per_block * num_blocks;
> -   waves_per_sh = align(waves, num_sh * num_se) / (num_sh * num_se);
> -   scratch_waves = waves_per_sh * num_sh * num_se;
> -
> -   if (waves_per_block > waves_per_sh) {
> -   scratch_waves = waves_per_block * num_sh * num_se;
> -   }
> -
> -   return scratch_wav

[Mesa-dev] [PATCH] glsl: Checks for interpolation into its own function.

2016-04-04 Thread Andres Gomez
This generalizes the validation also to be done for variables inside
interface blocks, which, for some cases, was missing.

For a discussion about the additional validation cases included see
https://lists.freedesktop.org/archives/mesa-dev/2016-March/109117.html
and Khronos bug #15671.

Signed-off-by: Andres Gomez 
---
 src/compiler/glsl/ast_to_hir.cpp | 316 +--
 1 file changed, 171 insertions(+), 145 deletions(-)

diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp
index 7c9be81..e4ebc6b 100644
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -2792,8 +2792,164 @@ apply_explicit_binding(struct _mesa_glsl_parse_state 
*state,
 }
 
 
+static void
+validate_interpolation_qualifier(struct _mesa_glsl_parse_state *state,
+ YYLTYPE *loc,
+ const glsl_interp_qualifier interpolation,
+ const struct ast_type_qualifier *qual,
+ const struct glsl_type *var_type,
+ ir_variable_mode mode)
+{
+   /* Interpolation qualifiers can only apply to shader inputs or outputs, but
+* not to vertex shader inputs nor fragment shader outputs.
+*
+* From section 4.3 ("Storage Qualifiers") of the GLSL 1.30 spec:
+*"Outputs from a vertex shader (out) and inputs to a fragment
+*shader (in) can be further qualified with one or more of these
+*interpolation qualifiers"
+*...
+*"These interpolation qualifiers may only precede the qualifiers in,
+*centroid in, out, or centroid out in a declaration. They do not apply
+*to the deprecated storage qualifiers varying or centroid
+*varying. They also do not apply to inputs into a vertex shader or
+*outputs from a fragment shader."
+*
+* From section 4.3 ("Storage Qualifiers") of the GLSL ES 3.00 spec:
+*"Outputs from a shader (out) and inputs to a shader (in) can be
+*further qualified with one of these interpolation qualifiers."
+*...
+*"These interpolation qualifiers may only precede the qualifiers
+*in, centroid in, out, or centroid out in a declaration. They do
+*not apply to inputs into a vertex shader or outputs from a
+*fragment shader."
+*/
+   if (state->is_version(130, 300)
+   && interpolation != INTERP_QUALIFIER_NONE) {
+  const char *i = interpolation_string(interpolation);
+  if (mode != ir_var_shader_in && mode != ir_var_shader_out)
+ _mesa_glsl_error(loc, state,
+  "interpolation qualifier `%s' can only be applied to 
"
+  "shader inputs or outputs.", i);
+
+  switch (state->stage) {
+  case MESA_SHADER_VERTEX:
+ if (mode == ir_var_shader_in) {
+_mesa_glsl_error(loc, state,
+ "interpolation qualifier '%s' cannot be applied 
to "
+ "vertex shader inputs", i);
+ }
+ break;
+  case MESA_SHADER_FRAGMENT:
+ if (mode == ir_var_shader_out) {
+_mesa_glsl_error(loc, state,
+ "interpolation qualifier '%s' cannot be applied 
to "
+ "fragment shader outputs", i);
+ }
+ break;
+  default:
+ break;
+  }
+   }
+
+   /* Interpolation qualifiers cannot be applied to 'centroid' and
+* 'centroid varying'.
+*
+* From section 4.3 ("Storage Qualifiers") of the GLSL 1.30 spec:
+*"interpolation qualifiers may only precede the qualifiers in,
+*centroid in, out, or centroid out in a declaration. They do not apply
+*to the deprecated storage qualifiers varying or centroid varying."
+*
+* These deprecated storage qualifiers do not exist in GLSL ES 3.00.
+*/
+   if (state->is_version(130, 0)
+   && interpolation != INTERP_QUALIFIER_NONE
+   && qual->flags.q.varying) {
+
+  const char *i = interpolation_string(interpolation);
+  const char *s;
+  if (qual->flags.q.centroid)
+ s = "centroid varying";
+  else
+ s = "varying";
+
+  _mesa_glsl_error(loc, state,
+   "qualifier '%s' cannot be applied to the "
+   "deprecated storage qualifier '%s'", i, s);
+   }
+
+   /* Integer fragment inputs must be qualified with 'flat'.  In GLSL ES,
+* so must integer vertex outputs.
+*
+* From section 4.3.4 ("Inputs") of the GLSL 1.50 spec:
+*"Fragment shader inputs that are signed or unsigned integers or
+*integer vectors must be qualified with the interpolation qualifier
+*flat."
+*
+* From section 4.3.4 ("Input Variables") of the GLSL 3.00 ES spec:
+*"Fragment shader inputs that are, or contain, signed or unsigned
+*integers or integer vectors must be qualified

Re: [Mesa-dev] [PATCH 1/3] r600: Change default behaviour for undefined COLOR0

2016-04-04 Thread Roland Scheidegger
Am 04.04.2016 um 17:27 schrieb Axel Davy:
> Le 04/04/2016 15:27, Roland Scheidegger a écrit :
>> Am 04.04.2016 um 14:04 schrieb Axel Davy:
>>> On 03/04/2016 20:41, Roland Scheidegger wrote :
 Am 03.04.2016 um 10:11 schrieb Axel Davy:
> d3d 9 needs COLOR0 to be 1.0 on all channels when
> undefined. 0.0 for the others is fine.
> GL behaviour is undefined.
>
 Not that I care what drivers use by default for undefined values,
 but it
 probably would be better to fix that properly in the state tracker
 since
 certainly gallium doesn't impose that default...

 Roland

>>> Implementing it properly in the state tracker is a bit annoying,
>>> because of possible combinations of usage ff pipeline vs non ff
>>> pipeline.
>>> It would be made easier if (0,0,0,0) was defined as the default in
>>> gallium,
>>> so only COLOR0 would have to be checked, else all outputs/inputs have to
>>> be checked.
>>>
>>> As it seems rather easy to fix in the driver, I chose that way.
>> Well all zeros might look sane as a default. I have no idea however
>> which drivers would conform to it - seems a bit unfair to "break" them
>> by a doc update...
>> (Though requiring a different default just on color0 input is indeed
>> insanity on d3d9's part.)
>>
>>> There seems to be very few games relying on the behaviour.
>> Yes, hopefully...
>>
>> Roland
>>
> So is that ok to you for now to update the radeon behaviour ?
As I said, you can do what you want there as far as I'm concerned. Just
saying it's going to be a constant battle to fix other drivers...

> 
> Also another thing to consider is that we plan to propose a context
> creation flag
> to indicate the state tracker is gallium nine. That would enable the hw
> to -optionally-
> change some gallium undefined things depending on whether you're gl or
> nine.
> 
> For example radeon hw has a hw bit for rasterization rounding gl vs d3d.
> This makes some differences when looking closely at the output results
> for some draw calls.
> This is an 'optional' feature in the sense that it's not too bad if you
> don't use that hw bit,
> but it is better if you do. Also some rasterizer behaviours can be
> adapted, like NaN handling, etc.
I don't think a bit that it's a nine context makes sense. There's other
bits already doing similar things, like half_pixel_center, clip_halfz -
which when introduced first strictly were for d3d9-like state trackers
(even if nine wasn't around then). So, so far we've broken down the "do
d3d9 behavior" to individual bits, and I don't think it makes sense to
change that. So, if you need another such rasterization bit, that should
be added separately (though I'm not quite sure what the difference
really is?).

Shader behavior is another matter. d3d9 generally really dislikes NaNs
(as does old GL btw albeit theoretically old or new it's still mostly
optional with glsl, argh). Some instructions are also of course
different between d3d9 and gl.
This comes up from time to time, but we didn't do anything yet. Could be
handled with shader property (so, per-shader) or even with additional
instruction flags (or additional instructions). Personally I'd be in
favor of shader property if it really helps.

> 
> The (1, 1, 1, 1) for color0 and (0,0,0,0) or (0,0,0,1) for the others is
> less 'optional' in the sense games rely
> on it, but as there are very very few games that do, I guess it could be
> sort of okay to put it optional.
Err wait so it has to be (1,1,1,1) for color0 but (0,0,0,0) or (0,0,0,1)
are both ok for other inputs? Weird.
I think though (for this particular difference) it would make sense if
you'd just bite the bullet, check if the currently bound shaders are
affected by this and if so make a new variant translating that mess
away. gallium should make it easy to handle api differences relatively
well, but not to the point of handling every last bit of crazy stuff
apis might have.

Roland

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 09/20] radeonsi: split input upload off from si_launch_grid

2016-04-04 Thread Marek Olšák
Patches 1-9:

Reviewed-by: Marek Olšák 

Marek

On Sat, Apr 2, 2016 at 3:10 PM, Bas Nieuwenhuizen
 wrote:
> Signed-off-by: Bas Nieuwenhuizen 
> ---
>  src/gallium/drivers/radeonsi/si_compute.c | 94 
> +--
>  1 file changed, 53 insertions(+), 41 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
> b/src/gallium/drivers/radeonsi/si_compute.c
> index f2b13f0..3702e80 100644
> --- a/src/gallium/drivers/radeonsi/si_compute.c
> +++ b/src/gallium/drivers/radeonsi/si_compute.c
> @@ -24,6 +24,7 @@
>
>  #include "tgsi/tgsi_parse.h"
>  #include "util/u_memory.h"
> +#include "util/u_upload_mgr.h"
>  #include "radeon/r600_pipe_common.h"
>  #include "radeon/radeon_elf_util.h"
>  #include "radeon/radeon_llvm_util.h"
> @@ -42,7 +43,6 @@ struct si_compute {
> unsigned input_size;
> struct si_shader shader;
>
> -   struct r600_resource *input_buffer;
> struct pipe_resource *global_buffers[MAX_GLOBAL_BUFFERS];
>  };
>
> @@ -152,11 +152,6 @@ static void *si_create_compute_state(
>TGSI_PROCESSOR_COMPUTE, stderr);
> si_shader_binary_upload(sctx->screen, &program->shader);
>
> -   if (program->input_size) {
> -   program->input_buffer = 
> si_resource_create_custom(sctx->b.b.screen,
> -   PIPE_USAGE_IMMUTABLE, program->input_size);
> -   }
> -
> return program;
>  }
>
> @@ -231,19 +226,64 @@ static unsigned compute_num_waves_for_scratch(
> return scratch_waves;
>  }
>
> -static void si_launch_grid(
> -   struct pipe_context *ctx, const struct pipe_grid_info *info)
> +static void si_upload_compute_input(struct si_context *sctx,
> +  const struct pipe_grid_info *info)
>  {
> -   struct si_context *sctx = (struct si_context*)ctx;
> struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
> struct si_compute *program = sctx->cs_shader_state.program;
> -   struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
> -   struct r600_resource *input_buffer = program->input_buffer;
> +   struct r600_resource *input_buffer = NULL;
> unsigned kernel_args_size;
> unsigned num_work_size_bytes = 36;
> uint32_t kernel_args_offset = 0;
> uint32_t *kernel_args;
> +   void *kernel_args_ptr;
> uint64_t kernel_args_va;
> +   unsigned i;
> +
> +   /* The extra num_work_size_bytes are for work group / work item size 
> information */
> +   kernel_args_size = program->input_size + num_work_size_bytes;
> +
> +   u_upload_alloc(sctx->b.uploader, 0, kernel_args_size, 256,
> +  &kernel_args_offset,
> +  (struct pipe_resource**)&input_buffer, 
> &kernel_args_ptr);
> +
> +   kernel_args = (uint32_t*)kernel_args_ptr;
> +   for (i = 0; i < 3; i++) {
> +   kernel_args[i] = info->grid[i];
> +   kernel_args[i + 3] = info->grid[i] * info->block[i];
> +   kernel_args[i + 6] = info->block[i];
> +   }
> +
> +   memcpy(kernel_args + (num_work_size_bytes / 4), info->input,
> +  program->input_size);
> +
> +
> +   for (i = 0; i < (kernel_args_size / 4); i++) {
> +   COMPUTE_DBG(sctx->screen, "input %u : %u\n", i,
> +   kernel_args[i]);
> +   }
> +
> +   kernel_args_va = input_buffer->gpu_address + kernel_args_offset;
> +
> +   radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, input_buffer,
> + RADEON_USAGE_READ, 
> RADEON_PRIO_CONST_BUFFER);
> +
> +   radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 2, 0));
> +   radeon_emit(cs,(R_00B900_COMPUTE_USER_DATA_0 - SI_SH_REG_OFFSET) >> 
> 2);
> +   radeon_emit(cs, kernel_args_va);
> +   radeon_emit(cs, S_008F04_BASE_ADDRESS_HI (kernel_args_va >> 32) |
> +   S_008F04_STRIDE(0));
> +
> +   pipe_resource_reference((struct pipe_resource**)&input_buffer, NULL);
> +}
> +
> +static void si_launch_grid(
> +   struct pipe_context *ctx, const struct pipe_grid_info *info)
> +{
> +   struct si_context *sctx = (struct si_context*)ctx;
> +   struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
> +   struct si_compute *program = sctx->cs_shader_state.program;
> +   struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
> uint64_t scratch_buffer_va = 0;
> uint64_t shader_va;
> unsigned i;
> @@ -268,25 +308,12 @@ static void si_launch_grid(
> /* Read the config information */
> si_shader_binary_read_config(&shader->binary, &shader->config, 
> info->pc);
>
> -   /* Upload the kernel arguments */
> -
> -   /* The extra num_work_size_bytes are for work group / work item size 
> information */
> -   kernel_args_size = program->input_size + num_work_size_bytes + 8 /* 
> For scratch va */;
> -
> -   kernel_args = sctx->b.ws->buffer_map(input_buffer->buf,
> - 

Re: [Mesa-dev] [PATCH 11/20] radeonsi: do per cs setup for compute shaders once per cs

2016-04-04 Thread Marek Olšák
On Sat, Apr 2, 2016 at 3:10 PM, Bas Nieuwenhuizen
 wrote:
> Also removes PKT3_CONTEXT_CONTROL as that is already being done
> by si_begin_new_cs, when emitting init_config.
>
> Signed-off-by: Bas Nieuwenhuizen 
> ---
>  src/gallium/drivers/radeonsi/si_compute.c| 69 
> +++-
>  src/gallium/drivers/radeonsi/si_hw_context.c |  2 +
>  src/gallium/drivers/radeonsi/si_pipe.h   |  1 +
>  3 files changed, 40 insertions(+), 32 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
> b/src/gallium/drivers/radeonsi/si_compute.c
> index 9c25335..7320ace 100644
> --- a/src/gallium/drivers/radeonsi/si_compute.c
> +++ b/src/gallium/drivers/radeonsi/si_compute.c
> @@ -226,6 +226,39 @@ static unsigned compute_num_waves_for_scratch(
> return scratch_waves;
>  }
>
> +static void si_initialize_compute(struct si_context *sctx)
> +{
> +   struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
> +
> +   radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 3, 0));
> +   radeon_emit(cs, (R_00B810_COMPUTE_START_X - SI_SH_REG_OFFSET) >> 2);

Please use radeon_set_sh_reg_seq.

> +   radeon_emit(cs, 0);
> +   radeon_emit(cs, 0);
> +   radeon_emit(cs, 0);
> +
> +   radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 3, 0));
> +   radeon_emit(cs, (R_00B854_COMPUTE_RESOURCE_LIMITS - SI_SH_REG_OFFSET) 
> >> 2);

Same here.

> +   radeon_emit(cs, 0);
> +   radeon_emit(cs, S_00B858_SH0_CU_EN(0x) | 
> S_00B858_SH1_CU_EN(0x));
> +   radeon_emit(cs, S_00B85C_SH0_CU_EN(0x) | 
> S_00B85C_SH1_CU_EN(0x));

Please add a comment that these two write COMPUTE_STATIC_THREAD_MGMT_SE0-1.

We should also write COMPUTE_STATIC_THREAD_MGMT_SE2 and 3 on CIK and
later chips. They are not in the docs, but sid.h defines them.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/20] radeonsi: set shader calling conventions

2016-04-04 Thread Tom Stellard
On Sat, Apr 02, 2016 at 03:10:44PM +0200, Bas Nieuwenhuizen wrote:
> Note that old mesa + new LLVM or new mesa + old LLVM breaks
> with this change and the corresponding LLVM change (D18559).
> 
> For LLVM version <= 3.8 we use the old method, but we can't detect
> people using a post 3.8 svn version that is still too old.
> 
> Signed-off-by: Bas Nieuwenhuizen 
> ---
>  src/gallium/drivers/radeon/radeon_llvm_emit.c | 17 -
>  1 file changed, 16 insertions(+), 1 deletion(-)
> 
> diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c 
> b/src/gallium/drivers/radeon/radeon_llvm_emit.c
> index 474154e..7174132 100644
> --- a/src/gallium/drivers/radeon/radeon_llvm_emit.c
> +++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c
> @@ -55,6 +55,13 @@ enum radeon_llvm_shader_type {
>   RADEON_LLVM_SHADER_CS = 3,
>  };
>  
> +enum radeon_llvm_calling_convention {
> + RADEON_LLVM_AMDGPU_VS = 87,
> + RADEON_LLVM_AMDGPU_GS = 88,
> + RADEON_LLVM_AMDGPU_PS = 89,
> + RADEON_LLVM_AMDGPU_CS = 90,
> +};
> +
>  void radeon_llvm_add_attribute(LLVMValueRef F, const char *name, int value)
>  {
>   char str[16];
> @@ -71,27 +78,35 @@ void radeon_llvm_add_attribute(LLVMValueRef F, const char 
> *name, int value)
>  void radeon_llvm_shader_type(LLVMValueRef F, unsigned type)
>  {
>   enum radeon_llvm_shader_type llvm_type;
> + enum radeon_llvm_calling_convention calling_conv;
>  

This looks like you will get 'unused variable warnings' with this change.
Probably the easiest thing to do is put (void)variable_name somewhere, but
I'm not sure this is really a big deal.
Either way:

Reviewed-by: Tom Stellard 


>   switch (type) {
>   case TGSI_PROCESSOR_VERTEX:
>   case TGSI_PROCESSOR_TESS_CTRL:
>   case TGSI_PROCESSOR_TESS_EVAL:
>   llvm_type = RADEON_LLVM_SHADER_VS;
> + calling_conv = RADEON_LLVM_AMDGPU_VS;
>   break;
>   case TGSI_PROCESSOR_GEOMETRY:
>   llvm_type = RADEON_LLVM_SHADER_GS;
> + calling_conv = RADEON_LLVM_AMDGPU_GS;
>   break;
>   case TGSI_PROCESSOR_FRAGMENT:
>   llvm_type = RADEON_LLVM_SHADER_PS;
> + calling_conv = RADEON_LLVM_AMDGPU_PS;
>   break;
>   case TGSI_PROCESSOR_COMPUTE:
>   llvm_type = RADEON_LLVM_SHADER_CS;
> + calling_conv = RADEON_LLVM_AMDGPU_CS;
>   break;
>   default:
>   assert(0);
>   }
>  
> - radeon_llvm_add_attribute(F, "ShaderType", llvm_type);
> + if (HAVE_LLVM >= 0x309)
> + LLVMSetFunctionCallConv(F, calling_conv);
> + else
> + radeon_llvm_add_attribute(F, "ShaderType", llvm_type);
>  }
>  
>  static void init_r600_target()
> -- 
> 2.7.4
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Allow 8x MSAA on >= 64bpp formats on Gen8+.

2016-04-04 Thread Ben Widawsky
On Mon, Apr 04, 2016 at 02:04:13AM -0700, Kenneth Graunke wrote:
> See commit 3b0279a69 - this restriction is documented in the "Surface
> Format" field of RENDER_SURFACE_STATE.
> 
> Looking at newer documentation, this restriction appears to exist on
> Haswell, but no longer applies on Gen8+.
> 
> Signed-off-by: Kenneth Graunke 

Reviewed-by: Ben Widawsky 

[snip]


-- 
Ben Widawsky, Intel Open Source Technology Center
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: Lower variable indexing of system value arrays; treat like inputs.

2016-04-04 Thread Ilia Mirkin
For those (few, I'm sure) of us who are exceedingly lazy, what [glsl
ir] code ends up getting generated as a result of this?

int temp;
if (zero == 0) temp = gl_SampleMaskIn[0]
else leave temp undefined?

On Mon, Apr 4, 2016 at 5:08 AM, Kenneth Graunke  wrote:
> Some system values, such as gl_SampleMaskIn[], may be arrays.
> lower_variable_index_to_cond_assign() did not handle this case and would
> hit an unreachable() assert.
>
> For now, lower when EmitNoIndirectInput is set.  We could potentially
> add another flag for system values, but I'm not sure how useful that
> would be.
>
> Fixes Piglit's new samplemaskin-indirect test.  Also fixes many ES31-CTS
> tests when OES_sample_variables is enabled.
>
> Cc: mesa-sta...@lists.freedesktop.org
> Signed-off-by: Kenneth Graunke 
> ---
>  src/compiler/glsl/lower_variable_index_to_cond_assign.cpp | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp 
> b/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp
> index 278d545..2391089 100644
> --- a/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp
> +++ b/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp
> @@ -385,6 +385,7 @@ public:
>case ir_var_const_in:
>   return this->lower_temps;
>
> +  case ir_var_system_value:
>case ir_var_shader_in:
>   /* The input array size is unknown at compiler time for non-patch
>* inputs in TCS and TES. The arrays are sized to
> --
> 2.7.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glsl: use has_shader_storage_buffer_objects helper

2016-04-04 Thread Ilia Mirkin
Replaces open-coded logic with existing helper.

Signed-off-by: Ilia Mirkin 
---
 src/compiler/glsl/lower_ubo_reference.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/compiler/glsl/lower_ubo_reference.cpp 
b/src/compiler/glsl/lower_ubo_reference.cpp
index 3155ab6..1a0140f 100644
--- a/src/compiler/glsl/lower_ubo_reference.cpp
+++ b/src/compiler/glsl/lower_ubo_reference.cpp
@@ -372,8 +372,7 @@ lower_ubo_reference_visitor::ubo_load(void *mem_ctx,
 static bool
 shader_storage_buffer_object(const _mesa_glsl_parse_state *state)
 {
-   return state->ARB_shader_storage_buffer_object_enable ||
-  state->is_version(430, 310);
+   return state->has_shader_storage_buffer_objects();
 }
 
 uint32_t
-- 
2.7.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 94805] Compile Mesa , specific compilation , error: LLVM is required to build Gallium

2016-04-04 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=94805

--- Comment #2 from tele  ---
I build LLVM like this:

#
mkdir build
cd build

export PKG_CONFIG_PATH=/usr/local/lib/pkgconfig
export CC="gcc -m32"
export CXX="g++ -m32"

cmake .. \
 -DLLVM_BUILD_32_BITS:BOOLEAN=ON \
 -DBUILD_SHARED_LIBS:BOOL=ON

make
#

#-
$ ls /usr/local/bin/
avcenc*   llvm-cov*llvm-mcmarkup*loadjpeg*
bugpoint* llvm-c-test* llvm-nm*  mpeg2vaenc*
c_rehash* llvm-cxxdump*llvm-objdump* mpeg2vldemo*
h264encode*   llvm-diff*   llvm-pdbdump* obj2yaml*
jpegenc*  llvm-dis*llvm-profdata*openssl*
libtool*  llvm-dsymutil*   llvm-ranlib@  opt*
libtoolize*   llvm-dwarfdump*  llvm-readobj* putsurface*
llc*  llvm-dwp*llvm-rtdyld*  sancov*
lli*  llvm-extract*llvm-size*vainfo*
llvm-ar*  llvm-lib@llvm-split*   verify-uselistorder*
llvm-as*  llvm-link*   llvm-stress*  xmlwf*
llvm-bcanalyzer*  llvm-lto*llvm-symbolizer*  yaml2obj*
llvm-config*  llvm-mc* llvm-tblgen*
#-

#---
$ objdump -p /usr/local/bin/llvm-config | grep format
/usr/local/bin/llvm-config: file format elf32-i386
#---

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 09/20] radeonsi: split input upload off from si_launch_grid

2016-04-04 Thread Marek Olšák
The commit message should say that this changes the allocation to
u_upload_alloc.

Marek

On Sat, Apr 2, 2016 at 3:10 PM, Bas Nieuwenhuizen
 wrote:
> Signed-off-by: Bas Nieuwenhuizen 
> ---
>  src/gallium/drivers/radeonsi/si_compute.c | 94 
> +--
>  1 file changed, 53 insertions(+), 41 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
> b/src/gallium/drivers/radeonsi/si_compute.c
> index f2b13f0..3702e80 100644
> --- a/src/gallium/drivers/radeonsi/si_compute.c
> +++ b/src/gallium/drivers/radeonsi/si_compute.c
> @@ -24,6 +24,7 @@
>
>  #include "tgsi/tgsi_parse.h"
>  #include "util/u_memory.h"
> +#include "util/u_upload_mgr.h"
>  #include "radeon/r600_pipe_common.h"
>  #include "radeon/radeon_elf_util.h"
>  #include "radeon/radeon_llvm_util.h"
> @@ -42,7 +43,6 @@ struct si_compute {
> unsigned input_size;
> struct si_shader shader;
>
> -   struct r600_resource *input_buffer;
> struct pipe_resource *global_buffers[MAX_GLOBAL_BUFFERS];
>  };
>
> @@ -152,11 +152,6 @@ static void *si_create_compute_state(
>TGSI_PROCESSOR_COMPUTE, stderr);
> si_shader_binary_upload(sctx->screen, &program->shader);
>
> -   if (program->input_size) {
> -   program->input_buffer = 
> si_resource_create_custom(sctx->b.b.screen,
> -   PIPE_USAGE_IMMUTABLE, program->input_size);
> -   }
> -
> return program;
>  }
>
> @@ -231,19 +226,64 @@ static unsigned compute_num_waves_for_scratch(
> return scratch_waves;
>  }
>
> -static void si_launch_grid(
> -   struct pipe_context *ctx, const struct pipe_grid_info *info)
> +static void si_upload_compute_input(struct si_context *sctx,
> +  const struct pipe_grid_info *info)
>  {
> -   struct si_context *sctx = (struct si_context*)ctx;
> struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
> struct si_compute *program = sctx->cs_shader_state.program;
> -   struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
> -   struct r600_resource *input_buffer = program->input_buffer;
> +   struct r600_resource *input_buffer = NULL;
> unsigned kernel_args_size;
> unsigned num_work_size_bytes = 36;
> uint32_t kernel_args_offset = 0;
> uint32_t *kernel_args;
> +   void *kernel_args_ptr;
> uint64_t kernel_args_va;
> +   unsigned i;
> +
> +   /* The extra num_work_size_bytes are for work group / work item size 
> information */
> +   kernel_args_size = program->input_size + num_work_size_bytes;
> +
> +   u_upload_alloc(sctx->b.uploader, 0, kernel_args_size, 256,
> +  &kernel_args_offset,
> +  (struct pipe_resource**)&input_buffer, 
> &kernel_args_ptr);
> +
> +   kernel_args = (uint32_t*)kernel_args_ptr;
> +   for (i = 0; i < 3; i++) {
> +   kernel_args[i] = info->grid[i];
> +   kernel_args[i + 3] = info->grid[i] * info->block[i];
> +   kernel_args[i + 6] = info->block[i];
> +   }
> +
> +   memcpy(kernel_args + (num_work_size_bytes / 4), info->input,
> +  program->input_size);
> +
> +
> +   for (i = 0; i < (kernel_args_size / 4); i++) {
> +   COMPUTE_DBG(sctx->screen, "input %u : %u\n", i,
> +   kernel_args[i]);
> +   }
> +
> +   kernel_args_va = input_buffer->gpu_address + kernel_args_offset;
> +
> +   radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, input_buffer,
> + RADEON_USAGE_READ, 
> RADEON_PRIO_CONST_BUFFER);
> +
> +   radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 2, 0));
> +   radeon_emit(cs,(R_00B900_COMPUTE_USER_DATA_0 - SI_SH_REG_OFFSET) >> 
> 2);
> +   radeon_emit(cs, kernel_args_va);
> +   radeon_emit(cs, S_008F04_BASE_ADDRESS_HI (kernel_args_va >> 32) |
> +   S_008F04_STRIDE(0));
> +
> +   pipe_resource_reference((struct pipe_resource**)&input_buffer, NULL);
> +}
> +
> +static void si_launch_grid(
> +   struct pipe_context *ctx, const struct pipe_grid_info *info)
> +{
> +   struct si_context *sctx = (struct si_context*)ctx;
> +   struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
> +   struct si_compute *program = sctx->cs_shader_state.program;
> +   struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
> uint64_t scratch_buffer_va = 0;
> uint64_t shader_va;
> unsigned i;
> @@ -268,25 +308,12 @@ static void si_launch_grid(
> /* Read the config information */
> si_shader_binary_read_config(&shader->binary, &shader->config, 
> info->pc);
>
> -   /* Upload the kernel arguments */
> -
> -   /* The extra num_work_size_bytes are for work group / work item size 
> information */
> -   kernel_args_size = program->input_size + num_work_size_bytes + 8 /* 
> For scratch va */;
> -
> -   kernel_args = sctx->b.ws->buffer_map(i

Re: [Mesa-dev] [PATCH 1/3] r600: Change default behaviour for undefined COLOR0

2016-04-04 Thread Axel Davy

Le 04/04/2016 15:27, Roland Scheidegger a écrit :

Am 04.04.2016 um 14:04 schrieb Axel Davy:

On 03/04/2016 20:41, Roland Scheidegger wrote :

Am 03.04.2016 um 10:11 schrieb Axel Davy:

d3d 9 needs COLOR0 to be 1.0 on all channels when
undefined. 0.0 for the others is fine.
GL behaviour is undefined.


Not that I care what drivers use by default for undefined values, but it
probably would be better to fix that properly in the state tracker since
certainly gallium doesn't impose that default...

Roland


Implementing it properly in the state tracker is a bit annoying,
because of possible combinations of usage ff pipeline vs non ff pipeline.
It would be made easier if (0,0,0,0) was defined as the default in gallium,
so only COLOR0 would have to be checked, else all outputs/inputs have to
be checked.

As it seems rather easy to fix in the driver, I chose that way.

Well all zeros might look sane as a default. I have no idea however
which drivers would conform to it - seems a bit unfair to "break" them
by a doc update...
(Though requiring a different default just on color0 input is indeed
insanity on d3d9's part.)


There seems to be very few games relying on the behaviour.

Yes, hopefully...

Roland


So is it OK with you to update the radeon behaviour for now?

Also, another thing to consider is that we plan to propose a context
creation flag to indicate the state tracker is gallium nine. That would
enable the hw to -optionally- change some gallium undefined things
depending on whether you're gl or nine.

For example, radeon hw has a hw bit for rasterization rounding gl vs d3d.
This makes some differences when looking closely at the output results 
for some draw calls.
This is an 'optional' feature in the sense that it's not too bad if you 
don't use that hw bit,
but it is better if you do. Also some rasterizer behaviours can be 
adapted, like NaN handling, etc.


The (1, 1, 1, 1) for color0 and (0,0,0,0) or (0,0,0,1) for the others is 
less 'optional' in the sense games rely
on it, but as there are very very few games that do, I guess it could be 
sort of okay to put it optional.


Axel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl/list: make nodes safe for double removal, etc.

2016-04-04 Thread Rob Clark
On Mon, Apr 4, 2016 at 10:34 AM, Iago Toral  wrote:
> On Sat, 2016-04-02 at 17:09 -0400, Rob Clark wrote:
>> From: Rob Clark 
>>
>> It's no extra overhead to do a _self_link() and it eliminates a class of
>> potential problems.
>
> it can also hide actual programming mistakes that would otherwise be
> immediately visible... does this actually help something specific?

Well, basically it avoids needing to explicitly do a _self_link()
after removing a node in cases where you know (for example) that you
might end up removing multiple times.  The kernel list implementation
does have separate list_del() and list_del_init(), which would be a
different possible way to go.

But in my experience the programming mistakes that this would hide are
simply cases where you wanted to do list_del_init() instead of
list_del(), so I'm curious about which other cases you are worried
about.

Anyways, this patch doesn't solve something in particular, it is
mostly just a response to a comment Jason made about my usage of
immediate _self_link() after removal on another patch.

BR,
-R

> Iago
>
>> Signed-off-by: Rob Clark 
>> Dared-by: Jason Ekstrand 
>> ---
>>  src/compiler/glsl/list.h | 15 +++
>>  1 file changed, 7 insertions(+), 8 deletions(-)
>>
>> diff --git a/src/compiler/glsl/list.h b/src/compiler/glsl/list.h
>> index a1c4d82..77e1f67 100644
>> --- a/src/compiler/glsl/list.h
>> +++ b/src/compiler/glsl/list.h
>> @@ -165,19 +165,18 @@ exec_node_get_prev(struct exec_node *n)
>>  }
>>
>>  static inline void
>> -exec_node_remove(struct exec_node *n)
>> +exec_node_self_link(struct exec_node *n)
>>  {
>> -   n->next->prev = n->prev;
>> -   n->prev->next = n->next;
>> -   n->next = NULL;
>> -   n->prev = NULL;
>> +   n->next = n;
>> +   n->prev = n;
>>  }
>>
>>  static inline void
>> -exec_node_self_link(struct exec_node *n)
>> +exec_node_remove(struct exec_node *n)
>>  {
>> -   n->next = n;
>> -   n->prev = n;
>> +   n->next->prev = n->prev;
>> +   n->prev->next = n->next;
>> +   exec_node_self_link(n);
>>  }
>>
>>  static inline void
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl/list: make nodes safe for double removal, etc.

2016-04-04 Thread Iago Toral
On Sat, 2016-04-02 at 17:09 -0400, Rob Clark wrote:
> From: Rob Clark 
> 
> It's no extra overhead to do a _self_link() and it eliminates a class of
> potential problems.

it can also hide actual programming mistakes that would otherwise be
immediately visible... does this actually help something specific?

Iago

> Signed-off-by: Rob Clark 
> Dared-by: Jason Ekstrand 
> ---
>  src/compiler/glsl/list.h | 15 +++
>  1 file changed, 7 insertions(+), 8 deletions(-)
> 
> diff --git a/src/compiler/glsl/list.h b/src/compiler/glsl/list.h
> index a1c4d82..77e1f67 100644
> --- a/src/compiler/glsl/list.h
> +++ b/src/compiler/glsl/list.h
> @@ -165,19 +165,18 @@ exec_node_get_prev(struct exec_node *n)
>  }
>  
>  static inline void
> -exec_node_remove(struct exec_node *n)
> +exec_node_self_link(struct exec_node *n)
>  {
> -   n->next->prev = n->prev;
> -   n->prev->next = n->next;
> -   n->next = NULL;
> -   n->prev = NULL;
> +   n->next = n;
> +   n->prev = n;
>  }
>  
>  static inline void
> -exec_node_self_link(struct exec_node *n)
> +exec_node_remove(struct exec_node *n)
>  {
> -   n->next = n;
> -   n->prev = n;
> +   n->next->prev = n->prev;
> +   n->prev->next = n->next;
> +   exec_node_self_link(n);
>  }
>  
>  static inline void


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: make ssbo predicate return true when in a GLSL 430 or ESSL 310 shader

2016-04-04 Thread Iago Toral
Instead of doing this, I think we should just call
state->has_shader_storage_buffer_objects(), which does exactly the same.

With that change:

Reviewed-by: Iago Toral Quiroga 

On Sat, 2016-04-02 at 21:51 -0400, Ilia Mirkin wrote:
> I can't tell whether this actually matters, but we're creating function
> signatures with this predicate, so it should probably match when SSBO's
> are available.
> 
> Signed-off-by: Ilia Mirkin 
> ---
>  src/compiler/glsl/lower_ubo_reference.cpp | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/src/compiler/glsl/lower_ubo_reference.cpp 
> b/src/compiler/glsl/lower_ubo_reference.cpp
> index d6269f7..3155ab6 100644
> --- a/src/compiler/glsl/lower_ubo_reference.cpp
> +++ b/src/compiler/glsl/lower_ubo_reference.cpp
> @@ -372,7 +372,8 @@ lower_ubo_reference_visitor::ubo_load(void *mem_ctx,
>  static bool
>  shader_storage_buffer_object(const _mesa_glsl_parse_state *state)
>  {
> -   return state->ARB_shader_storage_buffer_object_enable;
> +   return state->ARB_shader_storage_buffer_object_enable ||
> +  state->is_version(430, 310);
>  }
>  
>  uint32_t


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa/get: fix MAX_GEOMETRY_SHADER_STORAGE_BLOCKS

2016-04-04 Thread Iago Toral
Reviewed-by: Iago Toral Quiroga 

On Mon, 2016-04-04 at 07:55 +0100, Dave Airlie wrote:
> From: Dave Airlie 
> 
> this was returning the fragment shader value.
> 
> Signed-off-by: Dave Airlie 
> ---
>  src/mesa/main/get_hash_params.py | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/mesa/main/get_hash_params.py 
> b/src/mesa/main/get_hash_params.py
> index a0cc4f8..eb44a6a 100644
> --- a/src/mesa/main/get_hash_params.py
> +++ b/src/mesa/main/get_hash_params.py
> @@ -532,7 +532,7 @@ descriptor=[
>[ "MAX_GEOMETRY_ATOMIC_COUNTERS", 
> "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters), 
> extra_ARB_shader_atomic_counters_and_geometry_shader" ],
>  
>  # GL_ARB_shader_storage_buffer_object / geometry shader
> -  [ "MAX_GEOMETRY_SHADER_STORAGE_BLOCKS", 
> "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxShaderStorageBlocks), 
> extra_ARB_shader_storage_buffer_object_and_geometry_shader" ],
> +  [ "MAX_GEOMETRY_SHADER_STORAGE_BLOCKS", 
> "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxShaderStorageBlocks), 
> extra_ARB_shader_storage_buffer_object_and_geometry_shader" ],
>  
>  # GL_ARB_uniform_buffer_object / geometry shader
>[ "MAX_GEOMETRY_UNIFORM_BLOCKS", 
> "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxUniformBlocks), 
> extra_ARB_uniform_buffer_object_and_geometry_shader" ],


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/6] gallium: add way for drivers to create fences without flushing

2016-04-04 Thread Rob Clark
Hmm, so we could actually use EGL_SYNC_FLUSH_COMMANDS_BIT_KHR (which
is ignored when there is not a ctx bound).. from looking closer at
KHR_fence_sync this appears to be how things are expected to work.

ofc that means that you end up needing to keep both
ctx->fence_finish() and screen->fence_finish(), or maybe extend
screen->fence_finish() to take an optional ctx ptr.. neither of which
is the prettiest thing.

BR,
-R


On Mon, Apr 4, 2016 at 8:10 AM, Marek Olšák  wrote:
> There is one problem with this: It doesn't allow calling
> pipe_context::fence_finish from another thread in a thread-safe
> manner, and it implies that fence_finish is generally NOT thread-safe.
> This thread safety is something I'd like to preserve.
>
> I would say that a flush flag telling the driver not to flush would be
> better, so that drivers which have per-screen fences and cheap flushes
> can ignore it.
>
> Marek
>
> On Fri, Apr 1, 2016 at 10:29 PM, Rob Clark  wrote:
>> From: Rob Clark 
>>
>> Since current thing is kinda horrible for tilers.  And that issue will
>> be even worse with EGL_ANDROID_native_fence_sync.
>>
>> Not wired up yet for gl syncobj, which can come later.  For now we just
>> need this with EGL.
>>
>> Signed-off-by: Rob Clark 
>> ---
>>  src/gallium/include/pipe/p_context.h  | 24 
>>  src/gallium/state_trackers/dri/dri2.c | 29 -
>>  2 files changed, 44 insertions(+), 9 deletions(-)
>>
>> diff --git a/src/gallium/include/pipe/p_context.h 
>> b/src/gallium/include/pipe/p_context.h
>> index 1c97e82..02a946b 100644
>> --- a/src/gallium/include/pipe/p_context.h
>> +++ b/src/gallium/include/pipe/p_context.h
>> @@ -457,6 +457,30 @@ struct pipe_context {
>>   unsigned flags);
>>
>> /**
>> +* Create a fence without necessarily flushing rendering.  Note
>> +* that if the driver implements this, it must also implement
>> +* ctx->fence_finish() which will be used instead of
>> +* screen->fence_finish() to give the driver an opportunity to
>> +* flush.
>> +*
>> +* This allows drivers, in particular tilers, to defer flush
>> +* until someone actually wants to wait on a fence.
>> +*
>> +* \param fence  if not NULL, an old fence to unref and transfer a
>> +*new fence reference to
>> +*/
>> +   void (*create_fence)(struct pipe_context *pipe,
>> +struct pipe_fence_handle **fence);
>> +
>> +   /**
>> +* Wait for the fence to finish.
>> +* \param timeout in nanoseconds (may be PIPE_TIMEOUT_INFINITE).
>> +*/
>> +   boolean (*fence_finish)(struct pipe_context *pipe,
>> +   struct pipe_fence_handle *fence,
>> +   uint64_t timeout);
>> +
>> +   /**
>>  * Create a view on a texture to be used by a shader stage.
>>  */
>> struct pipe_sampler_view * (*create_sampler_view)(struct pipe_context 
>> *ctx,
>> diff --git a/src/gallium/state_trackers/dri/dri2.c 
>> b/src/gallium/state_trackers/dri/dri2.c
>> index fb0a180..b66d885 100644
>> --- a/src/gallium/state_trackers/dri/dri2.c
>> +++ b/src/gallium/state_trackers/dri/dri2.c
>> @@ -1320,7 +1320,12 @@ dri2_create_fence(__DRIcontext *_ctx)
>> if (!fence)
>>return NULL;
>>
>> -   ctx->flush(ctx, &fence->pipe_fence, 0);
>> +   if (ctx->create_fence) {
>> +  debug_assert(ctx->fence_finish);
>> +  ctx->create_fence(ctx, &fence->pipe_fence);
>> +   } else {
>> +  ctx->flush(ctx, &fence->pipe_fence, 0);
>> +   }
>>
>> if (!fence->pipe_fence) {
>>FREE(fence);
>> @@ -1376,27 +1381,33 @@ static GLboolean
>>  dri2_client_wait_sync(__DRIcontext *_ctx, void *_fence, unsigned flags,
>>uint64_t timeout)
>>  {
>> +   struct pipe_context *ctx = dri_context(_ctx)->st->pipe;
>> struct dri2_fence *fence = (struct dri2_fence*)_fence;
>> struct dri_screen *driscreen = fence->driscreen;
>> struct pipe_screen *screen = driscreen->base.screen;
>> +   struct pipe_fence_handle *pipe_fence = NULL;
>>
>> -   /* No need to flush. The context was flushed when the fence was created. 
>> */
>> +   /* No need to flush. The context was flushed when the fence was created,
>> +* or the ctx implements ctx->fence_finish() which will take care of
>> +* flushing if required
>> +*/
>>
>> if (fence->pipe_fence)
>> -  return screen->fence_finish(screen, fence->pipe_fence, timeout);
>> +  pipe_fence = fence->pipe_fence;
>> else if (fence->cl_event) {
>> -  struct pipe_fence_handle *pipe_fence =
>> - driscreen->opencl_dri_event_get_fence(fence->cl_event);
>> -
>> -  if (pipe_fence)
>> - return screen->fence_finish(screen, pipe_fence, timeout);
>> -  else
>> +  pipe_fence = driscreen->opencl_dri_event_get_fence(fence->cl_event);
>> +  if (!pipe_fence)
>>   return driscreen->opencl_dri_event_wait(fence->cl_event, timeout);
>> }
>> else {
>>assert(0);
>>retu

Re: [Mesa-dev] [PATCH 4/6] gallium: add way for drivers to create fences without flushing

2016-04-04 Thread Rob Clark
well,

https://groups.google.com/a/chromium.org/forum/#!topic/chromium-os-reviews/IFiv8rZfAq0

does imply that it could be called without a ctx bound..  hmm..

BR,
-R

On Mon, Apr 4, 2016 at 8:43 AM, Rob Clark  wrote:
> Hmm, what are the requirements of eglClientWaitSyncKHR() about having
> current context bound?  Not being thread-safe (ie. being associated w/
> current ctx) is pretty important to allow apps that create fences at
> in-opportune times to not force a mid-frame flush for tilers.. :-(
>
> BR,
> -R
>
>
> On Mon, Apr 4, 2016 at 8:10 AM, Marek Olšák  wrote:
>> There is one problem with this: It doesn't allow calling
>> pipe_context::fence_finish from another thread in a thread-safe
>> manner, and it implies that fence_finish is generally NOT thread-safe.
>> This thread safety is something I'd like to preserve.
>>
>> I would say that a flush flag telling the driver not to flush would be
>> better, so that drivers which have per-screen fences and cheap flushes
>> can ignore it.
>>
>> Marek
>>
>> On Fri, Apr 1, 2016 at 10:29 PM, Rob Clark  wrote:
>>> From: Rob Clark 
>>>
>>> Since current thing is kinda horrible for tilers.  And that issue will
>>> be even worse with EGL_ANDROID_native_fence_sync.
>>>
>>> Not wired up yet for gl syncobj, which can come later.  For now we just
>>> need this with EGL.
>>>
>>> Signed-off-by: Rob Clark 
>>> ---
>>>  src/gallium/include/pipe/p_context.h  | 24 
>>>  src/gallium/state_trackers/dri/dri2.c | 29 -
>>>  2 files changed, 44 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/src/gallium/include/pipe/p_context.h 
>>> b/src/gallium/include/pipe/p_context.h
>>> index 1c97e82..02a946b 100644
>>> --- a/src/gallium/include/pipe/p_context.h
>>> +++ b/src/gallium/include/pipe/p_context.h
>>> @@ -457,6 +457,30 @@ struct pipe_context {
>>>   unsigned flags);
>>>
>>> /**
>>> +* Create a fence without necessarily flushing rendering.  Note
>>> +* that if the driver implements this, it must also implement
>>> +* ctx->fence_finish() which will be used instead of
>>> +* screen->fence_finish() to give the driver an opportunity to
>>> +* flush.
>>> +*
>>> +* This allows drivers, in particular tilers, to defer flush
>>> +* until someone actually wants to wait on a fence.
>>> +*
>>> +* \param fence  if not NULL, an old fence to unref and transfer a
>>> +*new fence reference to
>>> +*/
>>> +   void (*create_fence)(struct pipe_context *pipe,
>>> +struct pipe_fence_handle **fence);
>>> +
>>> +   /**
>>> +* Wait for the fence to finish.
>>> +* \param timeout in nanoseconds (may be PIPE_TIMEOUT_INFINITE).
>>> +*/
>>> +   boolean (*fence_finish)(struct pipe_context *pipe,
>>> +   struct pipe_fence_handle *fence,
>>> +   uint64_t timeout);
>>> +
>>> +   /**
>>>  * Create a view on a texture to be used by a shader stage.
>>>  */
>>> struct pipe_sampler_view * (*create_sampler_view)(struct pipe_context 
>>> *ctx,
>>> diff --git a/src/gallium/state_trackers/dri/dri2.c 
>>> b/src/gallium/state_trackers/dri/dri2.c
>>> index fb0a180..b66d885 100644
>>> --- a/src/gallium/state_trackers/dri/dri2.c
>>> +++ b/src/gallium/state_trackers/dri/dri2.c
>>> @@ -1320,7 +1320,12 @@ dri2_create_fence(__DRIcontext *_ctx)
>>> if (!fence)
>>>return NULL;
>>>
>>> -   ctx->flush(ctx, &fence->pipe_fence, 0);
>>> +   if (ctx->create_fence) {
>>> +  debug_assert(ctx->fence_finish);
>>> +  ctx->create_fence(ctx, &fence->pipe_fence);
>>> +   } else {
>>> +  ctx->flush(ctx, &fence->pipe_fence, 0);
>>> +   }
>>>
>>> if (!fence->pipe_fence) {
>>>FREE(fence);
>>> @@ -1376,27 +1381,33 @@ static GLboolean
>>>  dri2_client_wait_sync(__DRIcontext *_ctx, void *_fence, unsigned flags,
>>>uint64_t timeout)
>>>  {
>>> +   struct pipe_context *ctx = dri_context(_ctx)->st->pipe;
>>> struct dri2_fence *fence = (struct dri2_fence*)_fence;
>>> struct dri_screen *driscreen = fence->driscreen;
>>> struct pipe_screen *screen = driscreen->base.screen;
>>> +   struct pipe_fence_handle *pipe_fence = NULL;
>>>
>>> -   /* No need to flush. The context was flushed when the fence was 
>>> created. */
>>> +   /* No need to flush. The context was flushed when the fence was created,
>>> +* or the ctx implements ctx->fence_finish() which will take care of
>>> +* flushing if required
>>> +*/
>>>
>>> if (fence->pipe_fence)
>>> -  return screen->fence_finish(screen, fence->pipe_fence, timeout);
>>> +  pipe_fence = fence->pipe_fence;
>>> else if (fence->cl_event) {
>>> -  struct pipe_fence_handle *pipe_fence =
>>> - driscreen->opencl_dri_event_get_fence(fence->cl_event);
>>> -
>>> -  if (pipe_fence)
>>> - return screen->fence_finish(screen, pipe_fence, timeout);
>>> -  else
>>> +  pipe_fence = driscreen->op

[Mesa-dev] Mesa 11.2.0

2016-04-04 Thread Emil Velikov
Hello all,

Mesa 11.2.0 is now available.

Huge shout out goes to everyone involved. Thank you for your amazing and
ongoing work !

Here are some of the highlights:


- CI
 - Add Travis-CI configuration file.

- Documented more environment variables, squashed a ton of typos.

- Core mesa
 - Fix locking of GLsync objects.
 - Ongoing work for GL_OES_geometry_shader support

- GLSL
 - Massive amount of fixes - ubo/ssbo amongst others.
 - Significant rework in preparation for GL_ARB_enhanced_layouts support.

- GL
 - Reduced libGL.so binary size (by about 15%)

- Nine
 - Various fixes - multithreading, rounding issues, honour alignments, etc.

- WGL
 - Add support for WGL_ARB_render_texture

- OSMesa (both classic and gallium)
 - Add new OSMesaCreateContextAttribs() API

- EGL
 - Android: Add ANDROID_framebuffer_target and ANDROID_recordable support.
 - Wayland: Try to use wl_surface.damage_buffer for SwapBuffersWithDamage
 - X11: Fixed a number of crashes
  

Video backends:
- OMX
 - Correct the timestamping during video decoding

- VAAPI
 - Disable MPEG4 by default
 - Make the implementation thread safe
 - Add BOB/motion adaptive deinterlacing


Drivers:
- i965
 - Add more KBL PCI IDs
 - Fleshed out libi965_compiler in preparation for Vulkan support

- llvm
 - POWER8 optimised codepaths

- nouveau
 - Renamed various symbols to follow the naming scheme used by Nvidia.
 - Updated against the new nouveau UAPI
 - Initial support for GM20x GPUs
 - Added a nouveau backend for st/va
 - Ongoing work towards compute shaders support
 - More performance counters work - added SM30 (Kepler), removed unused ones.

- radeon(s)
 - Dropped support for LLVM 3.5. LLVM 3.6 or later is required
 - ETC support for Stoney
 - Disable MPEG1 (UVD)
 - Big endian fixes

- svga
 - Avoid emitting redundant commands - SetIndexBuffer, SetVertexBuffers
 - Increase the fence timeout

- virgl
 - Add virtio 1.0 PCI IDs


Platform specific:
- Android
 - Added virgl to the build
 - Handle secondary arch on mixed 32/64bit builds, ARM64 support
 - Fixes when building for SSE 4.1 capable CPU
 - Various compatibility fixes for AOSP build system rework

- UNIX
 - DragonFly support/fixes
 - Removed hardcoded link libraries references (-ldl, -lpthread)
 - Check for correct python binary name.

- Windows
 - Visual Studio 2013 or later is now required


This release adds support for the following extensions:

- GL_ARB_arrays_of_arrays on all gallium drivers that provide GLSL 1.30
- GL_ARB_base_instance on freedreno/a4xx
- GL_ARB_compute_shader on i965
- GL_ARB_copy_image on r600
- GL_ARB_indirect_parameters on nvc0
- GL_ARB_query_buffer_object on nvc0
- GL_ARB_shader_atomic_counters on nvc0
- GL_ARB_shader_draw_parameters on i965, nvc0
- GL_ARB_shader_storage_buffer_object on nvc0
- GL_ARB_tessellation_shader on i965 and r600 (evergreen/cayman only)
- GL_ARB_texture_buffer_object_rgb32 on freedreno/a4xx
- GL_ARB_texture_buffer_range on freedreno/a4xx
- GL_ARB_texture_query_lod on freedreno/a4xx
- GL_ARB_texture_rgb10_a2ui on freedreno/a4xx
- GL_ARB_texture_view on freedreno/a4xx
- GL_ARB_vertex_type_10f_11f_11f_rev on freedreno/a4xx
- GL_KHR_texture_compression_astc_ldr on freedreno/a4xx
- GL_AMD_performance_monitor on radeonsi (CIK+ only)
- GL_ATI_meminfo on r600, radeonsi
- GL_NVX_gpu_memory_info on r600, radeonsi


git tag: mesa-11.2.0

ftp://ftp.freedesktop.org/pub/mesa/11.2.0/mesa-11.2.0.tar.gz
MD5: c2a71920490edc66cebfa3605a3470a4  mesa-11.2.0.tar.gz
SHA1: 147e28fc5a77a5f7e4a01ee0c8fdd6c7d04e2918  mesa-11.2.0.tar.gz
SHA256: dea3d8143929aad5c24ef0993ddb05807b30c284b488fc62903adfcc1c127887  
mesa-11.2.0.tar.gz
PGP: ftp://ftp.freedesktop.org/pub/mesa/11.2.0/mesa-11.2.0.tar.gz.sig

ftp://ftp.freedesktop.org/pub/mesa/11.2.0/mesa-11.2.0.tar.xz
MD5: aee389ef4fe00c4251fcb866ca3c510f  mesa-11.2.0.tar.xz
SHA1: df92533f313869a390cc899f0aef4ef23d07b7a4  mesa-11.2.0.tar.xz
SHA256: 1c1fed2674abf3f16ed2623e9a5694d6752c293194e18462ebc644a19cfaafb2  
mesa-11.2.0.tar.xz
PGP: ftp://ftp.freedesktop.org/pub/mesa/11.2.0/mesa-11.2.0.tar.xz.sig
--
-Emil



signature.asc
Description: OpenPGP digital signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 03/20] radeonsi: add shared memory

2016-04-04 Thread Marek Olšák
On Sat, Apr 2, 2016 at 3:10 PM, Bas Nieuwenhuizen
 wrote:
> Declares the shared memory as a global variable so that
> LLVM is aware of it and it does not conflict with passes
> like AMDGPUPromoteAlloca.
>
> Signed-off-by: Bas Nieuwenhuizen 
> ---
>  src/gallium/drivers/radeon/radeon_llvm.h   |  3 ++
>  .../drivers/radeon/radeon_setup_tgsi_llvm.c|  4 +++
>  src/gallium/drivers/radeonsi/si_shader.c   | 35 
> ++
>  src/gallium/drivers/radeonsi/si_shader.h   |  3 ++
>  4 files changed, 45 insertions(+)
>
> diff --git a/src/gallium/drivers/radeon/radeon_llvm.h 
> b/src/gallium/drivers/radeon/radeon_llvm.h
> index 0a164bb..3e11b36 100644
> --- a/src/gallium/drivers/radeon/radeon_llvm.h
> +++ b/src/gallium/drivers/radeon/radeon_llvm.h
> @@ -68,6 +68,9 @@ struct radeon_llvm_context {
> unsigned index,
> const struct tgsi_full_declaration *decl);
>
> +   void (*declare_memory_region)(struct radeon_llvm_context *,
> +   const struct tgsi_full_declaration *decl);
> +
> /** This array contains the input values for the shader.  Typically 
> these
>   * values will be in the form of a target intrinsic that will inform 
> the
>   * backend how to load the actual inputs to the shader.
> diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
> b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> index fb883cb..5a3b586 100644
> --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> @@ -366,6 +366,10 @@ static void emit_declaration(
> break;
> }
>
> +   case TGSI_FILE_MEMORY:
> +   if (ctx->declare_memory_region)
> +   ctx->declare_memory_region(ctx, decl);
> +
> default:
> break;
> }
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
> b/src/gallium/drivers/radeonsi/si_shader.c
> index 2c44b14..2ce37ca 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -121,6 +121,9 @@ struct si_shader_context
> LLVMTypeRef v4i32;
> LLVMTypeRef v4f32;
> LLVMTypeRef v8i32;
> +
> +   unsigned memory_region_count;
> +   LLVMValueRef *memory_regions;
>  };
>
>  static struct si_shader_context *si_shader_context(
> @@ -1320,6 +1323,37 @@ static void declare_system_value(
> radeon_bld->system_values[index] = value;
>  }
>
> +static void declare_compute_memory(struct radeon_llvm_context *radeon_bld,
> +   const struct tgsi_full_declaration *decl)
> +{
> +   struct si_shader_context *ctx =
> +   si_shader_context(&radeon_bld->soa.bld_base);
> +   struct si_shader_selector *sel = ctx->shader->selector;
> +   struct gallivm_state *gallivm = &radeon_bld->gallivm;
> +
> +   LLVMTypeRef i8 = LLVMInt8TypeInContext(gallivm->context);
> +   LLVMTypeRef i8p = LLVMPointerType(i8, LOCAL_ADDR_SPACE);

You can use ctx->i8 here.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] r600: Change default behaviour for undefined COLOR0

2016-04-04 Thread Roland Scheidegger
Am 04.04.2016 um 14:04 schrieb Axel Davy:
> On 03/04/2016 20:41, Roland Scheidegger wrote :
>> Am 03.04.2016 um 10:11 schrieb Axel Davy:
>>> d3d 9 needs COLOR0 to be 1.0 on all channels when
>>> undefined. 0.0 for the others is fine.
>>> GL behaviour is undefined.
>>>
>> Not that I care what drivers use by default for undefined values, but it
>> probably would be better to fix that properly in the state tracker since
>> certainly gallium doesn't impose that default...
>>
>> Roland
>>
> Implementing it properly in the state tracker is a bit annoying,
> because of possible combinations of usage ff pipeline vs non ff pipeline.
> It would be made easier if (0,0,0,0) was defined as the default in gallium,
> so only COLOR0 would have to be checked, else all outputs/inputs have to
> be checked.
> 
> As it seems rather easy to fix in the driver, I chose that way.
Well all zeros might look sane as a default. I have no idea however
which drivers would conform to it - seems a bit unfair to "break" them
by a doc update...
(Though requiring a different default just on color0 input is indeed
insanity on d3d9's part.)

> 
> There seems to be very few games relying on the behaviour.
Yes, hopefully...

Roland

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 11/12] nvc0: Implement cull_distance as a special form of clip distance

2016-04-04 Thread Tobias Klausmann



On 04.04.2016 04:32, Ilia Mirkin wrote:

On Sun, Apr 3, 2016 at 10:15 PM, Dave Airlie  wrote:

From: Tobias Klausmann 

This enables ARB_cull_distance.

This isn't enough - something needs to lower the CULLDIST into
CLIPDIST (or some combined thing). Ideally they'd both just come in as
CLIPDIST and the property would let us know where the split is.


Actually it does come in as CULLDIST for both and it works for 0, 4, 8 
distances, e.g. 4 clip 4 cull, just not for, let's say, 2 clip 6 cull 
distances. The lowering pass fails to do that properly as of now...





Signed-off-by: Tobias Klausmann 
---
  docs/GL3.txt| 2 +-
  src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 2 ++
  src/gallium/drivers/nouveau/nvc0/nvc0_program.h | 1 +
  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c  | 2 +-
  4 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 5b6dc89..f28c8a9 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -211,7 +211,7 @@ GL 4.5, GLSL 4.50:
GL_ARB_ES3_1_compatibilitynot started
GL_ARB_clip_control   DONE (i965, nv50, 
nvc0, r600, radeonsi, llvmpipe, softpipe)
GL_ARB_conditional_render_invertedDONE (i965, nv50, 
nvc0, r600, radeonsi, llvmpipe, softpipe)
-  GL_ARB_cull_distance  in progress (Tobias)
+  GL_ARB_cull_distance  DONE (softpipe)

Presumably this would be nvc0, not softpipe?


Hehe, my local version stated both, as both are "working"




GL_ARB_derivative_control DONE (i965, nv50, 
nvc0, r600, radeonsi)
GL_ARB_direct_state_accessDONE (all drivers)
GL_ARB_get_texture_sub_image  DONE (all drivers)
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index db02fa2..5ca6faa 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -49,6 +49,7 @@ nvc0_shader_input_address(unsigned sn, unsigned si)
 case TGSI_SEMANTIC_COLOR:return 0x280 + si * 0x10;
 case TGSI_SEMANTIC_BCOLOR:   return 0x2a0 + si * 0x10;
 case TGSI_SEMANTIC_CLIPDIST: return 0x2c0 + si * 0x10;
+   case TGSI_SEMANTIC_CULLDIST: return 0x2c0 + si * 0x10;
 case TGSI_SEMANTIC_CLIPVERTEX:   return 0x270;
 case TGSI_SEMANTIC_PCOORD:   return 0x2e0;
 case TGSI_SEMANTIC_TESSCOORD:return 0x2f0;
@@ -78,6 +79,7 @@ nvc0_shader_output_address(unsigned sn, unsigned si)
 case TGSI_SEMANTIC_COLOR: return 0x280 + si * 0x10;
 case TGSI_SEMANTIC_BCOLOR:return 0x2a0 + si * 0x10;
 case TGSI_SEMANTIC_CLIPDIST:  return 0x2c0 + si * 0x10;
+   case TGSI_SEMANTIC_CULLDIST:  return 0x2c0 + si * 0x10;
 case TGSI_SEMANTIC_CLIPVERTEX:return 0x270;
 case TGSI_SEMANTIC_TEXCOORD:  return 0x300 + si * 0x10;
 case TGSI_SEMANTIC_EDGEFLAG:  return ~0;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h 
b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
index 8b8d221..d5c0d06 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
@@ -39,6 +39,7 @@ struct nvc0_program {
 struct {
uint32_t clip_mode; /* clip/cull selection */
uint8_t clip_enable; /* mask of defined clip planes */
+  uint8_t cull_enable; /* mask of defined cull planes */
uint8_t num_ucps; /* also set to max if ClipDistance is used */
uint8_t edgeflag; /* attribute index of edgeflag input */
bool need_vertex_id;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 21cb75d..f74c588 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -203,6 +203,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
 case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
 case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
 case PIPE_CAP_QUERY_BUFFER_OBJECT:
+   case PIPE_CAP_CULL_DISTANCE:
return 1;
 case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
@@ -238,7 +239,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
 case PIPE_CAP_PCI_BUS:
 case PIPE_CAP_PCI_DEVICE:
 case PIPE_CAP_PCI_FUNCTION:
-   case PIPE_CAP_CULL_DISTANCE:
return 0;

 case PIPE_CAP_VENDOR_ID:
--
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


_

Re: [Mesa-dev] [PATCH 07/12] glsl: Add arb_cull_distance support

2016-04-04 Thread Tobias Klausmann



On 04.04.2016 04:48, Timothy Arceri wrote:

On Mon, 2016-04-04 at 12:15 +1000, Dave Airlie wrote:

From: Tobias Klausmann 

Signed-off-by: Tobias Klausmann Const.MaxClipPlanes);
+   } else if (strcmp("gl_CullDistance", name) == 0
+  && size > state->Const.MaxClipPlanes) {
+  /* From the ARB_cull_distance spec:
+   *
+   *   "The gl_CullDistance array is predeclared as unsized and
+   *must be sized by the shader either redeclaring it with
+   *a size or indexing it only with integral constant
+   *expressions. The size determines the number and set of
+   *enabled cull distances and can be at most
+   *gl_MaxCullDistances."
+   */
+  _mesa_glsl_error(&loc, state, "`gl_CullDistance' array size
cannot "
+   "be larger than gl_MaxCullDistances (%u)",
+   state->Const.MaxClipPlanes);
 }
  }
  
diff --git a/src/compiler/glsl/builtin_variables.cpp

b/src/compiler/glsl/builtin_variables.cpp
index f31f9f6..8d049c8 100644
--- a/src/compiler/glsl/builtin_variables.cpp
+++ b/src/compiler/glsl/builtin_variables.cpp
@@ -302,7 +302,7 @@ public:
 const glsl_type *construct_interface_instance() const;
  
  private:

-   glsl_struct_field fields[10];
+   glsl_struct_field fields[11];
 unsigned num_fields;
  };
  
@@ -675,6 +675,11 @@ builtin_variable_generator::generate_constants()

add_const("gl_MaxClipDistances", state->Const.MaxClipPlanes);
add_const("gl_MaxVaryingComponents", state->ctx-

Const.MaxVarying * 4);

 }
+   if (state->is_version(450, 0) || state->ARB_cull_distance_enable)
{
+  add_const("gl_MaxCullDistances", state->Const.MaxClipPlanes);
+  add_const("gl_MaxCombinedClipAndCullDistances",
+state->Const.MaxClipPlanes);
+   }
  
 if (state->has_geometry_shader()) {

add_const("gl_MaxVertexOutputComponents",
@@ -1246,6 +1251,10 @@
builtin_variable_generator::generate_varyings()
 add_varying(VARYING_SLOT_CLIP_DIST0, array(float_t, 0),
 "gl_ClipDistance");
 }
+   if (state->is_version(450, 0) || state->ARB_cull_distance_enable)
{
+  add_varying(VARYING_SLOT_CULL_DIST0, array(float_t, 0),
+   "gl_CullDistance");
+   }
  
 if (compatibility) {

add_varying(VARYING_SLOT_TEX0, array(vec4_t, 0),
"gl_TexCoord");
diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y
b/src/compiler/glsl/glcpp/glcpp-parse.y
index a48266c..e44f074 100644
--- a/src/compiler/glsl/glcpp/glcpp-parse.y
+++ b/src/compiler/glsl/glcpp/glcpp-parse.y
@@ -2457,6 +2457,9 @@
_glcpp_parser_handle_version_declaration(glcpp_parser_t *parser,
intmax_t versio
  
   if (extensions->ARB_shader_draw_parameters)

  add_builtin_define(parser,
"GL_ARB_shader_draw_parameters", 1);
+
+ if (extensions->ARB_cull_distance)
+add_builtin_define(parser, "GL_ARB_cull_distance", 1);
}
 }
  
diff --git a/src/compiler/glsl/glsl_parser_extras.cpp

b/src/compiler/glsl/glsl_parser_extras.cpp
index 76321aa..9b1d53f 100644
--- a/src/compiler/glsl/glsl_parser_extras.cpp
+++ b/src/compiler/glsl/glsl_parser_extras.cpp
@@ -569,6 +569,7 @@ static const _mesa_glsl_extension
_mesa_glsl_supported_extensions[] = {
 EXT(ARB_arrays_of_arrays, true,  false, ARB_array
s_of_arrays),
 EXT(ARB_compute_shader,   true,  false, ARB_compu
te_shader),
 EXT(ARB_conservative_depth,   true,  false, ARB_conse
rvative_depth),
+   EXT(ARB_cull_distance,true,  false, ARB_cull_
distance),
 EXT(ARB_derivative_control,   true,  false, ARB_deriv
ative_control),
 EXT(ARB_draw_buffers, true,  false, dummy_tru
e),
 EXT(ARB_draw_instanced,   true,  false, ARB_draw_
instanced),
diff --git a/src/compiler/glsl/glsl_parser_extras.h
b/src/compiler/glsl/glsl_parser_extras.h
index c774fbe..85a8ebf 100644
--- a/src/compiler/glsl/glsl_parser_extras.h
+++ b/src/compiler/glsl/glsl_parser_extras.h
@@ -518,6 +518,8 @@ struct _mesa_glsl_parse_state {
 bool ARB_compute_shader_warn;
 bool ARB_conservative_depth_enable;
 bool ARB_conservative_depth_warn;
+   bool ARB_cull_distance_enable;
+   bool ARB_cull_distance_warn;
 bool ARB_derivative_control_enable;
 bool ARB_derivative_control_warn;
 bool ARB_draw_buffers_enable;
diff --git a/src/compiler/glsl/link_varyings.cpp
b/src/compiler/glsl/link_varyings.cpp
index 8e74981..d4cc68f 100644
--- a/src/compiler/glsl/link_varyings.cpp
+++ b/src/compiler/glsl/link_varyings.cpp
@@ -573,6 +573,10 @@ tfeedback_decl::init(struct gl_context *ctx,
const void *mem_ctx,
 strcmp(this->var_name, "gl_ClipDistance") == 0) {
this->lowered_builtin_array_variable = clip_distance;
 }
+   if (ctx-

Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerCombinedClipCul

lDistance &&
+   strcmp(this->var_name, "gl_CullDistance") == 0) {
+ 

Re: [Mesa-dev] [PATCH 0/4] ARB_robust_buffer_access_behavior for radeonsi

2016-04-04 Thread eocallaghan

I had a hacked up version of this last week which was very similar.
This is much cleaner, hence this series is,

Reviewed-by: Edward O'Callaghan 

On 2016-04-04 21:41, Bas Nieuwenhuizen wrote:
This series implements ARB_robust_buffer_access_behavior for the 
radeonsi

driver.

There are some tests at:

https://github.com/BNieuwenhuizen/piglit

These have not been sent yet as they depend on robust access context
support in waffle.

Bas Nieuwenhuizen (4):
  radeonsi: use bounded indexing for constant buffers
  radeonsi: use bounded indexing for samplers
  expose ARB_robust_buffer_access_behavior
  radeonsi: mark ARB_robust_buffer_access_behavior as supported

 docs/GL3.txt |  2 +-
 docs/relnotes/11.3.0.html|  1 +
 src/gallium/docs/source/screen.rst   |  4 +++-
 src/gallium/drivers/freedreno/freedreno_screen.c |  1 +
 src/gallium/drivers/i915/i915_screen.c   |  1 +
 src/gallium/drivers/ilo/ilo_screen.c |  1 +
 src/gallium/drivers/llvmpipe/lp_screen.c |  1 +
 src/gallium/drivers/nouveau/nv30/nv30_screen.c   |  1 +
 src/gallium/drivers/nouveau/nv50/nv50_screen.c   |  1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   |  1 +
 src/gallium/drivers/r300/r300_screen.c   |  1 +
 src/gallium/drivers/r600/r600_pipe.c |  1 +
 src/gallium/drivers/radeonsi/si_pipe.c   |  1 +
 src/gallium/drivers/radeonsi/si_shader.c | 10 +++---
 src/gallium/drivers/softpipe/sp_screen.c |  1 +
 src/gallium/drivers/svga/svga_screen.c   |  1 +
 src/gallium/drivers/swr/swr_screen.cpp   |  1 +
 src/gallium/drivers/vc4/vc4_screen.c |  1 +
 src/gallium/drivers/virgl/virgl_screen.c |  1 +
 src/gallium/include/pipe/p_defines.h |  1 +
 src/mesa/main/extensions_table.h |  1 +
 src/mesa/main/mtypes.h   |  1 +
 src/mesa/main/version.c  |  2 +-
 src/mesa/state_tracker/st_extensions.c   |  1 +
 24 files changed, 32 insertions(+), 6 deletions(-)


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/6] gallium: add way for drivers to create fences without flushing

2016-04-04 Thread Rob Clark
Hmm, what are the requirements of eglClientWaitSyncKHR() about having
current context bound?  Not being thread-safe (ie. being associated w/
current ctx) is pretty important to allow apps that create fences at
inopportune times to not force a mid-frame flush for tilers.. :-(

BR,
-R


On Mon, Apr 4, 2016 at 8:10 AM, Marek Olšák  wrote:
> There is one problem with this: It doesn't allow calling
> pipe_context::fence_finish from another thread in a thread-safe
> manner, and it implies that fence_finish is generally NOT thread-safe.
> This thread safety is something I'd like to preserve.
>
> I would say that a flush flag telling the driver not to flush would be
> better, so that drivers which have per-screen fences and cheap flushes
> can ignore it.
>
> Marek
>
> On Fri, Apr 1, 2016 at 10:29 PM, Rob Clark  wrote:
>> From: Rob Clark 
>>
>> Since current thing is kinda horrible for tilers.  And that issue will
>> be even worse with EGL_ANDROID_native_fence_sync.
>>
>> Not wired up yet for gl syncobj, which can come later.  For now we just
>> need this with EGL.
>>
>> Signed-off-by: Rob Clark 
>> ---
>>  src/gallium/include/pipe/p_context.h  | 24 
>>  src/gallium/state_trackers/dri/dri2.c | 29 -
>>  2 files changed, 44 insertions(+), 9 deletions(-)
>>
>> diff --git a/src/gallium/include/pipe/p_context.h 
>> b/src/gallium/include/pipe/p_context.h
>> index 1c97e82..02a946b 100644
>> --- a/src/gallium/include/pipe/p_context.h
>> +++ b/src/gallium/include/pipe/p_context.h
>> @@ -457,6 +457,30 @@ struct pipe_context {
>>   unsigned flags);
>>
>> /**
>> +* Create a fence without necessarily flushing rendering.  Note
>> +* that if the driver implements this, it must also implement
>> +* ctx->fence_finish() which will be used instead of
>> +* screen->fence_finish() to give the driver an opportunity to
>> +* flush.
>> +*
>> +* This allows drivers, in particular tilers, to defer flush
>> +* until someone actually wants to wait on a fence.
>> +*
>> +* \param fence  if not NULL, an old fence to unref and transfer a
>> +*new fence reference to
>> +*/
>> +   void (*create_fence)(struct pipe_context *pipe,
>> +struct pipe_fence_handle **fence);
>> +
>> +   /**
>> +* Wait for the fence to finish.
>> +* \param timeout in nanoseconds (may be PIPE_TIMEOUT_INFINITE).
>> +*/
>> +   boolean (*fence_finish)(struct pipe_context *pipe,
>> +   struct pipe_fence_handle *fence,
>> +   uint64_t timeout);
>> +
>> +   /**
>>  * Create a view on a texture to be used by a shader stage.
>>  */
>> struct pipe_sampler_view * (*create_sampler_view)(struct pipe_context 
>> *ctx,
>> diff --git a/src/gallium/state_trackers/dri/dri2.c 
>> b/src/gallium/state_trackers/dri/dri2.c
>> index fb0a180..b66d885 100644
>> --- a/src/gallium/state_trackers/dri/dri2.c
>> +++ b/src/gallium/state_trackers/dri/dri2.c
>> @@ -1320,7 +1320,12 @@ dri2_create_fence(__DRIcontext *_ctx)
>> if (!fence)
>>return NULL;
>>
>> -   ctx->flush(ctx, &fence->pipe_fence, 0);
>> +   if (ctx->create_fence) {
>> +  debug_assert(ctx->fence_finish);
>> +  ctx->create_fence(ctx, &fence->pipe_fence);
>> +   } else {
>> +  ctx->flush(ctx, &fence->pipe_fence, 0);
>> +   }
>>
>> if (!fence->pipe_fence) {
>>FREE(fence);
>> @@ -1376,27 +1381,33 @@ static GLboolean
>>  dri2_client_wait_sync(__DRIcontext *_ctx, void *_fence, unsigned flags,
>>uint64_t timeout)
>>  {
>> +   struct pipe_context *ctx = dri_context(_ctx)->st->pipe;
>> struct dri2_fence *fence = (struct dri2_fence*)_fence;
>> struct dri_screen *driscreen = fence->driscreen;
>> struct pipe_screen *screen = driscreen->base.screen;
>> +   struct pipe_fence_handle *pipe_fence = NULL;
>>
>> -   /* No need to flush. The context was flushed when the fence was created. 
>> */
>> +   /* No need to flush. The context was flushed when the fence was created,
>> +* or the ctx implements ctx->fence_finish() which will take care of
>> +* flushing if required
>> +*/
>>
>> if (fence->pipe_fence)
>> -  return screen->fence_finish(screen, fence->pipe_fence, timeout);
>> +  pipe_fence = fence->pipe_fence;
>> else if (fence->cl_event) {
>> -  struct pipe_fence_handle *pipe_fence =
>> - driscreen->opencl_dri_event_get_fence(fence->cl_event);
>> -
>> -  if (pipe_fence)
>> - return screen->fence_finish(screen, pipe_fence, timeout);
>> -  else
>> +  pipe_fence = driscreen->opencl_dri_event_get_fence(fence->cl_event);
>> +  if (!pipe_fence)
>>   return driscreen->opencl_dri_event_wait(fence->cl_event, timeout);
>> }
>> else {
>>assert(0);
>>return false;
>> }
>> +
>> +   if (ctx->fence_finish)
>> +  return ctx->fence_finish(ctx, pipe_fence, timeout);
>> +
>> +   return sc

Re: [Mesa-dev] [PATCH 2/6] dri: extend fence extension to support native fd fences

2016-04-04 Thread Rob Clark
On Mon, Apr 4, 2016 at 7:49 AM, Marek Olšák  wrote:
> On Fri, Apr 1, 2016 at 10:29 PM, Rob Clark  wrote:
>> From: Rob Clark 
>>
>> Required to implement EGL_ANDROID_native_fence_sync.
>>
>> Signed-off-by: Rob Clark 
>> ---
>>  include/GL/internal/dri_interface.h | 44 
>> -
>>  1 file changed, 43 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/GL/internal/dri_interface.h 
>> b/include/GL/internal/dri_interface.h
>> index 2b49a29..8b0dadc 100644
>> --- a/include/GL/internal/dri_interface.h
>> +++ b/include/GL/internal/dri_interface.h
>> @@ -339,12 +339,19 @@ struct __DRI2throttleExtensionRec {
>>   */
>>
>>  #define __DRI2_FENCE "DRI2_Fence"
>> -#define __DRI2_FENCE_VERSION 1
>> +#define __DRI2_FENCE_VERSION 2
>>
>>  #define __DRI2_FENCE_TIMEOUT_INFINITE 0xffffffffffffffffllu
>>
>>  #define __DRI2_FENCE_FLAG_FLUSH_COMMANDS  (1 << 0)
>>
>> +/**
>> + * \name Capabilities that might be returned by 
>> __DRI2fenceExtensionRec::get_capabilities
>> + */
>> +/*@{*/
>> +#define __DRI_FENCE_CAP_NATIVE_FD 1
>> +/*@}*/
>> +
>>  struct __DRI2fenceExtensionRec {
>> __DRIextension base;
>>
>> @@ -389,6 +396,41 @@ struct __DRI2fenceExtensionRec {
>>  *sense with this function (right now there are none)
>>  */
>> void (*server_wait_sync)(__DRIcontext *ctx, void *fence, unsigned flags);
>> +
>> +   /**
>> +* Query for general capabilities of the driver that concern fences.
>> +* Returns a bitmask of __DRI_FENCE_CAP_x
>> +*
>> +* \since 2
>> +*/
>> +   unsigned (*get_capabilities)(__DRIscreen *screen);
>> +
>> +   /**
>> +* Create an fd (file descriptor) associated fence.  If the fence fd
>> +* is -1, this behaves similarly to create_fence() except that when
>> +* rendering is flushed the driver creates a fence fd.  Otherwise,
>> +* the driver wraps an existing fence fd.
>> +*
>> +* This is used to implement the EGL_ANDROID_native_fence_sync extension.
>> +*
>> +* \since 2
>> +*
>> +* \param ctx the context associated with the fence
>> +* \param fd  the fence fd or -1
>> +*/
>> +   void *(*create_fence_fd)(__DRIcontext *ctx, int fd);
>> +
>> +   /**
>> +* For fences created with create_fence_fd(), after rendering is flushed,
>> +* this retrieves the native fence fd.  Caller takes ownership of the
>> +* fd and will close() it when it is no longer needed.
>> +*
>> +* \since 2
>> +*
>> +* \param screen  the screen associated with the fence
>> +* \param fence   the fence
>> +*/
>> +   int (*get_fence_fd)(__DRIscreen *screen, void *fence);
>
> This can be even more generic. If you add a requirement that
> get_fence_fd must work with any fence, create_fence_fd() essentially
> becomes import_fence_fd() and create_fence() can be used if fd is -1.
>
> The reason is that we may be interested in adding support for
> cl_khr_gl_event in the future (MesaGL + HSA/OpenCL), which allows
> sharing any GL fence with CL, and we will most likely use fd fences.

Hmm, if you read the EGL_ANDROID_native_fence_sync extension, it
explicitly makes it clear that not *all* fences would need to be
convertible to a fd, to avoid needing to create a fd for every fence.
(See Issues #1.)

I'm still going back and forth on the kernel interface for this on the
driver side, but it could get awkward if you allow multiple 'struct
fence' objects created for the same seqno, so the interface I'll
probably end up going with makes your suggested change difficult to
implement.

Could you not just use the existing ANDROID_native_fence_sync
extension for GL + HSA/CL interop?

BR,
-R
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/6] gallium: wire up server_wait_sync

2016-04-04 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Fri, Apr 1, 2016 at 10:29 PM, Rob Clark  wrote:
> From: Rob Clark 
>
> This will be needed for explicit synchronization with devices outside
> the gpu, ie. EGL_ANDROID_native_fence_sync.
>
> Signed-off-by: Rob Clark 
> ---
>  src/gallium/include/pipe/p_context.h  | 6 ++
>  src/gallium/state_trackers/dri/dri2.c | 6 +-
>  2 files changed, 11 insertions(+), 1 deletion(-)
>
> diff --git a/src/gallium/include/pipe/p_context.h 
> b/src/gallium/include/pipe/p_context.h
> index 02a946b..d617ec4 100644
> --- a/src/gallium/include/pipe/p_context.h
> +++ b/src/gallium/include/pipe/p_context.h
> @@ -481,6 +481,12 @@ struct pipe_context {
> uint64_t timeout);
>
> /**
> +* Insert commands to have GPU wait for fence to be signaled.
> +*/
> +   void (*fence_server_sync)(struct pipe_context *pipe,
> + struct pipe_fence_handle *fence);
> +
> +   /**
>  * Create a view on a texture to be used by a shader stage.
>  */
> struct pipe_sampler_view * (*create_sampler_view)(struct pipe_context 
> *ctx,
> diff --git a/src/gallium/state_trackers/dri/dri2.c 
> b/src/gallium/state_trackers/dri/dri2.c
> index b66d885..6c8bb37 100644
> --- a/src/gallium/state_trackers/dri/dri2.c
> +++ b/src/gallium/state_trackers/dri/dri2.c
> @@ -1413,7 +1413,11 @@ dri2_client_wait_sync(__DRIcontext *_ctx, void 
> *_fence, unsigned flags,
>  static void
>  dri2_server_wait_sync(__DRIcontext *_ctx, void *_fence, unsigned flags)
>  {
> -   /* AFAIK, no driver currently supports parallel context execution. */
> +   struct pipe_context *ctx = dri_context(_ctx)->st->pipe;
> +   struct dri2_fence *fence = (struct dri2_fence*)_fence;
> +
> +   if (ctx->fence_server_sync)
> +  ctx->fence_server_sync(ctx, fence->pipe_fence);
>  }
>
>  static __DRI2fenceExtension dri2FenceExtension = {
> --
> 2.5.5
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/6] gallium: add way for drivers to create fences without flushing

2016-04-04 Thread Marek Olšák
There is one problem with this: It doesn't allow calling
pipe_context::fence_finish from another thread in a thread-safe
manner, and it implies that fence_finish is generally NOT thread-safe.
This thread safety is something I'd like to preserve.

I would say that a flush flag telling the driver not to flush would be
better, so that drivers which have per-screen fences and cheap flushes
can ignore it.

Marek

On Fri, Apr 1, 2016 at 10:29 PM, Rob Clark  wrote:
> From: Rob Clark 
>
> Since current thing is kinda horrible for tilers.  And that issue will
> be even worse with EGL_ANDROID_native_fence_sync.
>
> Not wired up yet for gl syncobj, which can come later.  For now we just
> need this with EGL.
>
> Signed-off-by: Rob Clark 
> ---
>  src/gallium/include/pipe/p_context.h  | 24 
>  src/gallium/state_trackers/dri/dri2.c | 29 -
>  2 files changed, 44 insertions(+), 9 deletions(-)
>
> diff --git a/src/gallium/include/pipe/p_context.h 
> b/src/gallium/include/pipe/p_context.h
> index 1c97e82..02a946b 100644
> --- a/src/gallium/include/pipe/p_context.h
> +++ b/src/gallium/include/pipe/p_context.h
> @@ -457,6 +457,30 @@ struct pipe_context {
>   unsigned flags);
>
> /**
> +* Create a fence without necessarily flushing rendering.  Note
> +* that if the driver implements this, it must also implement
> +* ctx->fence_finish() which will be used instead of
> +* screen->fence_finish() to give the driver an opportunity to
> +* flush.
> +*
> +* This allows drivers, in particular tilers, to defer flush
> +* until someone actually wants to wait on a fence.
> +*
> +* \param fence  if not NULL, an old fence to unref and transfer a
> +*new fence reference to
> +*/
> +   void (*create_fence)(struct pipe_context *pipe,
> +struct pipe_fence_handle **fence);
> +
> +   /**
> +* Wait for the fence to finish.
> +* \param timeout in nanoseconds (may be PIPE_TIMEOUT_INFINITE).
> +*/
> +   boolean (*fence_finish)(struct pipe_context *pipe,
> +   struct pipe_fence_handle *fence,
> +   uint64_t timeout);
> +
> +   /**
>  * Create a view on a texture to be used by a shader stage.
>  */
> struct pipe_sampler_view * (*create_sampler_view)(struct pipe_context 
> *ctx,
> diff --git a/src/gallium/state_trackers/dri/dri2.c 
> b/src/gallium/state_trackers/dri/dri2.c
> index fb0a180..b66d885 100644
> --- a/src/gallium/state_trackers/dri/dri2.c
> +++ b/src/gallium/state_trackers/dri/dri2.c
> @@ -1320,7 +1320,12 @@ dri2_create_fence(__DRIcontext *_ctx)
> if (!fence)
>return NULL;
>
> -   ctx->flush(ctx, &fence->pipe_fence, 0);
> +   if (ctx->create_fence) {
> +  debug_assert(ctx->fence_finish);
> +  ctx->create_fence(ctx, &fence->pipe_fence);
> +   } else {
> +  ctx->flush(ctx, &fence->pipe_fence, 0);
> +   }
>
> if (!fence->pipe_fence) {
>FREE(fence);
> @@ -1376,27 +1381,33 @@ static GLboolean
>  dri2_client_wait_sync(__DRIcontext *_ctx, void *_fence, unsigned flags,
>uint64_t timeout)
>  {
> +   struct pipe_context *ctx = dri_context(_ctx)->st->pipe;
> struct dri2_fence *fence = (struct dri2_fence*)_fence;
> struct dri_screen *driscreen = fence->driscreen;
> struct pipe_screen *screen = driscreen->base.screen;
> +   struct pipe_fence_handle *pipe_fence = NULL;
>
> -   /* No need to flush. The context was flushed when the fence was created. 
> */
> +   /* No need to flush. The context was flushed when the fence was created,
> +* or the ctx implements ctx->fence_finish() which will take care of
> +* flushing if required
> +*/
>
> if (fence->pipe_fence)
> -  return screen->fence_finish(screen, fence->pipe_fence, timeout);
> +  pipe_fence = fence->pipe_fence;
> else if (fence->cl_event) {
> -  struct pipe_fence_handle *pipe_fence =
> - driscreen->opencl_dri_event_get_fence(fence->cl_event);
> -
> -  if (pipe_fence)
> - return screen->fence_finish(screen, pipe_fence, timeout);
> -  else
> +  pipe_fence = driscreen->opencl_dri_event_get_fence(fence->cl_event);
> +  if (!pipe_fence)
>   return driscreen->opencl_dri_event_wait(fence->cl_event, timeout);
> }
> else {
>assert(0);
>return false;
> }
> +
> +   if (ctx->fence_finish)
> +  return ctx->fence_finish(ctx, pipe_fence, timeout);
> +
> +   return screen->fence_finish(screen, pipe_fence, timeout);
>  }
>
>  static void
> --
> 2.5.5
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nir: Use PRIi64 and PRIu64 instead of %ld and %lu.

2016-04-04 Thread Jose Fonseca

On 04/04/16 06:37, Jason Ekstrand wrote:


On Apr 3, 2016 7:51 PM, "Kenneth Graunke" mailto:kenn...@whitecape.org>> wrote:
 >
 > %ld and %lu aren't the right format specifiers for int64_t and uint64_t
 > on 32-bit (x86) systems.  They're %zu on Linux and %Iu on Windows.
 >
 > Use the standard C99 macros in hopes that they work everywhere.
 >
 > Signed-off-by: Kenneth Graunke mailto:kenn...@whitecape.org>>
 > ---
 >  src/compiler/nir/nir_search.c | 5 +++--
 >  1 file changed, 3 insertions(+), 2 deletions(-)
 >
 > diff --git a/src/compiler/nir/nir_search.c
b/src/compiler/nir/nir_search.c
 > index 6e63063..c17d47b 100644
 > --- a/src/compiler/nir/nir_search.c
 > +++ b/src/compiler/nir/nir_search.c
 > @@ -25,6 +25,7 @@
 >   *
 >   */
 >
 > +#include <inttypes.h>
 >  #include "nir_search.h"
 >
 >  struct match_state {
 > @@ -483,7 +484,7 @@ construct_value(const nir_search_value *value,
 >   break;
 >
 >case nir_type_int:
 > - load->def.name  = ralloc_asprintf(load,
"%ld", c->data.i);
 > + load->def.name  = ralloc_asprintf(load,
"%" PRIi64, c->data.i);

Another option is to simply use lld and llu.  That should be 64-bits on
all the platforms we care about.


Yes, MSVC 2013 onwards also supports %ll : 
https://msdn.microsoft.com/en-us/library/56e442dc(v=vs.120).aspx .


But to avoid compiler warnings it's better to use `long long` for 
respective parameters too.


Jose
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] r600: Change default behaviour for undefined COLOR0

2016-04-04 Thread Axel Davy

On 03/04/2016 20:41, Roland Scheidegger wrote :

Am 03.04.2016 um 10:11 schrieb Axel Davy:

d3d 9 needs COLOR0 to be 1.0 on all channels when
undefined. 0.0 for the others is fine.
GL behaviour is undefined.


Not that I care what drivers use by default for undefined values, but it
probably would be better to fix that properly in the state tracker since
certainly gallium doesn't impose that default...

Roland


Implementing it properly in the state tracker is a bit annoying,
because of possible combinations of usage ff pipeline vs non ff pipeline.
It would be made easier if (0,0,0,0) was defined as the default in gallium,
so only COLOR0 would have to be checked, else all outputs/inputs have to 
be checked.


As it seems rather easy to fix in the driver, I chose that way.

There seems to be very few games relying on the behaviour.

Axel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/6] dri: extend fence extension to support native fd fences

2016-04-04 Thread Marek Olšák
On Fri, Apr 1, 2016 at 10:29 PM, Rob Clark  wrote:
> From: Rob Clark 
>
> Required to implement EGL_ANDROID_native_fence_sync.
>
> Signed-off-by: Rob Clark 
> ---
>  include/GL/internal/dri_interface.h | 44 
> -
>  1 file changed, 43 insertions(+), 1 deletion(-)
>
> diff --git a/include/GL/internal/dri_interface.h 
> b/include/GL/internal/dri_interface.h
> index 2b49a29..8b0dadc 100644
> --- a/include/GL/internal/dri_interface.h
> +++ b/include/GL/internal/dri_interface.h
> @@ -339,12 +339,19 @@ struct __DRI2throttleExtensionRec {
>   */
>
>  #define __DRI2_FENCE "DRI2_Fence"
> -#define __DRI2_FENCE_VERSION 1
> +#define __DRI2_FENCE_VERSION 2
>
>  #define __DRI2_FENCE_TIMEOUT_INFINITE 0xffffffffffffffffllu
>
>  #define __DRI2_FENCE_FLAG_FLUSH_COMMANDS  (1 << 0)
>
> +/**
> + * \name Capabilities that might be returned by 
> __DRI2fenceExtensionRec::get_capabilities
> + */
> +/*@{*/
> +#define __DRI_FENCE_CAP_NATIVE_FD 1
> +/*@}*/
> +
>  struct __DRI2fenceExtensionRec {
> __DRIextension base;
>
> @@ -389,6 +396,41 @@ struct __DRI2fenceExtensionRec {
>  *sense with this function (right now there are none)
>  */
> void (*server_wait_sync)(__DRIcontext *ctx, void *fence, unsigned flags);
> +
> +   /**
> +* Query for general capabilities of the driver that concern fences.
> +* Returns a bitmask of __DRI_FENCE_CAP_x
> +*
> +* \since 2
> +*/
> +   unsigned (*get_capabilities)(__DRIscreen *screen);
> +
> +   /**
> +* Create an fd (file descriptor) associated fence.  If the fence fd
> +* is -1, this behaves similarly to create_fence() except that when
> +* rendering is flushed the driver creates a fence fd.  Otherwise,
> +* the driver wraps an existing fence fd.
> +*
> +* This is used to implement the EGL_ANDROID_native_fence_sync extension.
> +*
> +* \since 2
> +*
> +* \param ctx the context associated with the fence
> +* \param fd  the fence fd or -1
> +*/
> +   void *(*create_fence_fd)(__DRIcontext *ctx, int fd);
> +
> +   /**
> +* For fences created with create_fence_fd(), after rendering is flushed,
> +* this retrieves the native fence fd.  Caller takes ownership of the
> +* fd and will close() it when it is no longer needed.
> +*
> +* \since 2
> +*
> +* \param screen  the screen associated with the fence
> +* \param fence   the fence
> +*/
> +   int (*get_fence_fd)(__DRIscreen *screen, void *fence);

This can be even more generic. If you add a requirement that
get_fence_fd must work with any fence, create_fence_fd() essentially
becomes import_fence_fd() and create_fence() can be used if fd is -1.

The reason is that we may be interested in adding support for
cl_khr_gl_event in the future (MesaGL + HSA/OpenCL), which allows
sharing any GL fence with CL, and we will most likely use fd fences.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/4] radeonsi: mark ARB_robust_buffer_access_behavior as supported

2016-04-04 Thread Bas Nieuwenhuizen
Signed-off-by: Bas Nieuwenhuizen 
---
 docs/GL3.txt   | 2 +-
 docs/relnotes/11.3.0.html  | 1 +
 src/gallium/drivers/radeonsi/si_pipe.c | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 6ea8d5c..d7e0a4b 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -177,7 +177,7 @@ GL 4.3, GLSL 4.30:
   GL_ARB_invalidate_subdata DONE (all drivers)
   GL_ARB_multi_draw_indirectDONE (i965, nvc0, 
r600, radeonsi, llvmpipe, softpipe)
   GL_ARB_program_interface_queryDONE (all drivers)
-  GL_ARB_robust_buffer_access_behavior  not started
+  GL_ARB_robust_buffer_access_behavior  DONE (radeonsi)
   GL_ARB_shader_image_size  DONE (i965, radeonsi)
   GL_ARB_shader_storage_buffer_object   DONE (i965, nvc0, 
radeonsi)
   GL_ARB_stencil_texturing  DONE (i965/gen8+, 
nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
diff --git a/docs/relnotes/11.3.0.html b/docs/relnotes/11.3.0.html
index 8624924..cb09dd7 100644
--- a/docs/relnotes/11.3.0.html
+++ b/docs/relnotes/11.3.0.html
@@ -46,6 +46,7 @@ Note: some of the new features are only available with 
certain drivers.
 
 GL_ARB_compute_shader on radeonsi
 GL_ARB_internalformat_query2 on all drivers
+GL_ARB_robust_buffer_access_behavior on radeonsi
 GL_ARB_shader_atomic_counters on radeonsi
 GL_ARB_shader_atomic_counter_ops on nvc0, radeonsi
 GL_ARB_shader_image_load_store on radeonsi, softpipe
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index e0fecac..afda4d5 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -308,6 +308,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
case PIPE_CAP_QUERY_MEMORY_INFO:
case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
+   case PIPE_CAP_ROBUST_BUFFER_ACCESS:
return 1;
 
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
@@ -358,7 +359,6 @@ static int si_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
case PIPE_CAP_QUERY_BUFFER_OBJECT:
-   case PIPE_CAP_ROBUST_BUFFER_ACCESS:
return 0;
 
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/4] radeonsi: use bounded indexing for samplers

2016-04-04 Thread Bas Nieuwenhuizen
Signed-off-by: Bas Nieuwenhuizen 
---
 src/gallium/drivers/radeonsi/si_shader.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index dd04748..392f439 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3781,7 +3781,10 @@ static void tex_fetch_ptrs(
const struct tgsi_full_src_register *reg = 
&emit_data->inst->Src[sampler_src];
LLVMValueRef ind_index;
 
-   ind_index = get_indirect_index(ctx, &reg->Indirect, 
reg->Register.Index);
+   ind_index = get_bounded_indirect_index(ctx,
+  &reg->Indirect,
+  reg->Register.Index,
+  SI_NUM_USER_SAMPLERS);
 
*res_ptr = get_sampler_desc(ctx, ind_index, DESC_IMAGE);
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] radeonsi: use bounded indexing for constant buffers

2016-04-04 Thread Bas Nieuwenhuizen
Signed-off-by: Bas Nieuwenhuizen 
---
 src/gallium/drivers/radeonsi/si_shader.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 28c7923..dd04748 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1393,8 +1393,9 @@ static LLVMValueRef fetch_constant(
if (reg->Register.Dimension && reg->Dimension.Indirect) {
LLVMValueRef ptr = LLVMGetParam(ctx->radeon_bld.main_fn, 
SI_PARAM_CONST_BUFFERS);
LLVMValueRef index;
-   index = get_indirect_index(ctx, &reg->DimIndirect,
-  reg->Dimension.Index);
+   index = get_bounded_indirect_index(ctx, &reg->DimIndirect,
+  reg->Dimension.Index,
+  SI_NUM_USER_CONST_BUFFERS);
bufp = build_indexed_load_const(ctx, ptr, index);
} else
bufp = ctx->const_buffers[buf];
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/4] expose ARB_robust_buffer_access_behavior

2016-04-04 Thread Bas Nieuwenhuizen
Signed-off-by: Bas Nieuwenhuizen 
---
 src/gallium/docs/source/screen.rst   | 4 +++-
 src/gallium/drivers/freedreno/freedreno_screen.c | 1 +
 src/gallium/drivers/i915/i915_screen.c   | 1 +
 src/gallium/drivers/ilo/ilo_screen.c | 1 +
 src/gallium/drivers/llvmpipe/lp_screen.c | 1 +
 src/gallium/drivers/nouveau/nv30/nv30_screen.c   | 1 +
 src/gallium/drivers/nouveau/nv50/nv50_screen.c   | 1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   | 1 +
 src/gallium/drivers/r300/r300_screen.c   | 1 +
 src/gallium/drivers/r600/r600_pipe.c | 1 +
 src/gallium/drivers/radeonsi/si_pipe.c   | 1 +
 src/gallium/drivers/softpipe/sp_screen.c | 1 +
 src/gallium/drivers/svga/svga_screen.c   | 1 +
 src/gallium/drivers/swr/swr_screen.cpp   | 1 +
 src/gallium/drivers/vc4/vc4_screen.c | 1 +
 src/gallium/drivers/virgl/virgl_screen.c | 1 +
 src/gallium/include/pipe/p_defines.h | 1 +
 src/mesa/main/extensions_table.h | 1 +
 src/mesa/main/mtypes.h   | 1 +
 src/mesa/main/version.c  | 2 +-
 src/mesa/state_tracker/st_extensions.c   | 1 +
 21 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/src/gallium/docs/source/screen.rst 
b/src/gallium/docs/source/screen.rst
index 47a19de..02564bd 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -323,7 +323,9 @@ The integer capabilities:
 * ``PIPE_CAP_PCI_BUS``: Return the PCI bus number.
 * ``PIPE_CAP_PCI_DEVICE``: Return the PCI device number.
 * ``PIPE_CAP_PCI_FUNCTION``: Return the PCI function number.
-
+* ``PIPE_CAP_ROBUST_BUFFER_ACCESS``: Implementation uses bounds checking on
+  resource accesses by shader if the context is created with
+  PIPE_CONTEXT_ROBUST_BUFFER_ACCESS.
 
 .. _pipe_capf:
 
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c 
b/src/gallium/drivers/freedreno/freedreno_screen.c
index d47cb07..6bd3d81 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -255,6 +255,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+   case PIPE_CAP_ROBUST_BUFFER_ACCESS:
return 0;
 
case PIPE_CAP_MAX_VIEWPORTS:
diff --git a/src/gallium/drivers/i915/i915_screen.c 
b/src/gallium/drivers/i915/i915_screen.c
index f4aa310..b8ad3c8 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -269,6 +269,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap 
cap)
case PIPE_CAP_PCI_BUS:
case PIPE_CAP_PCI_DEVICE:
case PIPE_CAP_PCI_FUNCTION:
+   case PIPE_CAP_ROBUST_BUFFER_ACCESS:
   return 0;
 
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
diff --git a/src/gallium/drivers/ilo/ilo_screen.c 
b/src/gallium/drivers/ilo/ilo_screen.c
index 7812c82..ebe17a0 100644
--- a/src/gallium/drivers/ilo/ilo_screen.c
+++ b/src/gallium/drivers/ilo/ilo_screen.c
@@ -498,6 +498,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap 
param)
case PIPE_CAP_PCI_BUS:
case PIPE_CAP_PCI_DEVICE:
case PIPE_CAP_PCI_FUNCTION:
+   case PIPE_CAP_ROBUST_BUFFER_ACCESS:
   return 0;
 
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c 
b/src/gallium/drivers/llvmpipe/lp_screen.c
index 2529b54..6d54685 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -319,6 +319,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum 
pipe_cap param)
case PIPE_CAP_PCI_BUS:
case PIPE_CAP_PCI_DEVICE:
case PIPE_CAP_PCI_FUNCTION:
+   case PIPE_CAP_ROBUST_BUFFER_ACCESS:
   return 0;
}
/* should only get here on unhandled cases */
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c 
b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index b105c6a..5e11c90 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -192,6 +192,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_PCI_BUS:
case PIPE_CAP_PCI_DEVICE:
case PIPE_CAP_PCI_FUNCTION:
+   case PIPE_CAP_ROBUST_BUFFER_ACCESS:
   return 0;
 
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index ba5e500..12f35ce 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -245,6 +245,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_PCI_BUS:
case PIPE_CAP_PCI_DEVICE:
case PIPE_CAP_PCI_FUNCTION:
+   case PIPE_CAP_ROBUST_BUFFER_ACCESS:
   return 0;
 
case PIPE_CAP_VENDOR_ID:
diff --git a/src/galliu

[Mesa-dev] [PATCH 0/4] ARB_robust_buffer_access_behavior for radeonsi

2016-04-04 Thread Bas Nieuwenhuizen
This series implements ARB_robust_buffer_access_behavior for the radeonsi
driver.

There are some tests at:

https://github.com/BNieuwenhuizen/piglit

These have not been sent yet as they depend on robust access context
support in waffle.

Bas Nieuwenhuizen (4):
  radeonsi: use bounded indexing for constant buffers
  radeonsi: use bounded indexing for samplers
  expose ARB_robust_buffer_access_behavior
  radeonsi: mark ARB_robust_buffer_access_behavior as supported

 docs/GL3.txt |  2 +-
 docs/relnotes/11.3.0.html|  1 +
 src/gallium/docs/source/screen.rst   |  4 +++-
 src/gallium/drivers/freedreno/freedreno_screen.c |  1 +
 src/gallium/drivers/i915/i915_screen.c   |  1 +
 src/gallium/drivers/ilo/ilo_screen.c |  1 +
 src/gallium/drivers/llvmpipe/lp_screen.c |  1 +
 src/gallium/drivers/nouveau/nv30/nv30_screen.c   |  1 +
 src/gallium/drivers/nouveau/nv50/nv50_screen.c   |  1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   |  1 +
 src/gallium/drivers/r300/r300_screen.c   |  1 +
 src/gallium/drivers/r600/r600_pipe.c |  1 +
 src/gallium/drivers/radeonsi/si_pipe.c   |  1 +
 src/gallium/drivers/radeonsi/si_shader.c | 10 +++---
 src/gallium/drivers/softpipe/sp_screen.c |  1 +
 src/gallium/drivers/svga/svga_screen.c   |  1 +
 src/gallium/drivers/swr/swr_screen.cpp   |  1 +
 src/gallium/drivers/vc4/vc4_screen.c |  1 +
 src/gallium/drivers/virgl/virgl_screen.c |  1 +
 src/gallium/include/pipe/p_defines.h |  1 +
 src/mesa/main/extensions_table.h |  1 +
 src/mesa/main/mtypes.h   |  1 +
 src/mesa/main/version.c  |  2 +-
 src/mesa/state_tracker/st_extensions.c   |  1 +
 24 files changed, 32 insertions(+), 6 deletions(-)

-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] egl: add EGL_KHR_reusable_sync to egl_dri

2016-04-04 Thread Marek Olšák
This looks good in general. Just some small nitpicks below.

On Sat, Apr 2, 2016 at 1:46 AM, Dongwon Kim  wrote:
> This patch enables an EGL extension, EGL_KHR_reusable_sync.
> This new extension basically provides a way for multiple APIs or
> threads to be executed synchronously via a "reusable sync"
> primitive shared by those threads/API calls.
>
> This was implemented based on the specification at
>
> https://www.khronos.org/registry/egl/extensions/KHR/EGL_KHR_reusable_sync.txt
>
> v2
> - use thread functions defined in C11/threads.h instead of
>   using direct pthread calls
> - make the timeout set with reference to CLOCK_MONOTONIC
> - cleaned up the way expiration time is calculated
> - (bug fix) in dri2_client_wait_sync, case EGL_SYNC_CL_EVENT_KHR
>   has been added.
> - (bug fix) in dri2_destroy_sync, return from cond_broadcast
>   call is now stored in 'err' instead of 'ret' to prevent 'ret'
>   from being reset to 'EGL_FALSE' even in successful case
> - corrected minor syntax problems
>
> Signed-off-by: Dongwon Kim 
> ---
>  src/egl/drivers/dri2/egl_dri2.c | 210 
> ++--
>  src/egl/drivers/dri2/egl_dri2.h |   2 +
>  src/egl/main/eglapi.c   |   8 ++
>  src/egl/main/eglsync.c  |   3 +-
>  4 files changed, 213 insertions(+), 10 deletions(-)
>
> diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
> index 8f50f0c..843cd53 100644
> --- a/src/egl/drivers/dri2/egl_dri2.c
> +++ b/src/egl/drivers/dri2/egl_dri2.c
> @@ -38,6 +38,8 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +#include 
>  #ifdef HAVE_LIBDRM
>  #include 
>  #include 
> @@ -623,6 +625,8 @@ dri2_setup_screen(_EGLDisplay *disp)
>   disp->Extensions.KHR_cl_event2 = EGL_TRUE;
> }
>
> +   disp->Extensions.KHR_reusable_sync = EGL_TRUE;
> +
> if (dri2_dpy->image) {
>if (dri2_dpy->image->base.version >= 10 &&
>dri2_dpy->image->getCapabilities != NULL) {
> @@ -2389,14 +2393,22 @@ dri2_egl_ref_sync(struct dri2_egl_sync *sync)
> p_atomic_inc(&sync->refcount);
>  }
>
> -static void
> +static EGLint

Since this function only returns EGL_TRUE, the return type can just be void.

>  dri2_egl_unref_sync(struct dri2_egl_display *dri2_dpy,
>  struct dri2_egl_sync *dri2_sync)
>  {
> if (p_atomic_dec_zero(&dri2_sync->refcount)) {
> -  dri2_dpy->fence->destroy_fence(dri2_dpy->dri_screen, dri2_sync->fence);
> +  if (dri2_sync->base.Type == EGL_SYNC_REUSABLE_KHR) {
> + cnd_destroy(&dri2_sync->cond);
> +  }
> +
> +  if (dri2_sync->fence)
> + dri2_dpy->fence->destroy_fence(dri2_dpy->dri_screen, 
> dri2_sync->fence);
> +
>free(dri2_sync);
> }
> +
> +   return EGL_TRUE;
>  }
>
>  static _EGLSync *
> @@ -2408,6 +2420,8 @@ dri2_create_sync(_EGLDriver *drv, _EGLDisplay *dpy,
> struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy);
> struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx);
> struct dri2_egl_sync *dri2_sync;
> +   EGLint ret;
> +   pthread_condattr_t attr;
>
> dri2_sync = calloc(1, sizeof(struct dri2_egl_sync));
> if (!dri2_sync) {
> @@ -2450,6 +2464,37 @@ dri2_create_sync(_EGLDriver *drv, _EGLDisplay *dpy,
>  dri2_sync->fence, 0, 0))
>   dri2_sync->base.SyncStatus = EGL_SIGNALED_KHR;
>break;
> +
> +   case EGL_SYNC_REUSABLE_KHR:
> +  /* intialize attr */
> +  ret = pthread_condattr_init(&attr);
> +
> +  if (ret) {
> + _eglError(EGL_BAD_ACCESS, "eglCreateSyncKHR");
> + free(dri2_sync);
> + return NULL;
> +  }
> +
> +  /* change clock attribute to CLOCK_MONOTONIC */
> +  ret = pthread_condattr_setclock(&attr, CLOCK_MONOTONIC);
> +
> +  if (ret) {
> + _eglError(EGL_BAD_ACCESS, "eglCreateSyncKHR");
> + free(dri2_sync);
> + return NULL;
> +  }
> +
> +  ret = pthread_cond_init(&dri2_sync->cond, &attr);
> +
> +  if (ret) {
> + _eglError(EGL_BAD_ACCESS, "eglCreateSyncKHR");
> + free(dri2_sync);
> + return NULL;
> +  }
> +
> +  /* initial status of reusable sync must be "unsignaled" */
> +  dri2_sync->base.SyncStatus = EGL_UNSIGNALED_KHR;
> +  break;
> }
>
> p_atomic_set(&dri2_sync->refcount, 1);
> @@ -2461,9 +2506,33 @@ dri2_destroy_sync(_EGLDriver *drv, _EGLDisplay *dpy, 
> _EGLSync *sync)
>  {
> struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy);
> struct dri2_egl_sync *dri2_sync = dri2_egl_sync(sync);
> +   EGLint ret = EGL_TRUE;
> +   EGLint err;
>
> -   dri2_egl_unref_sync(dri2_dpy, dri2_sync);
> -   return EGL_TRUE;
> +   /* if type of sync is EGL_SYNC_REUSABLE_KHR and it is not signaled yet,
> +* then unlock all threads possibly blocked by the reusable sync before
> +* destroying it.
> +*/
> +   if (dri2_sync->base.Type == EGL_SYNC_REUSABLE_KHR &&
> +   dri2_sync->base.SyncStatus == EGL_UNSIGNALED_KHR) {
> +  dri2_

Re: [Mesa-dev] [PATCH v2 22/23] nir: add a pass for lowering (un)pack_double_2x32

2016-04-04 Thread Samuel Iglesias Gonsálvez


On 01/04/16 21:30, Jason Ekstrand wrote:
> On Thu, Mar 31, 2016 at 3:00 AM, Samuel Iglesias Gonsálvez <
> sigles...@igalia.com> wrote:
> 
>> From: Connor Abbott 
>>
>> v2: Undo unintended change to the signature of
>> nir_normalize_cubemap_coords (Iago).
>>
>> v3: Move to compiler/nir (Iago)
>>
>> v4: Remove Authors from copyright header (Michael Schellenberger)
>>
>> Signed-off-by: Iago Toral Quiroga 
>> ---
>>  src/compiler/Makefile.sources   |   1 +
>>  src/compiler/glsl/Makefile.sources  |   1 +
>>  src/compiler/nir/Makefile.sources   |   1 +
>>  src/compiler/nir/nir.h  |   2 +
>>  src/compiler/nir/nir_lower_double_packing.c | 106
>> 
>>  5 files changed, 111 insertions(+)
>>  create mode 100644 src/compiler/nir/nir_lower_double_packing.c
>>
>> diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
>> index 0aee200..adb7caa 100644
>> --- a/src/compiler/Makefile.sources
>> +++ b/src/compiler/Makefile.sources
>> @@ -186,6 +186,7 @@ NIR_FILES = \
>> nir/nir_lower_alu_to_scalar.c \
>> nir/nir_lower_atomics.c \
>> nir/nir_lower_clip.c \
>> +   nir/nir_lower_double_packing.c \
>> nir/nir_lower_global_vars_to_local.c \
>> nir/nir_lower_gs_intrinsics.c \
>> nir/nir_lower_load_const_to_scalar.c \
>> diff --git a/src/compiler/glsl/Makefile.sources
>> b/src/compiler/glsl/Makefile.sources
>> index 538196a..fadfab8 100644
>> --- a/src/compiler/glsl/Makefile.sources
>> +++ b/src/compiler/glsl/Makefile.sources
>> @@ -38,6 +38,7 @@ NIR_FILES = \
>> nir/nir_lower_alu_to_scalar.c \
>> nir/nir_lower_atomics.c \
>> nir/nir_lower_clip.c \
>> +   nir/nir_lower_double_packing.c \
>> nir/nir_lower_global_vars_to_local.c \
>> nir/nir_lower_gs_intrinsics.c \
>> nir/nir_lower_load_const_to_scalar.c \
>> diff --git a/src/compiler/nir/Makefile.sources
>> b/src/compiler/nir/Makefile.sources
>> index 3474302..488040d 100644
>> --- a/src/compiler/nir/Makefile.sources
>> +++ b/src/compiler/nir/Makefile.sources
>> @@ -29,6 +29,7 @@ NIR_FILES = \
>> nir_lower_alu_to_scalar.c \
>> nir_lower_atomics.c \
>> nir_lower_clip.c \
>> +   nir_lower_double_packing.c \
>> nir_lower_global_vars_to_local.c \
>> nir_lower_gs_intrinsics.c \
>> nir_lower_load_const_to_scalar.c \
>> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
>> index ac709d5..4c9c16a 100644
>> --- a/src/compiler/nir/nir.h
>> +++ b/src/compiler/nir/nir.h
>> @@ -2278,6 +2278,8 @@ void nir_lower_to_source_mods(nir_shader *shader);
>>
>>  bool nir_lower_gs_intrinsics(nir_shader *shader);
>>
>> +void nir_lower_double_pack(nir_shader *shader);
>> +
>>  bool nir_normalize_cubemap_coords(nir_shader *shader);
>>
>>  void nir_live_ssa_defs_impl(nir_function_impl *impl);
>> diff --git a/src/compiler/nir/nir_lower_double_packing.c
>> b/src/compiler/nir/nir_lower_double_packing.c
>> new file mode 100644
>> index 000..7d8a4de
>> --- /dev/null
>> +++ b/src/compiler/nir/nir_lower_double_packing.c
>> @@ -0,0 +1,106 @@
>> +/*
>> + * Copyright © 2015 Intel Corporation
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the
>> "Software"),
>> + * to deal in the Software without restriction, including without
>> limitation
>> + * the rights to use, copy, modify, merge, publish, distribute,
>> sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the
>> next
>> + * paragraph) shall be included in all copies or substantial portions of
>> the
>> + * Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>> EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>> MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
>> SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
>> OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
>> DEALINGS
>> + * IN THE SOFTWARE.
>> + *
>> + */
>> +
>> +#include "nir.h"
>> +#include "nir_builder.h"
>> +
>> +/*
>> + * lowers:
>> + *
>> + * packDouble2x32(foo) -> packDouble2x32Split(foo.x, foo.y)
>> + * unpackDouble2x32(foo) -> vec2(unpackDouble2x32_x(foo),
>> unpackDouble2x32_y(foo))
>> + */
>> +
>> +static nir_ssa_def *
>> +component(nir_builder *b, nir_ssa_def *src, unsigned component)
>> +{
>> +   return nir_swizzle(b, src, (unsigned[]) {component}, 1, true);
>> +}
>>
> 
> This already exists in nir_builder.  It's called nir_channel
> 

OK

> 
>> +
>> +static nir_ssa_def *
>> +lower_pack_double(nir

[Mesa-dev] [PATCH] glsl: Lower variable indexing of system value arrays; treat like inputs.

2016-04-04 Thread Kenneth Graunke
Some system values, such as gl_SampleMaskIn[], may be arrays.
lower_variable_index_to_cond_assign() did not handle this case and would
hit an unreachable() assert.

For now, lower when EmitNoIndirectInput is set.  We could potentially
add another flag for system values, but I'm not sure how useful that
would be.

Fixes Piglit's new samplemaskin-indirect test.  Also fixes many ES31-CTS
tests when OES_sample_variables is enabled.

Cc: mesa-sta...@lists.freedesktop.org
Signed-off-by: Kenneth Graunke 
---
 src/compiler/glsl/lower_variable_index_to_cond_assign.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp 
b/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp
index 278d545..2391089 100644
--- a/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp
+++ b/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp
@@ -385,6 +385,7 @@ public:
   case ir_var_const_in:
  return this->lower_temps;
 
+  case ir_var_system_value:
   case ir_var_shader_in:
  /* The input array size is unknown at compiler time for non-patch
   * inputs in TCS and TES. The arrays are sized to
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965: Allow 8x MSAA on >= 64bpp formats on Gen8+.

2016-04-04 Thread Kenneth Graunke
See commit 3b0279a69 - this restriction is documented in the "Surface
Format" field of RENDER_SURFACE_STATE.

Looking at newer documentation, this restriction appears to exist on
Haswell, but no longer applies on Gen8+.

Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/brw_surface_formats.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.c 
b/src/mesa/drivers/dri/i965/brw_surface_formats.c
index 3c0b23b..ba9b5b9 100644
--- a/src/mesa/drivers/dri/i965/brw_surface_formats.c
+++ b/src/mesa/drivers/dri/i965/brw_surface_formats.c
@@ -806,7 +806,8 @@ brw_render_target_supported(struct brw_context *brw,
/* Under some conditions, MSAA is not supported for formats whose width is
 * more than 64 bits.
 */
-   if (rb->NumSamples > 0 && _mesa_get_format_bytes(format) > 8) {
+   if (brw->gen < 8 &&
+   rb->NumSamples > 0 && _mesa_get_format_bytes(format) > 8) {
   /* Gen6: MSAA on >64 bit formats is unsupported. */
   if (brw->gen <= 6)
  return false;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/6] glsl: remove unused buffer block splitting function

2016-04-04 Thread Timothy Arceri
On Sun, 2016-04-03 at 23:31 -0700, Kenneth Graunke wrote:
> On Sunday, April 3, 2016 9:16:29 PM PDT Timothy Arceri wrote:
> > 
> > ---
> >  src/compiler/glsl/linker.cpp | 70 
> 
> > 
> >  1 file changed, 70 deletions(-)
> Hrm?
> 
>    glsl: remove unused buffer block splitting function
> 
> But it /is/ used...you just delete the use here.  Maybe it's just
> unnecessary?

I'm intending to squash patches 1-4 to avoid code churn so didn't put
too much time into the commit messages but yes it will now be unrequired
:)

> 
> > 
> > 
> > diff --git a/src/compiler/glsl/linker.cpp
> > b/src/compiler/glsl/linker.cpp
> > index 75a2afa..2f152d6 100644
> > --- a/src/compiler/glsl/linker.cpp
> > +++ b/src/compiler/glsl/linker.cpp
> > @@ -4122,49 +4122,6 @@ link_assign_subroutine_types(struct
> > gl_shader_program 
> *prog)
> > 
> >  }
> >  
> >  static void
> > -split_ubos_and_ssbos(void *mem_ctx,
> > - struct gl_uniform_block **s_blks,
> > - struct gl_uniform_block *p_blks,
> > - unsigned num_blocks,
> > - struct gl_uniform_block ***ubos,
> > - unsigned *num_ubos,
> > - struct gl_uniform_block ***ssbos,
> > - unsigned *num_ssbos)
> > -{
> > -   unsigned num_ubo_blocks = 0;
> > -   unsigned num_ssbo_blocks = 0;
> > -
> > -   /* Are we spliting the list of blocks for the shader or the
> > program */
> > -   bool is_shader = p_blks == NULL;
> > -
> > -   for (unsigned i = 0; i < num_blocks; i++) {
> > -  if (is_shader ? s_blks[i]->IsShaderStorage : 
> p_blks[i].IsShaderStorage)
> > 
> > - num_ssbo_blocks++;
> > -  else
> > - num_ubo_blocks++;
> > -   }
> > -
> > -   *ubos = ralloc_array(mem_ctx, gl_uniform_block *,
> > num_ubo_blocks);
> > -   *num_ubos = 0;
> > -
> > -   *ssbos = ralloc_array(mem_ctx, gl_uniform_block *,
> > num_ssbo_blocks);
> > -   *num_ssbos = 0;
> > -
> > -   for (unsigned i = 0; i < num_blocks; i++) {
> > -  struct gl_uniform_block *blk = is_shader ? s_blks[i] :
> > &p_blks[i];
> > -  if (blk->IsShaderStorage) {
> > - (*ssbos)[*num_ssbos] = blk;
> > - (*num_ssbos)++;
> > -  } else {
> > - (*ubos)[*num_ubos] = blk;
> > - (*num_ubos)++;
> > -  }
> > -   }
> > -
> > -   assert(*num_ubos + *num_ssbos == num_blocks);
> > -}
> > -
> > -static void
> >  set_always_active_io(exec_list *ir, ir_variable_mode io_mode)
> >  {
> > assert(io_mode == ir_var_shader_in || io_mode ==
> > ir_var_shader_out);
> > @@ -4701,33 +4658,6 @@ link_shaders(struct gl_context *ctx, struct 
> gl_shader_program *prog)
> > 
> >   has_xfb_qualifiers))
> >    goto done;
> >  
> > -   /* Split BufferInterfaceBlocks into UniformBlocks and 
> ShaderStorageBlocks
> > 
> > -* for gl_shader_program and gl_shader, so that drivers that
> > need 
> separate
> > 
> > -* index spaces for each set can have that.
> > -*/
> > -   for (unsigned i = MESA_SHADER_VERTEX; i < MESA_SHADER_STAGES;
> > i++) {
> > -  if (prog->_LinkedShaders[i] != NULL) {
> > - gl_shader *sh = prog->_LinkedShaders[i];
> > - split_ubos_and_ssbos(sh,
> > -  sh->BufferInterfaceBlocks,
> > -  NULL,
> > -  sh->NumBufferInterfaceBlocks,
> > -  &sh->UniformBlocks,
> > -  &sh->NumUniformBlocks,
> > -  &sh->ShaderStorageBlocks,
> > -  &sh->NumShaderStorageBlocks);
> > -  }
> > -   }
> > -
> > -   split_ubos_and_ssbos(prog,
> > -NULL,
> > -prog->BufferInterfaceBlocks,
> > -prog->NumBufferInterfaceBlocks,
> > -&prog->UniformBlocks,
> > -&prog->NumUniformBlocks,
> > -&prog->ShaderStorageBlocks,
> > -&prog->NumShaderStorageBlocks);
> > -
> > update_array_sizes(prog);
> > link_assign_uniform_locations(prog, ctx-
> > >Const.UniformBooleanTrue,
> >   num_explicit_uniform_locs,
> > 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/6] glsl: make use of separate UBO and SSBO lists

2016-04-04 Thread Timothy Arceri
On Sun, 2016-04-03 at 23:27 -0700, Kenneth Graunke wrote:
> On Sunday, April 3, 2016 9:16:28 PM PDT Timothy Arceri wrote:
> [snip]
> > 
> > @@ -3295,8 +3296,8 @@ should_add_buffer_variable(struct
> > gl_shader_program 
> *shProg,
> > 
> > if (type != GL_BUFFER_VARIABLE)
> >    return true;
> >  
> > -   for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++)
> > {
> > -  const char *block_name = shProg-
> > >BufferInterfaceBlocks[i].Name;
> > +   for (unsigned i = 0; i < shProg->NumShaderStorageBlocks; i++) {
> > +  const char *block_name = shProg-
> > >ShaderStorageBlocks[i].Name;
> >    block_name_len = strlen(block_name);
> It looks like should_add_buffer_variable() is used for both uniform
> blocks and shader storage blocks.  I don't think this is correct.
> 
> The other hunks look fine.

    if (type != GL_BUFFER_VARIABLE)
       return true;

Above returns true for uniform blocks
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 00/23] NIR: more bit-size related patches

2016-04-04 Thread Samuel Iglesias Gonsálvez


On 01/04/16 22:48, Jason Ekstrand wrote:
> On Thu, Mar 31, 2016 at 2:59 AM, Samuel Iglesias Gonsálvez <
> sigles...@igalia.com> wrote:
> 
>> Hello,
>>
>> This is the second version of this patch series [0].
>>
>> In case you prefer a repository, it is available here [1]:
>>
>> $ git clone -b nir-bit-size-fixes-2.0 https://github.com/Igalia/mesa.git
>>
>> Thanks,
>>
>> Sam
>>
>> [0] https://lists.freedesktop.org/archives/mesa-dev/2016-March/110465.html
>> [1] https://github.com/Igalia/mesa/tree/nir-bit-size-fixes-2.0
>>
>> Connor Abbott (13):
>>   nir/from_ssa: adapt to different bit sizes
>>   nir/locals_to_regs: adapt to different bit sizes
>>   nir/lower_vec: adapt to different bit sizes
>>   nir/glsl_to_nir: support doubles
>>   nir/print: add support for printing doubles and bitsize
>>   nir: handle doubles in nir_deref_get_const_initializer_load()
>>   nir/instr_set: handle 64-bit bit-sizes
>>   nir/split_var_copies: handle doubles
>>   nir: add support for d2f and f2d
>>   nir: add support for (un)pack_double_2x32
>>   nir: don't try to scalarize unpack_double_2x32
>>   nir: add split versions of (un)pack_double_2x32
>>   nir: add a pass for lowering (un)pack_double_2x32
>>
>> Iago Toral Quiroga (7):
>>   nir/lower_to_source_mods: Handle different bit sizes
>>   nir/lower_load_const_to_scalar: support doubles and multiple bit
>> sizes
>>   nir/glsl_to_nir: set the bit-size in the result of evaluate_rvalue
>>   nir/glsl_to_nir: set bit_size on ssbo_load result
>>   nir: add d2i, d2u, d2b opcodes
>>   nir: add i2d and u2d opcodes
>>   nir/lower_to_source_mod: Skip unsafe operations
>>
>> Samuel Iglesias Gonsálvez (3):
>>   nir: add bit_size info to nir_ssa_undef_instr_create()
>>   nir: add bit_size info to nir_load_const_instr_create()
>>   nir: verify destination bit size when checking algebraic optimizations
>>
> 
> I looked through it today and, by and large, it looks pretty good.  I agree
> with Connor on Patch 21.  I would like to see patch 13 reworked as per
> those comments.  I think we should drop patch 23 for now (it needs to go in
> a series with opt_algebraic changes to use it).
> 

OK!

> On the rest of the patches, I had mostly minor comments.  Assuming the
> comments are addressed, those patches are
> 
> Reviewed-by: Jason Ekstrand 
> 

Thanks a lot for your review!

Sam

> 
>>  src/compiler/Makefile.sources |   1 +
>>  src/compiler/glsl/Makefile.sources|   1 +
>>  src/compiler/nir/Makefile.sources |   1 +
>>  src/compiler/nir/glsl_to_nir.cpp  |  89 --
>>  src/compiler/nir/nir.c|  19 +++-
>>  src/compiler/nir/nir.h|   8 +-
>>  src/compiler/nir/nir_algebraic.py |  11 ++-
>>  src/compiler/nir/nir_builder.h|   4 +-
>>  src/compiler/nir/nir_clone.c  |   6 +-
>>  src/compiler/nir/nir_control_flow.c   |   5 +-
>>  src/compiler/nir/nir_from_ssa.c   |   2 +
>>  src/compiler/nir/nir_instr_set.c  |  22 -
>>  src/compiler/nir/nir_lower_alu_to_scalar.c|   3 +
>>  src/compiler/nir/nir_lower_atomics.c  |   5 +-
>>  src/compiler/nir/nir_lower_double_packing.c   | 106
>> ++
>>  src/compiler/nir/nir_lower_load_const_to_scalar.c |   8 +-
>>  src/compiler/nir/nir_lower_locals_to_regs.c   |   3 +-
>>  src/compiler/nir/nir_lower_to_source_mods.c   |  27 +-
>>  src/compiler/nir/nir_lower_vars_to_ssa.c  |   3 +-
>>  src/compiler/nir/nir_lower_vec_to_movs.c  |   1 +
>>  src/compiler/nir/nir_opcodes.py   |  72 +++
>>  src/compiler/nir/nir_opt_constant_folding.c   |   4 +-
>>  src/compiler/nir/nir_phi_builder.c|   3 +-
>>  src/compiler/nir/nir_print.c  |  17 +++-
>>  src/compiler/nir/nir_search.c |   5 +-
>>  src/compiler/nir/nir_split_var_copies.c   |   2 +
>>  src/compiler/nir/nir_to_ssa.c |   2 +-
>>  src/gallium/auxiliary/nir/tgsi_to_nir.c   |   2 +-
>>  28 files changed, 366 insertions(+), 66 deletions(-)
>>  create mode 100644 src/compiler/nir/nir_lower_double_packing.c
>>
>> --
>> 2.5.0
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 13/23] nir/glsl_to_nir: set the bit-size in the result of evaluate_rvalue

2016-04-04 Thread Samuel Iglesias Gonsálvez


On 01/04/16 22:45, Jason Ekstrand wrote:
> On Thu, Mar 31, 2016 at 3:00 AM, Samuel Iglesias Gonsálvez <
> sigles...@igalia.com> wrote:
> 
>> From: Iago Toral Quiroga 
>>
>> Found while testing UBO loads in scenarios like this:
>>
>> (assign (x) (var_ref vec_ctor)
>>   (expression float d2f
>>   (expression double ubo_load (constant uint (0)) (constant uint
>> (0)
>>
>> Without this patch, the src expression of d2f would be evaluated to a
>> bitsize
>> of 32, which is not correct and would hit an assert in nir_validate.
>> ---
>>  src/compiler/nir/glsl_to_nir.cpp | 1 +
>>  1 file changed, 1 insertion(+)
>>
>> diff --git a/src/compiler/nir/glsl_to_nir.cpp
>> b/src/compiler/nir/glsl_to_nir.cpp
>> index c77b430..8c75843 100644
>> --- a/src/compiler/nir/glsl_to_nir.cpp
>> +++ b/src/compiler/nir/glsl_to_nir.cpp
>> @@ -1190,6 +1190,7 @@ nir_visitor::evaluate_rvalue(ir_rvalue* ir)
>>load_instr->dest.ssa.bit_size =
>> glsl_get_bit_size(ir->type->base_type);
>> }
>>
>> +   this->result->bit_size = glsl_get_bit_size(ir->type->base_type);
>>
> 
> I don't think this is the correct solution.  Make everything 32-bit and
> then stomp it various places to fix bugs.  We need to make every instance
> of init_ssa_dest do the right thing (I would recommend doing a full
> audit).  In particular, add_instr probably needs a bit_size parameter.
> 

Right. I will do the audit.

Thanks,

Sam

> 
>> return this->result;
>>  }
>>
>> --
>> 2.5.0
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 23/23] nir: verify destination bit size when checking algebraic optimizations

2016-04-04 Thread Samuel Iglesias Gonsálvez


On 01/04/16 21:35, Jason Ekstrand wrote:
> Unless I'm missing something, this series doesn't contain anything that
> uses this patch.  Let's drop it for now and put it in with whatever adds
> the actual nir_opt_algebraic changes.
> 

Yeah, the user of this will be included in the next patch series. I will
move this patch there.

> Another option would be to silently bail if nir_search tries to create an
> expression where the opcode has an explicit bit size and it doesn't match
> the bit size that it needs.  I'm not sure what the best thing to do is.
> I'll think on it.
> 

OK.

Thanks,

Sam

> On Thu, Mar 31, 2016 at 3:00 AM, Samuel Iglesias Gonsálvez <
> sigles...@igalia.com> wrote:
> 
>> Some instructions (like flrp in i965) cannot be lowered depending on the
>> bit size because they don't support all bit sizes.
>>
>> If the bit size field is defined in nir_opt_algebraic.py, take it into
>> account.
>>
>> Signed-off-by: Samuel Iglesias Gonsálvez 
>> ---
>>  src/compiler/nir/nir_algebraic.py | 11 ++-
>>  1 file changed, 10 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/compiler/nir/nir_algebraic.py
>> b/src/compiler/nir/nir_algebraic.py
>> index d05564f..9c679ac 100644
>> --- a/src/compiler/nir/nir_algebraic.py
>> +++ b/src/compiler/nir/nir_algebraic.py
>> @@ -180,6 +180,11 @@ class SearchAndReplace(object):
>>else:
>>   self.condition = 'true'
>>
>> +  if len(transform) > 3:
>> + self.bit_size = transform[3]
>> +  else:
>> + self.bit_size = 0
>> +
>>if self.condition not in condition_list:
>>   condition_list.append(self.condition)
>>self.condition_index = condition_list.index(self.condition)
>> @@ -208,6 +213,7 @@ struct transform {
>> const nir_search_expression *search;
>> const nir_search_value *replace;
>> unsigned condition_offset;
>> +   unsigned bit_size;
>>  };
>>
>>  struct opt_state {
>> @@ -226,7 +232,7 @@ struct opt_state {
>>
>>  static const struct transform ${pass_name}_${opcode}_xforms[] = {
>>  % for xform in xform_list:
>> -   { &${xform.search.name}, ${xform.replace.c_ptr},
>> ${xform.condition_index} },
>> +   { &${xform.search.name}, ${xform.replace.c_ptr},
>> ${xform.condition_index}, ${xform.bit_size}},
>>  % endfor
>>  };
>>  % endfor
>> @@ -249,6 +255,9 @@ ${pass_name}_block(nir_block *block, void *void_state)
>>case nir_op_${opcode}:
>>   for (unsigned i = 0; i <
>> ARRAY_SIZE(${pass_name}_${opcode}_xforms); i++) {
>>  const struct transform *xform =
>> &${pass_name}_${opcode}_xforms[i];
>> +if (xform->bit_size != 0 &&
>> +alu->dest.dest.ssa.bit_size != xform->bit_size)
>> +   continue;
>>  if (state->condition_flags[xform->condition_offset] &&
>>  nir_replace_instr(alu, xform->search, xform->replace,
>>state->mem_ctx)) {
>> --
>> 2.5.0
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 03/23] nir: add bit_size info to nir_ssa_undef_instr_create()

2016-04-04 Thread Samuel Iglesias Gonsálvez


On 01/04/16 21:02, Jason Ekstrand wrote:
> On Thu, Mar 31, 2016 at 2:59 AM, Samuel Iglesias Gonsálvez <
> sigles...@igalia.com> wrote:
> 
>> Signed-off-by: Samuel Iglesias Gonsálvez 
>> ---
>>  src/compiler/nir/glsl_to_nir.cpp | 2 +-
>>  src/compiler/nir/nir.c   | 6 --
>>  src/compiler/nir/nir.h   | 3 ++-
>>  src/compiler/nir/nir_builder.h   | 2 +-
>>  src/compiler/nir/nir_clone.c | 3 ++-
>>  src/compiler/nir/nir_control_flow.c  | 5 +++--
>>  src/compiler/nir/nir_lower_vars_to_ssa.c | 3 ++-
>>  src/compiler/nir/nir_phi_builder.c   | 3 ++-
>>  src/compiler/nir/nir_to_ssa.c| 2 +-
>>  9 files changed, 18 insertions(+), 11 deletions(-)
>>
>> diff --git a/src/compiler/nir/glsl_to_nir.cpp
>> b/src/compiler/nir/glsl_to_nir.cpp
>> index 1fac481..c35ec7e 100644
>> --- a/src/compiler/nir/glsl_to_nir.cpp
>> +++ b/src/compiler/nir/glsl_to_nir.cpp
>> @@ -735,7 +735,7 @@ nir_visitor::visit(ir_call *ir)
>>case nir_intrinsic_image_samples:
>>case nir_intrinsic_image_size: {
>>   nir_ssa_undef_instr *instr_undef =
>> -nir_ssa_undef_instr_create(shader, 1);
>> +nir_ssa_undef_instr_create(shader, 1, 32);
>>   nir_builder_instr_insert(&b, &instr_undef->instr);
>>
>>   /* Set the image variable dereference. */
>> diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
>> index b67916d..e45f727 100644
>> --- a/src/compiler/nir/nir.c
>> +++ b/src/compiler/nir/nir.c
>> @@ -558,12 +558,14 @@ nir_parallel_copy_instr_create(nir_shader *shader)
>>  }
>>
>>  nir_ssa_undef_instr *
>> -nir_ssa_undef_instr_create(nir_shader *shader, unsigned num_components)
>> +nir_ssa_undef_instr_create(nir_shader *shader,
>> +   unsigned num_components,
>> +   unsigned bit_size)
>>  {
>> nir_ssa_undef_instr *instr = ralloc(shader, nir_ssa_undef_instr);
>> instr_init(&instr->instr, nir_instr_type_ssa_undef);
>>
>> -   nir_ssa_def_init(&instr->instr, &instr->def, num_components, 32, NULL);
>> +   nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size,
>> NULL);
>>
>> return instr;
>>  }
>> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
>> index c19ae59..3a4c771 100644
>> --- a/src/compiler/nir/nir.h
>> +++ b/src/compiler/nir/nir.h
>> @@ -1845,7 +1845,8 @@ nir_phi_instr *nir_phi_instr_create(nir_shader
>> *shader);
>>  nir_parallel_copy_instr *nir_parallel_copy_instr_create(nir_shader
>> *shader);
>>
>>  nir_ssa_undef_instr *nir_ssa_undef_instr_create(nir_shader *shader,
>> -unsigned num_components);
>> +unsigned num_components,
>> +unsigned bit_size);
>>
>>  nir_deref_var *nir_deref_var_create(void *mem_ctx, nir_variable *var);
>>  nir_deref_array *nir_deref_array_create(void *mem_ctx);
>> diff --git a/src/compiler/nir/nir_builder.h
>> b/src/compiler/nir/nir_builder.h
>> index 3dc7c25..a894aba 100644
>> --- a/src/compiler/nir/nir_builder.h
>> +++ b/src/compiler/nir/nir_builder.h
>> @@ -78,7 +78,7 @@ static inline nir_ssa_def *
>>  nir_ssa_undef(nir_builder *build, unsigned num_components, unsigned
>> bit_size)
>>  {
>> nir_ssa_undef_instr *undef =
>> -  nir_ssa_undef_instr_create(build->shader, num_components);
>> +  nir_ssa_undef_instr_create(build->shader, num_components, bit_size);
>> undef->def.bit_size = bit_size;
>> if (!undef)
>>return NULL;
>> diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c
>> index 7d2e383..a3d467d 100644
>> --- a/src/compiler/nir/nir_clone.c
>> +++ b/src/compiler/nir/nir_clone.c
>> @@ -372,7 +372,8 @@ static nir_ssa_undef_instr *
>>  clone_ssa_undef(clone_state *state, const nir_ssa_undef_instr *sa)
>>  {
>> nir_ssa_undef_instr *nsa =
>> -  nir_ssa_undef_instr_create(state->ns, sa->def.num_components);
>> +  nir_ssa_undef_instr_create(state->ns, sa->def.num_components,
>> + sa->def.bit_size);
>>
>> add_remap(state, &nsa->def, &sa->def);
>>
>> diff --git a/src/compiler/nir/nir_control_flow.c
>> b/src/compiler/nir/nir_control_flow.c
>> index 33b06d0..6d9fa1b 100644
>> --- a/src/compiler/nir/nir_control_flow.c
>> +++ b/src/compiler/nir/nir_control_flow.c
>> @@ -281,7 +281,8 @@ insert_phi_undef(nir_block *block, nir_block *pred)
>>nir_phi_instr *phi = nir_instr_as_phi(instr);
>>nir_ssa_undef_instr *undef =
>>   nir_ssa_undef_instr_create(ralloc_parent(phi),
>> -phi->dest.ssa.num_components);
>> +phi->dest.ssa.num_components,
>> +32);
>>
> 
> Should be phi->dest.ssa.bit_size
> 
> 
>>nir_instr_insert_before_cf_list(&impl->body, &undef->instr);
>>nir_phi_src *src = ralloc(phi, nir_phi_src);
>>src->pred

Re: [Mesa-dev] [PATCH v2 10/23] nir: handle doubles in nir_deref_get_const_initializer_load()

2016-04-04 Thread Samuel Iglesias Gonsálvez


On 01/04/16 21:11, Jason Ekstrand wrote:
> On Thu, Mar 31, 2016 at 2:59 AM, Samuel Iglesias Gonsálvez <
> sigles...@igalia.com> wrote:
> 
>> From: Connor Abbott 
>>
>> ---
>>  src/compiler/nir/nir.c | 5 +
>>  1 file changed, 5 insertions(+)
>>
>> diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
>> index 56a5009..35ad2b3 100644
>> --- a/src/compiler/nir/nir.c
>> +++ b/src/compiler/nir/nir.c
>> @@ -698,6 +698,8 @@ nir_deref_get_const_initializer_load(nir_shader
>> *shader, nir_deref_var *deref)
>>nir_load_const_instr_create(shader,
>> glsl_get_vector_elements(tail->type),
>>32);
>>
>> +   load->def.bit_size = glsl_get_bit_size(glsl_get_base_type(tail->type));
>>
> 
> This should just be passed into the nir_load_const_instr_create() call above.
> 

Oh, I missed it.

Thanks!

Sam

> 
>> +
>> matrix_offset *= load->def.num_components;
>> for (unsigned i = 0; i < load->def.num_components; i++) {
>>switch (glsl_get_base_type(tail->type)) {
>> @@ -706,6 +708,9 @@ nir_deref_get_const_initializer_load(nir_shader
>> *shader, nir_deref_var *deref)
>>case GLSL_TYPE_UINT:
>>   load->value.u32[i] = constant->value.u[matrix_offset + i];
>>   break;
>> +  case GLSL_TYPE_DOUBLE:
>> + load->value.f64[i] = constant->value.d[matrix_offset + i];
>> + break;
>>case GLSL_TYPE_BOOL:
>>   load->value.u32[i] = constant->value.b[matrix_offset + i] ?
>>   NIR_TRUE : NIR_FALSE;
>> --
>> 2.5.0
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 09/23] nir/print: add support for printing doubles and bitsize

2016-04-04 Thread Samuel Iglesias Gonsálvez


On 01/04/16 21:09, Jason Ekstrand wrote:
> On Thu, Mar 31, 2016 at 2:59 AM, Samuel Iglesias Gonsálvez <
> sigles...@igalia.com> wrote:
> 
>> From: Connor Abbott 
>>
>> v2:
>> - Squash the printing doubles related patches into one patch (Sam).
>> ---
>>  src/compiler/nir/nir_print.c | 17 ++---
>>  1 file changed, 14 insertions(+), 3 deletions(-)
>>
>> diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
>> index c295c19..df7ef91 100644
>> --- a/src/compiler/nir/nir_print.c
>> +++ b/src/compiler/nir/nir_print.c
>> @@ -68,7 +68,7 @@ static void
>>  print_register_decl(nir_register *reg, print_state *state)
>>  {
>> FILE *fp = state->fp;
>> -   fprintf(fp, "decl_reg %s ", sizes[reg->num_components]);
>> +   fprintf(fp, "decl_reg %s %u ", sizes[reg->num_components],
>> reg->bit_size);
>> if (reg->is_packed)
>>fprintf(fp, "(packed) ");
>> print_register(reg, state);
>> @@ -83,7 +83,8 @@ print_ssa_def(nir_ssa_def *def, print_state *state)
>> FILE *fp = state->fp;
>> if (def->name != NULL)
>>fprintf(fp, "/* %s */ ", def->name);
>> -   fprintf(fp, "%s ssa_%u", sizes[def->num_components], def->index);
>> +   fprintf(fp, "%s %u ssa_%u", sizes[def->num_components], def->bit_size,
>> +   def->index);
>>  }
>>
>>  static void
>> @@ -279,6 +280,13 @@ print_constant(nir_constant *c, const struct
>> glsl_type *type, print_state *state
>>}
>>break;
>>
>> +   case GLSL_TYPE_DOUBLE:
>> +  for (i = 0; i < total_elems; i++) {
>> + if (i > 0) fprintf(fp, ", ");
>> + fprintf(fp, "%f", c->value.d[i]);
>> +  }
>> +  break;
>> +
>> case GLSL_TYPE_STRUCT:
>>for (i = 0; i < c->num_elements; i++) {
>>   if (i > 0) fprintf(fp, ", ");
>> @@ -713,7 +721,10 @@ print_load_const_instr(nir_load_const_instr *instr,
>> print_state *state)
>> * and then print the float in a comment for readability.
>> */
>>
>> -  fprintf(fp, "0x%08x /* %f */", instr->value.u32[i],
>> instr->value.f32[i]);
>> +  if (instr->def.bit_size == 64)
>> + fprintf(fp, "0x%16lx /* %f */", instr->value.u64[i],
>> instr->value.f64[i]);
>>
> 
> I think you want llx.  Long is 32-bit on some 32-bit platforms but long
> long is basically always 64-bit.
> 

Oh right. Actually, it should be the PRIx64 macro, which is defined in
inttypes.h. This is also done in prog_print.c and other parts of Mesa.

Thanks,

Sam

> 
>> +  else
>> + fprintf(fp, "0x%08x /* %f */", instr->value.u32[i],
>> instr->value.f32[i]);
>> }
>>
>> fprintf(fp, ")");
>> --
>> 2.5.0
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   >