Re: [Mesa-dev] [PATCH] i965: Emit surface states for extra planes prior to gen8

2016-06-08 Thread Jason Ekstrand
On Jun 8, 2016 9:25 PM, "Kristian Høgsberg"  wrote:
>
> On Wed, Jun 8, 2016 at 9:07 PM, Jason Ekstrand 
wrote:
> > When Kristian implemented GL_TEXTURE_EXTERNAL_OES, he hooked it up for
gen8
> > but not for gen7 or earlier.  It all works, we just need to emit the
states
> > for the extra planes.
>
> Ah yes, thanks.
>
> Reviewed-by: Kristian Høgsberg 

Thanks!

> > Cc: "12.0" 
> > Cc: Kristian Høgsberg 
> > ---
> >  src/mesa/drivers/dri/i965/brw_wm_surface_state.c  | 9 +
> >  src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 9 +
> >  2 files changed, 18 insertions(+)
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> > index b73d5d5..133a944 100644
> > --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> > +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> > @@ -338,6 +338,15 @@ brw_update_texture_surface(struct gl_context *ctx,
> > uint32_t tex_format = translate_tex_format(brw, mt->format,
> >sampler->sRGBDecode);
> >
> > +   if (tObj->Target == GL_TEXTURE_EXTERNAL_OES) {
> > +  if (plane > 0)
> > + mt = mt->plane[plane - 1];
> > +  if (mt == NULL)
> > + return;
> > +
> > +  tex_format = translate_tex_format(brw, mt->format,
sampler->sRGBDecode);
> > +   }
> > +
> > if (for_gather) {
> >/* Sandybridge's gather4 message is broken for integer formats.
> > * To work around this, we pretend the surface is UNORM for
> > diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
> > index 329164d..60589bc 100644
> > --- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
> > +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
> > @@ -386,6 +386,15 @@ gen7_update_texture_surface(struct gl_context *ctx,
> >unsigned format = translate_tex_format(
> >   brw, intel_obj->_Format, sampler->sRGBDecode);
> >
> > +  if (obj->Target == GL_TEXTURE_EXTERNAL_OES) {
> > + if (plane > 0)
> > +mt = mt->plane[plane - 1];
> > + if (mt == NULL)
> > +return;
> > +
> > + format = translate_tex_format(brw, mt->format,
sampler->sRGBDecode);
> > +  }
> > +
> >if (for_gather && format == BRW_SURFACEFORMAT_R32G32_FLOAT)
> >   format = BRW_SURFACEFORMAT_R32G32_FLOAT_LD;
> >
> > --
> > 2.5.0.400.gff86faf
> >
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] mesa/gallium: Move u_bit_scan{, 64} from gallium to mesa.

2016-06-08 Thread Mathias . Froehlich
From: Mathias Fröhlich 

The functions are also useful for mesa.
Introduce src/util/bitscan.{h,c}. Move ffs function
implementations from src/mesa/main/imports.{h,c}.
Move bit scan related functions from
src/gallium/auxiliary/util/u_math.h. Merge platform
handling with what is available from within mesa.

Signed-off-by: Mathias Fröhlich 
---
 src/gallium/auxiliary/util/u_math.h | 149 +--
 src/mesa/main/imports.c |  58 --
 src/mesa/main/imports.h |  16 
 src/util/Makefile.sources   |   2 +
 src/util/bitscan.c  |  80 +++
 src/util/bitscan.h  | 151 
 6 files changed, 234 insertions(+), 222 deletions(-)
 create mode 100644 src/util/bitscan.c
 create mode 100644 src/util/bitscan.h

diff --git a/src/gallium/auxiliary/util/u_math.h 
b/src/gallium/auxiliary/util/u_math.h
index ecb1d63..c94967e 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -46,14 +46,7 @@
 #include 
 #include 
 
-#ifdef PIPE_OS_UNIX
-#include <strings.h> /* for ffs */
-#endif
-
-#if defined(_MSC_VER)
-#include <intrin.h>
-#endif
-
+#include "util/bitscan.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -354,80 +347,6 @@ util_half_inf_sign(int16_t x)
 
 
 /**
- * Find first bit set in word.  Least significant bit is 1.
- * Return 0 if no bits set.
- */
-#ifndef FFS_DEFINED
-#define FFS_DEFINED 1
-
-#if defined(_MSC_VER) && (_M_IX86 || _M_AMD64 || _M_IA64)
-static inline
-unsigned long ffs( unsigned long u )
-{
-   unsigned long i;
-   if (_BitScanForward(&i, u))
-  return i + 1;
-   else
-  return 0;
-}
-#elif defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86)
-static inline
-unsigned ffs( unsigned u )
-{
-   unsigned i;
-
-   if (u == 0) {
-  return 0;
-   }
-
-   __asm bsf eax, [u]
-   __asm inc eax
-   __asm mov [i], eax
-
-   return i;
-}
-#elif defined(__MINGW32__) || defined(PIPE_OS_ANDROID) || \
-defined(HAVE___BUILTIN_FFS)
-#define ffs __builtin_ffs
-#endif
-
-#ifdef HAVE___BUILTIN_FFSLL
-#define ffsll __builtin_ffsll
-#else
-static inline int
-ffsll(long long int val)
-{
-   int bit;
-
-   bit = ffs((unsigned) (val & 0xffffffff));
-   if (bit != 0)
-  return bit;
-
-   bit = ffs((unsigned) (val >> 32));
-   if (bit != 0)
-  return 32 + bit;
-
-   return 0;
-}
-#endif
-
-#endif /* FFS_DEFINED */
-
-/**
- * Find first bit set in long long.  Least significant bit is 1.
- * Return 0 if no bits set.
- */
-#ifndef FFSLL_DEFINED
-#define FFSLL_DEFINED 1
-
-#if defined(__MINGW32__) || defined(PIPE_OS_ANDROID) || \
-defined(HAVE___BUILTIN_FFSLL)
-#define ffsll __builtin_ffsll
-#endif
-
-#endif /* FFSLL_DEFINED */
-
-/**
  * Find last bit set in a word.  The least significant bit is 1.
  * Return 0 if no bits are set.
  */
@@ -479,72 +398,6 @@ util_last_bit_signed(int i)
   return util_last_bit(~(unsigned)i);
 }
 
-/* Destructively loop over all of the bits in a mask as in:
- *
- * while (mymask) {
- *   int i = u_bit_scan(&mymask);
- *   ... process element i
- * }
- *
- */
-static inline int
-u_bit_scan(unsigned *mask)
-{
-   int i = ffs(*mask) - 1;
-   *mask &= ~(1u << i);
-   return i;
-}
-
-#ifndef _MSC_VER
-static inline int
-u_bit_scan64(uint64_t *mask)
-{
-   int i = ffsll(*mask) - 1;
-   *mask &= ~(1llu << i);
-   return i;
-}
-#endif
-
-/* For looping over a bitmask when you want to loop over consecutive bits
- * manually, for example:
- *
- * while (mask) {
- *int start, count, i;
- *
- *u_bit_scan_consecutive_range(&mask, &start, &count);
- *
- *for (i = 0; i < count; i++)
- *   ... process element (start+i)
- * }
- */
-static inline void
-u_bit_scan_consecutive_range(unsigned *mask, int *start, int *count)
-{
-   if (*mask == 0xffffffff) {
-  *start = 0;
-  *count = 32;
-  *mask = 0;
-  return;
-   }
-   *start = ffs(*mask) - 1;
-   *count = ffs(~(*mask >> *start)) - 1;
-   *mask &= ~(((1u << *count) - 1) << *start);
-}
-
-static inline void
-u_bit_scan_consecutive_range64(uint64_t *mask, int *start, int *count)
-{
-   if (*mask == ~0llu) {
-  *start = 0;
-  *count = 64;
-  *mask = 0;
-  return;
-   }
-   *start = ffsll(*mask) - 1;
-   *count = ffsll(~(*mask >> *start)) - 1;
-   *mask &= ~(((1llu << *count) - 1) << *start);
-}
-
 /* Returns a bitfield in which the first count bits starting at start are
  * set.
  */
diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c
index fe54109..808b8f6 100644
--- a/src/mesa/main/imports.c
+++ b/src/mesa/main/imports.c
@@ -219,64 +219,6 @@ _mesa_align_realloc(void *oldBuffer, size_t oldSize, 
size_t newSize,
 /*@{*/
 
 
-#ifndef HAVE___BUILTIN_FFS
-/**
- * Find the first bit set in a word.
- */
-int
-ffs(int i)
-{
-   register int bit = 0;
-   if (i != 0) {
-  if ((i & 0xffff) == 0) {
- bit += 16;
- i >>= 16;
-  }
-  if ((i & 0xff) == 0) {
- bit += 8;
- i >>= 8;
-  

[Mesa-dev] [PATCH 0/2] Move u_bit_scan from gallium to util.

2016-06-08 Thread Mathias . Froehlich
From: Mathias Fröhlich 

Hi all,

I am just posting the two leading patches that move u_bit_scan
from gallium to util. The architecture handling of
ffs{,ll} is merged with what is available in the
mesa/util directory. It compiles and works here and
we should get slightly better intrinsic coverage on win32.
At least according to what is documented at msdn. But
the chance to break one of our other builds or
architectures is high with this kind of change.
So any hints or testing in that regard are
even more appreciated.

The rest of the series posted earlier will be posted
again once this part has landed.

Please review.

Thanks

Mathias

Mathias Fröhlich (2):
  mesa/gallium: Move u_bit_scan{,64} from gallium to mesa.
  mesa: Make use of u_bit_scan{,64}.

 src/gallium/auxiliary/util/u_math.h | 149 +--
 src/mesa/main/arrayobj.c|   6 +-
 src/mesa/main/buffers.c |   4 +-
 src/mesa/main/imports.c |  58 --
 src/mesa/main/imports.h |  16 
 src/util/Makefile.sources   |   2 +
 src/util/bitscan.c  |  80 +++
 src/util/bitscan.h  | 151 
 8 files changed, 239 insertions(+), 227 deletions(-)
 create mode 100644 src/util/bitscan.c
 create mode 100644 src/util/bitscan.h

-- 
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] mesa: Make use of u_bit_scan{,64}.

2016-06-08 Thread Mathias . Froehlich
From: Mathias Fröhlich 

Signed-off-by: Mathias Fröhlich 
---
 src/mesa/main/arrayobj.c | 6 +++---
 src/mesa/main/buffers.c  | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/mesa/main/arrayobj.c b/src/mesa/main/arrayobj.c
index 897dac6..9c3451e 100644
--- a/src/mesa/main/arrayobj.c
+++ b/src/mesa/main/arrayobj.c
@@ -52,6 +52,7 @@
 #include "mtypes.h"
 #include "varray.h"
 #include "main/dispatch.h"
+#include "util/bitscan.h"
 
 
 /**
@@ -342,13 +343,12 @@ _mesa_update_vao_client_arrays(struct gl_context *ctx,
GLbitfield64 arrays = vao->NewArrays;
 
while (arrays) {
+  const int attrib = u_bit_scan64(&arrays);
+
   struct gl_client_array *client_array;
   struct gl_vertex_attrib_array *attrib_array;
   struct gl_vertex_buffer_binding *buffer_binding;
 
-  GLint attrib = ffsll(arrays) - 1;
-  arrays ^= BITFIELD64_BIT(attrib);
-
   attrib_array = &vao->VertexAttrib[attrib];
   buffer_binding = &vao->VertexBinding[attrib_array->VertexBinding];
   client_array = &vao->_VertexAttrib[attrib];
diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c
index a28c583..e8aedde 100644
--- a/src/mesa/main/buffers.c
+++ b/src/mesa/main/buffers.c
@@ -36,6 +36,7 @@
 #include "enums.h"
 #include "fbobject.h"
 #include "mtypes.h"
+#include "util/bitscan.h"
 
 
 #define BAD_MASK ~0u
@@ -595,13 +596,12 @@ _mesa_drawbuffers(struct gl_context *ctx, struct 
gl_framebuffer *fb,
if (n > 0 && _mesa_bitcount(destMask[0]) > 1) {
   GLuint count = 0, destMask0 = destMask[0];
   while (destMask0) {
- GLint bufIndex = ffs(destMask0) - 1;
+ const int bufIndex = u_bit_scan(&destMask0);
  if (fb->_ColorDrawBufferIndexes[count] != bufIndex) {
 updated_drawbuffers(ctx, fb);
 fb->_ColorDrawBufferIndexes[count] = bufIndex;
  }
  count++;
- destMask0 &= ~(1 << bufIndex);
   }
   fb->ColorDrawBuffer[0] = buffers[0];
   fb->_NumColorDrawBuffers = count;
-- 
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glsl: fix max varyings count for ARB_enhanced_layouts

2016-06-08 Thread Timothy Arceri
Since this extension allows more than one varying to share a single
location we can't just count the number of slots a varying takes and
add it to the total.

Instead we now reuse the reserved varyings bitfield to determine how
many slots are reserved for explicit locations.
---
 src/compiler/glsl/link_varyings.cpp | 25 +
 src/compiler/glsl/link_varyings.h   | 12 +---
 src/compiler/glsl/linker.cpp| 26 +-
 3 files changed, 43 insertions(+), 20 deletions(-)

diff --git a/src/compiler/glsl/link_varyings.cpp 
b/src/compiler/glsl/link_varyings.cpp
index 70fcb01..7871562 100644
--- a/src/compiler/glsl/link_varyings.cpp
+++ b/src/compiler/glsl/link_varyings.cpp
@@ -1937,7 +1937,7 @@ canonicalize_shader_io(exec_list *ir, enum 
ir_variable_mode io_mode)
  * 64 bit map. Per-vertex and per-patch both have separate location domains
  * with a max of MAX_VARYING.
  */
-static uint64_t
+uint64_t
 reserved_varying_slot(struct gl_shader *stage, ir_variable_mode io_mode)
 {
assert(io_mode == ir_var_shader_in || io_mode == ir_var_shader_out);
@@ -2063,7 +2063,8 @@ assign_varying_locations(struct gl_context *ctx,
  struct gl_shader_program *prog,
  gl_shader *producer, gl_shader *consumer,
  unsigned num_tfeedback_decls,
- tfeedback_decl *tfeedback_decls)
+ tfeedback_decl *tfeedback_decls,
+ const uint64_t reserved_slots)
 {
/* Tessellation shaders treat inputs and outputs as shared memory and can
 * access inputs and outputs of other invocations.
@@ -2247,10 +2248,6 @@ assign_varying_locations(struct gl_context *ctx,
   }
}
 
-   const uint64_t reserved_slots =
-  reserved_varying_slot(producer, ir_var_shader_out) |
-  reserved_varying_slot(consumer, ir_var_shader_in);
-
const unsigned slots_used = matches.assign_locations(prog, reserved_slots);
matches.store_locations();
 
@@ -2333,14 +2330,16 @@ assign_varying_locations(struct gl_context *ctx,
 bool
 check_against_output_limit(struct gl_context *ctx,
struct gl_shader_program *prog,
-   gl_shader *producer)
+   gl_shader *producer,
+   unsigned num_explicit_locations)
 {
-   unsigned output_vectors = 0;
+   unsigned output_vectors = num_explicit_locations;
 
foreach_in_list(ir_instruction, node, producer->ir) {
   ir_variable *const var = node->as_variable();
 
-  if (var && var->data.mode == ir_var_shader_out &&
+  if (var && !var->data.explicit_location &&
+  var->data.mode == ir_var_shader_out &&
   var_counts_against_varying_limit(producer->Stage, var)) {
  /* outputs for fragment shader can't be doubles */
  output_vectors += var->type->count_attribute_slots(false);
@@ -2375,14 +2374,16 @@ check_against_output_limit(struct gl_context *ctx,
 bool
 check_against_input_limit(struct gl_context *ctx,
   struct gl_shader_program *prog,
-  gl_shader *consumer)
+  gl_shader *consumer,
+  unsigned num_explicit_locations)
 {
-   unsigned input_vectors = 0;
+   unsigned input_vectors = num_explicit_locations;
 
foreach_in_list(ir_instruction, node, consumer->ir) {
   ir_variable *const var = node->as_variable();
 
-  if (var && var->data.mode == ir_var_shader_in &&
+  if (var && !var->data.explicit_location &&
+  var->data.mode == ir_var_shader_in &&
   var_counts_against_varying_limit(consumer->Stage, var)) {
  /* vertex inputs aren't varying counted */
  input_vectors += var->type->count_attribute_slots(false);
diff --git a/src/compiler/glsl/link_varyings.h 
b/src/compiler/glsl/link_varyings.h
index 2126a5c..39e9070 100644
--- a/src/compiler/glsl/link_varyings.h
+++ b/src/compiler/glsl/link_varyings.h
@@ -320,16 +320,22 @@ assign_varying_locations(struct gl_context *ctx,
 struct gl_shader_program *prog,
 gl_shader *producer, gl_shader *consumer,
  unsigned num_tfeedback_decls,
- tfeedback_decl *tfeedback_decls);
+ tfeedback_decl *tfeedback_decls,
+ const uint64_t reserved_slots);
+
+uint64_t
+reserved_varying_slot(struct gl_shader *stage, ir_variable_mode io_mode);
 
 bool
 check_against_output_limit(struct gl_context *ctx,
struct gl_shader_program *prog,
-   gl_shader *producer);
+   gl_shader *producer,
+   unsigned num_explicit_locations);
 
 bool
 check_against_input_limit(struct gl_context *ctx,
   struct gl_shader_program *prog,
-  gl_shader 

Re: [Mesa-dev] [PATCH] i965: Emit surface states for extra planes prior to gen8

2016-06-08 Thread Kristian Høgsberg
On Wed, Jun 8, 2016 at 9:07 PM, Jason Ekstrand  wrote:
> When Kristian implemented GL_TEXTURE_EXTERNAL_OES, he hooked it up for gen8
> but not for gen7 or earlier.  It all works, we just need to emit the states
> for the extra planes.

Ah yes, thanks.

Reviewed-by: Kristian Høgsberg 

> Cc: "12.0" 
> Cc: Kristian Høgsberg 
> ---
>  src/mesa/drivers/dri/i965/brw_wm_surface_state.c  | 9 +
>  src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 9 +
>  2 files changed, 18 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
> b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> index b73d5d5..133a944 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> @@ -338,6 +338,15 @@ brw_update_texture_surface(struct gl_context *ctx,
> uint32_t tex_format = translate_tex_format(brw, mt->format,
>sampler->sRGBDecode);
>
> +   if (tObj->Target == GL_TEXTURE_EXTERNAL_OES) {
> +  if (plane > 0)
> + mt = mt->plane[plane - 1];
> +  if (mt == NULL)
> + return;
> +
> +  tex_format = translate_tex_format(brw, mt->format, 
> sampler->sRGBDecode);
> +   }
> +
> if (for_gather) {
>/* Sandybridge's gather4 message is broken for integer formats.
> * To work around this, we pretend the surface is UNORM for
> diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c 
> b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
> index 329164d..60589bc 100644
> --- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
> @@ -386,6 +386,15 @@ gen7_update_texture_surface(struct gl_context *ctx,
>unsigned format = translate_tex_format(
>   brw, intel_obj->_Format, sampler->sRGBDecode);
>
> +  if (obj->Target == GL_TEXTURE_EXTERNAL_OES) {
> + if (plane > 0)
> +mt = mt->plane[plane - 1];
> + if (mt == NULL)
> +return;
> +
> + format = translate_tex_format(brw, mt->format, sampler->sRGBDecode);
> +  }
> +
>if (for_gather && format == BRW_SURFACEFORMAT_R32G32_FLOAT)
>   format = BRW_SURFACEFORMAT_R32G32_FLOAT_LD;
>
> --
> 2.5.0.400.gff86faf
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965: Emit surface states for extra planes prior to gen8

2016-06-08 Thread Jason Ekstrand
When Kristian implemented GL_TEXTURE_EXTERNAL_OES, he hooked it up for gen8
but not for gen7 or earlier.  It all works, we just need to emit the states
for the extra planes.

Cc: "12.0" 
Cc: Kristian Høgsberg 
---
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c  | 9 +
 src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 9 +
 2 files changed, 18 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index b73d5d5..133a944 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -338,6 +338,15 @@ brw_update_texture_surface(struct gl_context *ctx,
uint32_t tex_format = translate_tex_format(brw, mt->format,
   sampler->sRGBDecode);
 
+   if (tObj->Target == GL_TEXTURE_EXTERNAL_OES) {
+  if (plane > 0)
+ mt = mt->plane[plane - 1];
+  if (mt == NULL)
+ return;
+
+  tex_format = translate_tex_format(brw, mt->format, sampler->sRGBDecode);
+   }
+
if (for_gather) {
   /* Sandybridge's gather4 message is broken for integer formats.
* To work around this, we pretend the surface is UNORM for
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
index 329164d..60589bc 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
@@ -386,6 +386,15 @@ gen7_update_texture_surface(struct gl_context *ctx,
   unsigned format = translate_tex_format(
  brw, intel_obj->_Format, sampler->sRGBDecode);
 
+  if (obj->Target == GL_TEXTURE_EXTERNAL_OES) {
+ if (plane > 0)
+mt = mt->plane[plane - 1];
+ if (mt == NULL)
+return;
+
+ format = translate_tex_format(brw, mt->format, sampler->sRGBDecode);
+  }
+
   if (for_gather && format == BRW_SURFACEFORMAT_R32G32_FLOAT)
  format = BRW_SURFACEFORMAT_R32G32_FLOAT_LD;
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965: Emit surface states for extra planes prior to gen8

2016-06-08 Thread Jason Ekstrand
When Kristian implemented GL_TEXTURE_EXTERNAL_OES, he hooked it up for gen8
but not for gen7 or earlier.  It all works, we just need to emit the states
for the extra planes.

Cc: "12.0" 
Cc: Kristian Høgsberg 
---
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c  | 9 +
 src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 9 +
 2 files changed, 18 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index b73d5d5..133a944 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -338,6 +338,15 @@ brw_update_texture_surface(struct gl_context *ctx,
uint32_t tex_format = translate_tex_format(brw, mt->format,
   sampler->sRGBDecode);
 
+   if (tObj->Target == GL_TEXTURE_EXTERNAL_OES) {
+  if (plane > 0)
+ mt = mt->plane[plane - 1];
+  if (mt == NULL)
+ return;
+
+  tex_format = translate_tex_format(brw, mt->format, sampler->sRGBDecode);
+   }
+
if (for_gather) {
   /* Sandybridge's gather4 message is broken for integer formats.
* To work around this, we pretend the surface is UNORM for
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
index 329164d..60589bc 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
@@ -386,6 +386,15 @@ gen7_update_texture_surface(struct gl_context *ctx,
   unsigned format = translate_tex_format(
  brw, intel_obj->_Format, sampler->sRGBDecode);
 
+  if (obj->Target == GL_TEXTURE_EXTERNAL_OES) {
+ if (plane > 0)
+mt = mt->plane[plane - 1];
+ if (mt == NULL)
+return;
+
+ format = translate_tex_format(brw, mt->format, sampler->sRGBDecode);
+  }
+
   if (for_gather && format == BRW_SURFACEFORMAT_R32G32_FLOAT)
  format = BRW_SURFACEFORMAT_R32G32_FLOAT_LD;
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [rfc] ARB_gpu_shader_int64 support (softpipe/llvmpipe)

2016-06-08 Thread Dave Airlie
On 9 June 2016 at 10:48, Dave Airlie  wrote:
> I started this quite a while back, and decided to spend some
> time on it again, since it's the basis for a bunch of the newer
> GL extensions.
>
> So this is my first pass at it, it's fairly complete, but it
> definitely is missing some pieces before it's finished.
>
> The patchset adds core mesa, gallium, softpipe and llvmpipe support.
> I've got a few tests in piglit using a generator for all the built-ins
> and operations that all pass.
>
> So the big thing here was OMG the conversions, and is also
> the biggest missing thing in the gallium code, I haven't plumbed
> through all the conversions into opcodes yet, I'm not really sure
> if we need all of them, but it'll be a quite large opcode set,
> and I wasn't feeling brave enough yet.
>
> The other thing is I've implemented MOD/DIV opcodes that may or
> may not be useful on real hardware, if gallium drivers are okay
> with lowering this in the backends then I'm fine with how it is.
> (and gallium docs do need updating).

I've also posted this to my arb_gpu_shader_int64 branch, and I've
added some more bits I've been finding as I run tests.

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/5] glsl: Optionally lower TES gl_PatchVerticesIn to a uniform.

2016-06-08 Thread Kenneth Graunke
On Wednesday, June 8, 2016 7:45:16 PM PDT Alejandro Piñeiro wrote:
> On 02/06/16 23:09, Kenneth Graunke wrote:
> > i965 has no special hardware for this, so we need to pass this value in
> > as a uniform (unless the TES is linked against a TCS, in which case the
> > linker can just replace this with a constant).
> >
> > Signed-off-by: Kenneth Graunke 
> > Cc: mesa-sta...@lists.freedesktop.org
> > ---
> >  src/compiler/glsl/linker.cpp  | 20 +---
> >  src/mesa/main/mtypes.h|  1 +
> >  src/mesa/program/prog_statevars.c |  7 +++
> >  src/mesa/program/prog_statevars.h |  1 +
> >  4 files changed, 26 insertions(+), 3 deletions(-)
> >
> > diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
> > index 9e65590..7c7dbfe 100644
> > --- a/src/compiler/glsl/linker.cpp
> > +++ b/src/compiler/glsl/linker.cpp
> > @@ -72,6 +72,7 @@
> >  #include "ir.h"
> >  #include "program.h"
> >  #include "program/hash_table.h"
> > +#include "program/prog_instruction.h"
> >  #include "linker.h"
> >  #include "link_varyings.h"
> >  #include "ir_optimization.h"
> > @@ -2485,7 +2486,7 @@ resize_tes_inputs(struct gl_context *ctx,
> > >ir->accept(&input_resize_visitor);
> > }
> >  
> > -   if (tcs) {
> > +   if (tcs || ctx->Const.LowerTESPatchVerticesIn) {
> >/* Convert the gl_PatchVerticesIn system value into a constant, since
> > * the value is known at this point.
> 
> This comment is now obsolete, as the value can be converted to a constant
> or a uniform. Perhaps it would be better to just remove the comment.
> 
> > */
> > @@ -2494,9 +2495,22 @@ resize_tes_inputs(struct gl_context *ctx,
> >   if (var && var->data.mode == ir_var_system_value &&
> >   var->data.location == SYSTEM_VALUE_VERTICES_IN) {
> >  void *mem_ctx = ralloc_parent(var);
> > -var->data.mode = ir_var_auto;
> >  var->data.location = 0;
> > -var->constant_value = new(mem_ctx) ir_constant(num_vertices);
> > +var->data.explicit_location = false;
> 
> Nitpick: With this change you are also slightly changing the option that
> was present before (use a constant), in addition to adding the option to
> lower to a uniform. Not sure if it is worth mentioning though,
> feel free to ignore.

I am?  Before this patch, there was no option.  If the program being
linked contained a TCS, we would turn it into a constant.  If not, we
would leave it as a system value.

With this patch, my intention was to do the same behavior when no
options were set.  If ctx->Const.LowerTESPatchVerticesIn is set, we
turn it into a constant if there's a linked TCS (as before), and turn
it into a uniform otherwise (instead of a system value).

Did I miss something?


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/5] glsl: Optionally lower TCS gl_PatchVerticesIn to a uniform.

2016-06-08 Thread Kenneth Graunke
On Wednesday, June 8, 2016 7:45:43 PM PDT Alejandro Piñeiro wrote:
> On 02/06/16 23:09, Kenneth Graunke wrote:
> > i965 has no special hardware for this, so the best way to implement
> > this is to pass it in via a uniform.
> 
> Is this comment accurate? On the next patch you implement the i965 bits,
> and you only do the lowering on gen8+, mentioning that you would like to
> do the same on gen7. So for gen7 it is implemented, but a uniform is
> not used. Or am I missing something?

i965 hardware doesn't provide gl_PatchVerticesIn in the TCS thread
payload, as one might expect.  Currently, we implement it as a
compile-time constant, and recompile the shader whenever it changes
(on all generations).

Another way to implement it is to pass it in as a uniform.  This is
arguably better because it doesn't require recompiling the shader
whenever you change the number of vertices in your input patch.

The next patch makes it use a uniform and stop recompiling on Gen8+.

However, on Gen7, we also have to loop over all the input vertices
and emit a URB write message (input release) for every pair.  Because
we know this at compile time, it's easy to emit the right number of
URB write messages.  So I left it as is for now (still recompiling).

I could instead rework that to use an actual DO..WHILE loop in the
shader itself, and then a uniform would work.


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 19/23] gallium/tgsi: add support for 64-bit integer immediates.

2016-06-08 Thread Dave Airlie
From: Dave Airlie 

This adds support to TGSI for 64-bit integer immediates.

Signed-off-by: Dave Airlie 
---
 src/gallium/auxiliary/tgsi/tgsi_dump.c |  14 ++
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 244 -
 src/gallium/auxiliary/tgsi/tgsi_parse.c|   2 +
 src/gallium/auxiliary/tgsi/tgsi_text.c |  44 ++
 src/gallium/auxiliary/tgsi/tgsi_ureg.c |  45 +-
 src/gallium/auxiliary/tgsi/tgsi_ureg.h |  10 ++
 src/gallium/include/pipe/p_shader_tokens.h |   2 +
 7 files changed, 358 insertions(+), 3 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c 
b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index d59b7ff..614bcb2 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -254,6 +254,20 @@ dump_imm_data(struct tgsi_iterate_context *iter,
  i++;
  break;
   }
+  case TGSI_IMM_INT64: {
+ union di d;
+ d.i = data[i].Uint | (uint64_t)data[i+1].Uint << 32;
+ UID( d.i );
+ i++;
+ break;
+  }
+  case TGSI_IMM_UINT64: {
+ union di d;
+ d.ui = data[i].Uint | (uint64_t)data[i+1].Uint << 32;
+ UID( d.ui );
+ i++;
+ break;
+  }
   case TGSI_IMM_FLOAT32:
  if (ctx->dump_float_as_hex)
 HFLT( data[i].Float );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 1457c06..c929475 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -77,6 +77,8 @@
 union tgsi_double_channel {
double d[TGSI_QUAD_SIZE];
unsigned u[TGSI_QUAD_SIZE][2];
+   uint64_t u64[TGSI_QUAD_SIZE];
+   int64_t i64[TGSI_QUAD_SIZE];
 };
 
 struct tgsi_double_vector {
@@ -692,11 +694,251 @@ micro_u2d(union tgsi_double_channel *dst,
dst->d[3] = (double)src->u[3];
 }
 
+static void
+micro_i64abs(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+   dst->i64[0] = src->i64[0] >= 0.0 ? src->i64[0] : -src->i64[0];
+   dst->i64[1] = src->i64[1] >= 0.0 ? src->i64[1] : -src->i64[1];
+   dst->i64[2] = src->i64[2] >= 0.0 ? src->i64[2] : -src->i64[2];
+   dst->i64[3] = src->i64[3] >= 0.0 ? src->i64[3] : -src->i64[3];
+}
+
+static void
+micro_i64sgn(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+   dst->i64[0] = src->i64[0] < 0 ? -1 : src->i64[0] > 0 ? 1 : 0;
+   dst->i64[1] = src->i64[1] < 0 ? -1 : src->i64[1] > 0 ? 1 : 0;
+   dst->i64[2] = src->i64[2] < 0 ? -1 : src->i64[2] > 0 ? 1 : 0;
+   dst->i64[3] = src->i64[3] < 0 ? -1 : src->i64[3] > 0 ? 1 : 0;
+}
+
+static void
+micro_i64neg(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+   dst->i64[0] = -src->i64[0];
+   dst->i64[1] = -src->i64[1];
+   dst->i64[2] = -src->i64[2];
+   dst->i64[3] = -src->i64[3];
+}
+
+static void
+micro_u64seq(union tgsi_double_channel *dst,
+   const union tgsi_double_channel *src)
+{
+   dst->u[0][0] = src[0].u64[0] == src[1].u64[0] ? ~0U : 0U;
+   dst->u[1][0] = src[0].u64[1] == src[1].u64[1] ? ~0U : 0U;
+   dst->u[2][0] = src[0].u64[2] == src[1].u64[2] ? ~0U : 0U;
+   dst->u[3][0] = src[0].u64[3] == src[1].u64[3] ? ~0U : 0U;
+}
+
+static void
+micro_u64sne(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+   dst->u[0][0] = src[0].u64[0] != src[1].u64[0] ? ~0U : 0U;
+   dst->u[1][0] = src[0].u64[1] != src[1].u64[1] ? ~0U : 0U;
+   dst->u[2][0] = src[0].u64[2] != src[1].u64[2] ? ~0U : 0U;
+   dst->u[3][0] = src[0].u64[3] != src[1].u64[3] ? ~0U : 0U;
+}
+
+static void
+micro_i64slt(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+   dst->u[0][0] = src[0].i64[0] < src[1].i64[0] ? ~0U : 0U;
+   dst->u[1][0] = src[0].i64[1] < src[1].i64[1] ? ~0U : 0U;
+   dst->u[2][0] = src[0].i64[2] < src[1].i64[2] ? ~0U : 0U;
+   dst->u[3][0] = src[0].i64[3] < src[1].i64[3] ? ~0U : 0U;
+}
+
+static void
+micro_u64slt(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+   dst->u[0][0] = src[0].u64[0] < src[1].u64[0] ? ~0U : 0U;
+   dst->u[1][0] = src[0].u64[1] < src[1].u64[1] ? ~0U : 0U;
+   dst->u[2][0] = src[0].u64[2] < src[1].u64[2] ? ~0U : 0U;
+   dst->u[3][0] = src[0].u64[3] < src[1].u64[3] ? ~0U : 0U;
+}
+
+static void
+micro_i64sge(union tgsi_double_channel *dst,
+   const union tgsi_double_channel *src)
+{
+   dst->u[0][0] = src[0].i64[0] >= src[1].i64[0] ? ~0U : 0U;
+   dst->u[1][0] = src[0].i64[1] >= src[1].i64[1] ? ~0U : 0U;
+   dst->u[2][0] = src[0].i64[2] >= src[1].i64[2] ? ~0U : 0U;
+   dst->u[3][0] = src[0].i64[3] >= src[1].i64[3] ? ~0U : 0U;
+}
+
+static void
+micro_u64sge(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+   dst->u[0][0] = src[0].u64[0] >= src[1].u64[0] ? ~0U : 0U;
+   dst->u[1][0] = src[0].u64[1] >= src[1].u64[1] ? 

[Mesa-dev] [PATCH 18/23] gallium: add opcode and types for 64-bit integers.

2016-06-08 Thread Dave Airlie
From: Dave Airlie 

This just adds the basic support for 64-bit opcodes,
and the new types.

Signed-off-by: Dave Airlie 
---
 src/gallium/auxiliary/tgsi/tgsi_info.c | 54 ++
 src/gallium/auxiliary/tgsi/tgsi_info.h |  4 ++-
 src/gallium/include/pipe/p_shader_tokens.h | 28 +++-
 3 files changed, 84 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c 
b/src/gallium/auxiliary/tgsi/tgsi_info.c
index 60e0f2c..f33d057 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -265,6 +265,28 @@ static const struct tgsi_opcode_info 
opcode_info[TGSI_OPCODE_LAST] =
{ 1, 1, 0, 0, 0, 0, 0, COMP, "VOTE_ANY", TGSI_OPCODE_VOTE_ANY },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "VOTE_ALL", TGSI_OPCODE_VOTE_ALL },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "VOTE_EQ", TGSI_OPCODE_VOTE_EQ },
+   { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SEQ", TGSI_OPCODE_U64SEQ },
+   { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SNE", TGSI_OPCODE_U64SNE },
+   { 1, 2, 0, 0, 0, 0, 0, COMP, "I64SLT", TGSI_OPCODE_I64SLT },
+   { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SLT", TGSI_OPCODE_U64SLT },
+   { 1, 2, 0, 0, 0, 0, 0, COMP, "I64SGE", TGSI_OPCODE_I64SGE },
+   { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SGE", TGSI_OPCODE_U64SGE },
+   { 1, 2, 0, 0, 0, 0, 0, COMP, "I64MIN", TGSI_OPCODE_I64MIN },
+   { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MIN", TGSI_OPCODE_U64MIN },
+   { 1, 2, 0, 0, 0, 0, 0, COMP, "I64MAX", TGSI_OPCODE_I64MAX },
+   { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MAX", TGSI_OPCODE_U64MAX },
+   { 1, 1, 0, 0, 0, 0, 0, COMP, "I64ABS", TGSI_OPCODE_I64ABS },
+   { 1, 1, 0, 0, 0, 0, 0, COMP, "I64SSG", TGSI_OPCODE_I64SSG },
+   { 1, 1, 0, 0, 0, 0, 0, COMP, "I64NEG", TGSI_OPCODE_I64NEG },
+   { 1, 2, 0, 0, 0, 0, 0, COMP, "U64ADD", TGSI_OPCODE_U64ADD },
+   { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MUL", TGSI_OPCODE_U64MUL },
+   { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SHL", TGSI_OPCODE_U64SHL },
+   { 1, 2, 0, 0, 0, 0, 0, COMP, "I64SHR", TGSI_OPCODE_I64SHR },
+   { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SHR", TGSI_OPCODE_U64SHR },
+   { 1, 2, 0, 0, 0, 0, 0, COMP, "I64DIV", TGSI_OPCODE_I64DIV },
+   { 1, 2, 0, 0, 0, 0, 0, COMP, "U64DIV", TGSI_OPCODE_U64DIV },
+   { 1, 2, 0, 0, 0, 0, 0, COMP, "I64MOD", TGSI_OPCODE_I64MOD },
+   { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MOD", TGSI_OPCODE_U64MOD },
 };
 
 const struct tgsi_opcode_info *
@@ -382,6 +404,12 @@ tgsi_opcode_infer_type( uint opcode )
case TGSI_OPCODE_DSGE:
case TGSI_OPCODE_DSLT:
case TGSI_OPCODE_DSNE:
+   case TGSI_OPCODE_U64SEQ:
+   case TGSI_OPCODE_U64SNE:
+   case TGSI_OPCODE_U64SLT:
+   case TGSI_OPCODE_U64SGE:
+   case TGSI_OPCODE_I64SLT:
+   case TGSI_OPCODE_I64SGE:
   return TGSI_TYPE_SIGNED;
case TGSI_OPCODE_DADD:
case TGSI_OPCODE_DABS:
@@ -406,6 +434,24 @@ tgsi_opcode_infer_type( uint opcode )
case TGSI_OPCODE_I2D:
case TGSI_OPCODE_U2D:
   return TGSI_TYPE_DOUBLE;
+   case TGSI_OPCODE_U64MAX:
+   case TGSI_OPCODE_U64MIN:
+   case TGSI_OPCODE_U64ADD:
+   case TGSI_OPCODE_U64MUL:
+   case TGSI_OPCODE_U64DIV:
+   case TGSI_OPCODE_U64MOD:
+   case TGSI_OPCODE_U64SHL:
+   case TGSI_OPCODE_U64SHR:
+  return TGSI_TYPE_UNSIGNED64;
+   case TGSI_OPCODE_I64MAX:
+   case TGSI_OPCODE_I64MIN:
+   case TGSI_OPCODE_I64ABS:
+   case TGSI_OPCODE_I64SSG:
+   case TGSI_OPCODE_I64NEG:
+   case TGSI_OPCODE_I64SHR:
+   case TGSI_OPCODE_I64DIV:
+   case TGSI_OPCODE_I64MOD:
+  return TGSI_TYPE_SIGNED64;
default:
   return TGSI_TYPE_FLOAT;
}
@@ -455,6 +501,14 @@ tgsi_opcode_infer_src_type( uint opcode )
case TGSI_OPCODE_DSLT:
case TGSI_OPCODE_DSNE:
   return TGSI_TYPE_DOUBLE;
+   case TGSI_OPCODE_U64SEQ:
+   case TGSI_OPCODE_U64SNE:
+   case TGSI_OPCODE_U64SLT:
+   case TGSI_OPCODE_U64SGE:
+  return TGSI_TYPE_UNSIGNED64;
+   case TGSI_OPCODE_I64SLT:
+   case TGSI_OPCODE_I64SGE:
+return TGSI_TYPE_SIGNED64;
default:
   return tgsi_opcode_infer_type(opcode);
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.h 
b/src/gallium/auxiliary/tgsi/tgsi_info.h
index 46f03cd..2eaa09a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.h
@@ -98,7 +98,9 @@ enum tgsi_opcode_type {
TGSI_TYPE_UNSIGNED,
TGSI_TYPE_SIGNED,
TGSI_TYPE_FLOAT,
-   TGSI_TYPE_DOUBLE
+   TGSI_TYPE_DOUBLE,
+   TGSI_TYPE_UNSIGNED64,
+   TGSI_TYPE_SIGNED64,
 };
 
 enum tgsi_opcode_type
diff --git a/src/gallium/include/pipe/p_shader_tokens.h 
b/src/gallium/include/pipe/p_shader_tokens.h
index abf26d3..37ce771 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -566,7 +566,33 @@ struct tgsi_property_data {
 #define TGSI_OPCODE_VOTE_ALL 224
 #define TGSI_OPCODE_VOTE_EQ 225
 
-#define TGSI_OPCODE_LAST 226
+#define TGSI_OPCODE_U64SEQ  226
+#define TGSI_OPCODE_U64SNE  227
+#define TGSI_OPCODE_I64SLT  228
+#define 

[Mesa-dev] [PATCH 23/23] gallivm/llvmpipe: add support for ARB_gpu_shader_int64.

2016-06-08 Thread Dave Airlie
From: Dave Airlie 

This enables 64-bit integer support in gallivm and
llvmpipe.

Signed-off-by: Dave Airlie 
---
 src/gallium/auxiliary/gallivm/lp_bld_limits.h  |   1 +
 src/gallium/auxiliary/gallivm/lp_bld_tgsi.c|   8 +-
 src/gallium/auxiliary/gallivm/lp_bld_tgsi.h|   4 +
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 352 +
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c|  68 +++-
 src/gallium/auxiliary/tgsi/tgsi_info.h |   9 +
 6 files changed, 425 insertions(+), 17 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_limits.h 
b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
index 32addec..017c86f 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_limits.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
@@ -134,6 +134,7 @@ gallivm_get_shader_param(enum pipe_shader_cap param)
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
   return 1;
case PIPE_SHADER_CAP_DOUBLES:
+   case PIPE_SHADER_CAP_INT64:
   return 1;
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
index 614c655..ceefe73 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
@@ -204,12 +204,12 @@ static int get_src_chan_idx(unsigned opcode,
enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(opcode);
enum tgsi_opcode_type stype = tgsi_opcode_infer_src_type(opcode);
 
-   if (dtype != TGSI_TYPE_DOUBLE && stype != TGSI_TYPE_DOUBLE)
+   if (!tgsi_type_is64bit(dtype) && !tgsi_type_is64bit(stype))
   return dst_chan_index;
-   if (dtype == TGSI_TYPE_DOUBLE) {
+   if (tgsi_type_is64bit(dtype)) {
   if (dst_chan_index == 1 || dst_chan_index == 3)
  return -1;
-  if (stype == TGSI_TYPE_DOUBLE)
+  if (tgsi_type_is64bit(stype))
  return dst_chan_index;
   if (dst_chan_index == 0)
  return 0;
@@ -364,6 +364,8 @@ lp_build_emit_fetch(
  break;
   case TGSI_TYPE_UNSIGNED:
   case TGSI_TYPE_SIGNED:
+  case TGSI_TYPE_UNSIGNED64:
+  case TGSI_TYPE_SIGNED64:
   case TGSI_TYPE_VOID:
   default:
  /* abs modifier is only legal on floating point types */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
index b9094dc..50866ad 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
@@ -337,6 +337,10 @@ struct lp_build_tgsi_context
struct lp_build_context int_bld;
 
struct lp_build_context dbl_bld;
+
+   struct lp_build_context uint64_bld;
+   struct lp_build_context int64_bld;
+
/** This array stores functions that are used to transform TGSI opcodes to
  * LLVM instructions.
  */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
index 43af6b4..c0e7338 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
@@ -2127,6 +2127,331 @@ dsqrt_emit_cpu(
   emit_data->args[0]);
 }
 
+static void
+i64abs_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   emit_data->output[emit_data->chan] = lp_build_abs(&bld_base->int64_bld,
+   emit_data->args[0]);
+}
+
+static void
+i64ssg_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   emit_data->output[emit_data->chan] = lp_build_sgn(&bld_base->int64_bld,
+   emit_data->args[0]);
+}
+
+/* TGSI_OPCODE_I64NEG (CPU Only) */
+static void
+i64neg_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   emit_data->output[emit_data->chan] = lp_build_sub(&bld_base->int64_bld,
+ bld_base->int64_bld.zero,
+ emit_data->args[0]);
+}
+
+/* 64-bit unsigned integer set-on-compare helper (CPU Only) */
+static void
+u64set_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data,
+   unsigned pipe_func)
+{
+   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+   LLVMValueRef cond = lp_build_cmp(&bld_base->uint64_bld, pipe_func,
+emit_data->args[0], emit_data->args[1]);
+   /* arguments were 64 bit but store as 32 bit */
+   cond = LLVMBuildTrunc(builder, cond, bld_base->int_bld.int_vec_type, "");
+   

[Mesa-dev] [PATCH 06/23] glsl/ast/ir: add 64-bit integer constant support

2016-06-08 Thread Dave Airlie
From: Dave Airlie 

This adds support for 64-bit integer constants to the parser,
ast and ir.

Signed-off-by: Dave Airlie 
---
 src/compiler/glsl/ast.h  |  4 
 src/compiler/glsl/ast_to_hir.cpp | 14 ++
 src/compiler/glsl/glsl_lexer.ll  | 27 --
 src/compiler/glsl/glsl_parser.yy | 16 +++
 src/compiler/glsl/ir.cpp | 42 
 src/compiler/glsl/ir.h   |  6 ++
 6 files changed, 103 insertions(+), 6 deletions(-)

diff --git a/src/compiler/glsl/ast.h b/src/compiler/glsl/ast.h
index 06c7b03..5b4ab44 100644
--- a/src/compiler/glsl/ast.h
+++ b/src/compiler/glsl/ast.h
@@ -195,6 +195,8 @@ enum ast_operators {
ast_float_constant,
ast_bool_constant,
ast_double_constant,
+   ast_int64_constant,
+   ast_uint64_constant,
 
ast_sequence,
ast_aggregate
@@ -246,6 +248,8 @@ public:
   unsigned uint_constant;
   int bool_constant;
   double double_constant;
+  uint64_t uint64_constant;
+  int64_t int64_constant;
} primary_expression;
 
 
diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp
index b75ddbd..0cf0941 100644
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -1254,6 +1254,10 @@ constant_one_for_inc_dec(void *ctx, const glsl_type 
*type)
   return new(ctx) ir_constant((unsigned) 1);
case GLSL_TYPE_INT:
   return new(ctx) ir_constant(1);
+   case GLSL_TYPE_UINT64:
+  return new(ctx) ir_constant((uint64_t) 1);
+   case GLSL_TYPE_INT64:
+  return new(ctx) ir_constant((int64_t) 1);
default:
case GLSL_TYPE_FLOAT:
   return new(ctx) ir_constant(1.0f);
@@ -1973,6 +1977,14 @@ ast_expression::do_hir(exec_list *instructions,
   result = new(ctx) ir_constant(this->primary_expression.double_constant);
   break;
 
+   case ast_uint64_constant:
+  result = new(ctx) ir_constant(this->primary_expression.uint64_constant);
+  break;
+
+   case ast_int64_constant:
+  result = new(ctx) ir_constant(this->primary_expression.int64_constant);
+  break;
+
case ast_sequence: {
   /* It should not be possible to generate a sequence in the AST without
* any expressions in it.
@@ -2099,6 +2111,8 @@ ast_expression::has_sequence_subexpression() const
case ast_float_constant:
case ast_bool_constant:
case ast_double_constant:
+   case ast_int64_constant:
+   case ast_uint64_constant:
   return false;
 
case ast_aggregate:
diff --git a/src/compiler/glsl/glsl_lexer.ll b/src/compiler/glsl/glsl_lexer.ll
index 9c6d943..7145b23 100644
--- a/src/compiler/glsl/glsl_lexer.ll
+++ b/src/compiler/glsl/glsl_lexer.ll
@@ -107,17 +107,29 @@ literal_integer(char *text, int len, struct 
_mesa_glsl_parse_state *state,
 {
bool is_uint = (text[len - 1] == 'u' ||
   text[len - 1] == 'U');
+   bool is_long = (text[len - 1] == 'l' || text[len - 1] == 'L');
const char *digits = text;
 
+   if (is_long)
+  is_uint = (text[len - 2] == 'u' && text[len - 1] == 'l') ||
+(text[len - 2] == 'U' && text[len - 1] == 'L');
/* Skip "0x" */
if (base == 16)
   digits += 2;
 
unsigned long long value = strtoull(digits, NULL, base);
 
-   lval->n = (int)value;
+   if (is_long)
+  lval->n64 = (int64_t)value;
+   else
+  lval->n = (int)value;
 
-   if (value > UINT_MAX) {
+   if (is_long && !is_uint && base == 10 && value > (uint64_t)LLONG_MAX + 1) {
+  /* Tries to catch unintentionally providing a negative value. */
+  _mesa_glsl_warning(lloc, state,
+ "signed literal value `%s' is interpreted as %lld",
+ text, lval->n64);
+   } else if (!is_long && value > UINT_MAX) {
   /* Note that signed 0xffffffff is valid, not out of range! */
   if (state->is_version(130, 300)) {
 _mesa_glsl_error(lloc, state,
@@ -135,7 +147,10 @@ literal_integer(char *text, int len, struct 
_mesa_glsl_parse_state *state,
 "signed literal value `%s' is interpreted as %d",
 text, lval->n);
}
-   return is_uint ? UINTCONSTANT : INTCONSTANT;
+   if (is_long)
+  return is_uint ? UINT64CONSTANT : INT64CONSTANT;
+   else
+  return is_uint ? UINTCONSTANT : INTCONSTANT;
 }
 
 #define LITERAL_INTEGER(base) \
@@ -458,13 +473,13 @@ layout{
 \|= return OR_ASSIGN;
 -= return SUB_ASSIGN;
 
-[1-9][0-9]*[uU]?   {
+[1-9][0-9]*([uU]|[lL]|ul|UL)?  {
return LITERAL_INTEGER(10);
}
-0[xX][0-9a-fA-F]+[uU]? {
+0[xX][0-9a-fA-F]+([uU]|[lL]|ul|UL)? {
return LITERAL_INTEGER(16);
}
-0[0-7]*[uU]?   {
+0[0-7]*([uU]|[lL]|ul|UL)?  {
return LITERAL_INTEGER(8);
}
 
diff --git 

[Mesa-dev] [PATCH 12/23] glsl/ir_builder: add support for some 64-bit bitcasts.

2016-06-08 Thread Dave Airlie
From: Dave Airlie 

We need builder support to implement some of the builtins.

Signed-off-by: Dave Airlie 
---
 src/compiler/glsl/ir_builder.cpp | 24 
 src/compiler/glsl/ir_builder.h   |  6 ++
 2 files changed, 30 insertions(+)

diff --git a/src/compiler/glsl/ir_builder.cpp b/src/compiler/glsl/ir_builder.cpp
index d68647f..d78ff39 100644
--- a/src/compiler/glsl/ir_builder.cpp
+++ b/src/compiler/glsl/ir_builder.cpp
@@ -512,6 +512,30 @@ b2f(operand a)
return expr(ir_unop_b2f, a);
 }
 
+ir_expression*
+bitcast_d2i64(operand a)
+{
+   return expr(ir_unop_bitcast_d2i64, a);
+}
+
+ir_expression*
+bitcast_d2u64(operand a)
+{
+   return expr(ir_unop_bitcast_d2u64, a);
+}
+
+ir_expression*
+bitcast_i642d(operand a)
+{
+   return expr(ir_unop_bitcast_i642d, a);
+}
+
+ir_expression*
+bitcast_u642d(operand a)
+{
+   return expr(ir_unop_bitcast_u642d, a);
+}
+
 ir_expression *
 interpolate_at_centroid(operand a)
 {
diff --git a/src/compiler/glsl/ir_builder.h b/src/compiler/glsl/ir_builder.h
index b483ebf..6691504 100644
--- a/src/compiler/glsl/ir_builder.h
+++ b/src/compiler/glsl/ir_builder.h
@@ -190,6 +190,12 @@ ir_expression *f2d(operand a);
 ir_expression *i2d(operand a);
 ir_expression *u2d(operand a);
 
+ir_expression *bitcast_d2i64(operand a);
+ir_expression *bitcast_d2u64(operand a);
+
+ir_expression *bitcast_i642d(operand a);
+ir_expression *bitcast_u642d(operand a);
+
 ir_expression *min2(operand a, operand b);
 ir_expression *max2(operand a, operand b);
 
-- 
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/23] glsl/varyings: add 64-bit integer support.

2016-06-08 Thread Dave Airlie
From: Dave Airlie 

This adds 64-bit ints to the link_varyings 64-bit support.

Signed-off-by: Dave Airlie 
---
 src/compiler/glsl/link_varyings.h | 8 
 1 file changed, 8 insertions(+)

diff --git a/src/compiler/glsl/link_varyings.h 
b/src/compiler/glsl/link_varyings.h
index 2126a5c..0b67a15 100644
--- a/src/compiler/glsl/link_varyings.h
+++ b/src/compiler/glsl/link_varyings.h
@@ -176,6 +176,14 @@ private:
   case GL_DOUBLE_MAT4:
   case GL_DOUBLE_MAT4x2:
   case GL_DOUBLE_MAT4x3:
+  case GL_INT64_ARB:
+  case GL_INT64_VEC2_ARB:
+  case GL_INT64_VEC3_ARB:
+  case GL_INT64_VEC4_ARB:
+  case GL_UNSIGNED_INT64_ARB:
+  case GL_UNSIGNED_INT64_VEC2_ARB:
+  case GL_UNSIGNED_INT64_VEC3_ARB:
+  case GL_UNSIGNED_INT64_VEC4_ARB:
  return true;
   default:
  return false;
-- 
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 22/23] tgsi/softpipe: enable ARB_gpu_shader_int64 support.

2016-06-08 Thread Dave Airlie
From: Dave Airlie 

This adds all the opcodes to tgsi_exec for softpipe to use.

It also enables the cap.

Signed-off-by: Dave Airlie 
---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 104 +
 src/gallium/auxiliary/tgsi/tgsi_exec.h |   1 +
 2 files changed, 105 insertions(+)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index c929475..972795d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -3638,6 +3638,10 @@ exec_endswitch(struct tgsi_exec_machine *mach)
 typedef void (* micro_dop)(union tgsi_double_channel *dst,
const union tgsi_double_channel *src);
 
+typedef void (* micro_dop_sop)(union tgsi_double_channel *dst,
+   const union tgsi_double_channel *src0,
+   union tgsi_exec_channel *src1);
+
 static void
 fetch_double_channel(struct tgsi_exec_machine *mach,
  union tgsi_double_channel *chan,
@@ -3961,6 +3965,32 @@ exec_dfracexp(struct tgsi_exec_machine *mach,
}
 }
 
+static void
+exec_arg0_64_arg1_32(struct tgsi_exec_machine *mach,
+const struct tgsi_full_instruction *inst,
+micro_dop_sop op)
+{
+   union tgsi_double_channel src0;
+   union tgsi_exec_channel src1;
+   union tgsi_double_channel dst;
+   int wmask;
+
+   wmask = inst->Dst[0].Register.WriteMask;
+   if (wmask & TGSI_WRITEMASK_XY) {
+  fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, 
TGSI_CHAN_Y);
+  fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, 
TGSI_EXEC_DATA_INT);
+  op(&dst, &src0, &src1);
+  store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, 
TGSI_CHAN_Y);
+   }
+
+   if (wmask & TGSI_WRITEMASK_ZW) {
+  fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, 
TGSI_CHAN_W);
+  fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, 
TGSI_EXEC_DATA_INT);
+  op(&dst, &src0, &src1);
+  store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, 
TGSI_CHAN_W);
+   }
+}
+
 static int
 get_image_coord_dim(unsigned tgsi_tex)
 {
@@ -6042,6 +6072,80 @@ exec_instruction(
case TGSI_OPCODE_MEMBAR:
   return TRUE;
   break;
+
+   case TGSI_OPCODE_I64ABS:
+  exec_double_unary(mach, inst, micro_i64abs);
+  break;
+
+   case TGSI_OPCODE_I64SSG:
+  exec_double_unary(mach, inst, micro_i64sgn);
+  break;
+
+   case TGSI_OPCODE_I64NEG:
+  exec_double_unary(mach, inst, micro_i64neg);
+  break;
+
+   case TGSI_OPCODE_U64SEQ:
+  exec_double_binary(mach, inst, micro_u64seq, TGSI_EXEC_DATA_UINT);
+  break;
+
+   case TGSI_OPCODE_U64SNE:
+  exec_double_binary(mach, inst, micro_u64sne, TGSI_EXEC_DATA_UINT);
+  break;
+
+   case TGSI_OPCODE_I64SLT:
+  exec_double_binary(mach, inst, micro_i64slt, TGSI_EXEC_DATA_UINT);
+  break;
+   case TGSI_OPCODE_U64SLT:
+  exec_double_binary(mach, inst, micro_u64slt, TGSI_EXEC_DATA_UINT);
+  break;
+
+   case TGSI_OPCODE_I64SGE:
+  exec_double_binary(mach, inst, micro_i64sge, TGSI_EXEC_DATA_UINT);
+  break;
+   case TGSI_OPCODE_U64SGE:
+  exec_double_binary(mach, inst, micro_u64sge, TGSI_EXEC_DATA_UINT);
+  break;
+
+   case TGSI_OPCODE_I64MIN:
+  exec_double_binary(mach, inst, micro_i64min, TGSI_EXEC_DATA_INT64);
+  break;
+   case TGSI_OPCODE_U64MIN:
+  exec_double_binary(mach, inst, micro_u64min, TGSI_EXEC_DATA_UINT64);
+  break;
+   case TGSI_OPCODE_I64MAX:
+  exec_double_binary(mach, inst, micro_i64max, TGSI_EXEC_DATA_INT64);
+  break;
+   case TGSI_OPCODE_U64MAX:
+  exec_double_binary(mach, inst, micro_u64max, TGSI_EXEC_DATA_UINT64);
+  break;
+   case TGSI_OPCODE_U64ADD:
+  exec_double_binary(mach, inst, micro_u64add, TGSI_EXEC_DATA_UINT64);
+  break;
+   case TGSI_OPCODE_U64MUL:
+  exec_double_binary(mach, inst, micro_u64mul, TGSI_EXEC_DATA_UINT64);
+  break;
+   case TGSI_OPCODE_U64SHL:
+  exec_arg0_64_arg1_32(mach, inst, micro_u64shl);
+  break;
+   case TGSI_OPCODE_I64SHR:
+  exec_arg0_64_arg1_32(mach, inst, micro_i64shr);
+  break;
+   case TGSI_OPCODE_U64SHR:
+  exec_arg0_64_arg1_32(mach, inst, micro_u64shr);
+  break;
+   case TGSI_OPCODE_U64DIV:
+  exec_double_binary(mach, inst, micro_u64div, TGSI_EXEC_DATA_UINT64);
+  break;
+   case TGSI_OPCODE_I64DIV:
+  exec_double_binary(mach, inst, micro_i64div, TGSI_EXEC_DATA_INT64);
+  break;
+   case TGSI_OPCODE_U64MOD:
+  exec_double_binary(mach, inst, micro_u64mod, TGSI_EXEC_DATA_UINT64);
+  break;
+   case TGSI_OPCODE_I64MOD:
+  exec_double_binary(mach, inst, micro_i64mod, TGSI_EXEC_DATA_INT64);
+  break;
default:
   assert( 0 );
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h 
b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index 9343d78..018b9dd 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -529,6 +529,7 @@ 

[Mesa-dev] [PATCH 17/23] gallium: add integer 64 capability.

2016-06-08 Thread Dave Airlie
From: Dave Airlie 

TODO: go back and fill this out to avoid warnings.

Signed-off-by: Dave Airlie 
---
 src/gallium/include/pipe/p_defines.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/include/pipe/p_defines.h 
b/src/gallium/include/pipe/p_defines.h
index 569725f..1e1b875 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -799,6 +799,7 @@ enum pipe_shader_cap
PIPE_SHADER_CAP_MAX_SHADER_BUFFERS,
PIPE_SHADER_CAP_SUPPORTED_IRS,
PIPE_SHADER_CAP_MAX_SHADER_IMAGES,
+   PIPE_SHADER_CAP_INT64,
 };
 
 /**
-- 
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 13/23] glsl: add 64-bit integer support to some operations.

2016-06-08 Thread Dave Airlie
From: Dave Airlie 

This adds 64-bit integer support to some AST and IR operations
where it is needed.

Signed-off-by: Dave Airlie 
---
 src/compiler/glsl/ast_to_hir.cpp  | 12 ++--
 src/compiler/glsl/ir_validate.cpp |  4 ++--
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp
index abcd491..1c64a4b 100644
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -532,12 +532,12 @@ bit_logic_result_type(ir_rvalue * &value_a, ir_rvalue * 
&value_b,
 * (|). The operands must be of type signed or unsigned integers or
 * integer vectors."
 */
-   if (!type_a->is_integer()) {
+   if (!type_a->is_integer_32_64()) {
   _mesa_glsl_error(loc, state, "LHS of `%s' must be an integer",
 ast_expression::operator_string(op));
   return glsl_type::error_type;
}
-   if (!type_b->is_integer()) {
+   if (!type_b->is_integer_32_64()) {
   _mesa_glsl_error(loc, state, "RHS of `%s' must be an integer",
ast_expression::operator_string(op));
   return glsl_type::error_type;
@@ -618,11 +618,11 @@ modulus_result_type(ir_rvalue * &value_a, ir_rvalue * 
&value_b,
 *"The operator modulus (%) operates on signed or unsigned integers or
 *integer vectors."
 */
-   if (!type_a->is_integer()) {
+   if (!type_a->is_integer_32_64()) {
   _mesa_glsl_error(loc, state, "LHS of operator %% must be an integer");
   return glsl_type::error_type;
}
-   if (!type_b->is_integer()) {
+   if (!type_b->is_integer_32_64()) {
   _mesa_glsl_error(loc, state, "RHS of operator %% must be an integer");
   return glsl_type::error_type;
}
@@ -740,7 +740,7 @@ shift_result_type(const struct glsl_type *type_a,
 * must be signed or unsigned integers or integer vectors. One operand
 * can be signed while the other is unsigned."
 */
-   if (!type_a->is_integer()) {
+   if (!type_a->is_integer_32_64()) {
   _mesa_glsl_error(loc, state, "LHS of operator %s must be an integer or "
"integer vector", ast_expression::operator_string(op));
  return glsl_type::error_type;
@@ -1552,7 +1552,7 @@ ast_expression::do_hir(exec_list *instructions,
  error_emitted = true;
   }
 
-  if (!op[0]->type->is_integer()) {
+  if (!op[0]->type->is_integer_32_64()) {
  _mesa_glsl_error(&loc, state, "operand of `~' must be an integer");
  error_emitted = true;
   }
diff --git a/src/compiler/glsl/ir_validate.cpp 
b/src/compiler/glsl/ir_validate.cpp
index 91a83c2..a811f82 100644
--- a/src/compiler/glsl/ir_validate.cpp
+++ b/src/compiler/glsl/ir_validate.cpp
@@ -650,7 +650,7 @@ ir_validate::visit_leave(ir_expression *ir)
 
case ir_binop_lshift:
case ir_binop_rshift:
-  assert(ir->operands[0]->type->is_integer() &&
+  assert(ir->operands[0]->type->is_integer_32_64() &&
  ir->operands[1]->type->is_integer());
   if (ir->operands[0]->type->is_scalar()) {
   assert(ir->operands[1]->type->is_scalar());
@@ -668,7 +668,7 @@ ir_validate::visit_leave(ir_expression *ir)
case ir_binop_bit_or:
assert(ir->operands[0]->type->base_type ==
   ir->operands[1]->type->base_type);
-   assert(ir->type->is_integer());
+   assert(ir->type->is_integer_32_64());
if (ir->operands[0]->type->is_vector() &&
ir->operands[1]->type->is_vector()) {
assert(ir->operands[0]->type->vector_elements ==
-- 
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 08/23] glsl: add 64-bit integer support to uniform initialiser code

2016-06-08 Thread Dave Airlie
From: Dave Airlie 

Just add the 64-bit integer types to the existing double case; the same code should work.

Signed-off-by: Dave Airlie 
---
 src/compiler/glsl/link_uniform_initializers.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/compiler/glsl/link_uniform_initializers.cpp 
b/src/compiler/glsl/link_uniform_initializers.cpp
index bec2dae..401e8fc 100644
--- a/src/compiler/glsl/link_uniform_initializers.cpp
+++ b/src/compiler/glsl/link_uniform_initializers.cpp
@@ -64,6 +64,8 @@ copy_constant_to_storage(union gl_constant_value *storage,
 storage[i].f = val->value.f[i];
 break;
   case GLSL_TYPE_DOUBLE:
+  case GLSL_TYPE_UINT64:
+  case GLSL_TYPE_INT64:
  /* XXX need to check on big-endian */
  storage[i * 2].u = *(uint32_t *)&val->value.d[i];
  storage[i * 2 + 1].u = *(((uint32_t *)&val->value.d[i]) + 1);
@@ -71,8 +73,6 @@ copy_constant_to_storage(union gl_constant_value *storage,
   case GLSL_TYPE_BOOL:
 storage[i].b = val->value.b[i] ? boolean_true : 0;
 break;
-  case GLSL_TYPE_INT64:
-  case GLSL_TYPE_UINT64:
   case GLSL_TYPE_ARRAY:
   case GLSL_TYPE_STRUCT:
   case GLSL_TYPE_IMAGE:
-- 
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 14/23] glsl/ast: add 64-bit integer support in some places.

2016-06-08 Thread Dave Airlie
From: Dave Airlie 

Just add support in two more places in ast parsing.

Signed-off-by: Dave Airlie 
---
 src/compiler/glsl/ast_to_hir.cpp | 5 +
 1 file changed, 5 insertions(+)

diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp
index 1c64a4b..f1d2da7 100644
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -3784,6 +3784,8 @@ apply_type_qualifier_to_variable(const struct 
ast_type_qualifier *qual,
   "varying variables may not be of type struct");
  break;
   case GLSL_TYPE_DOUBLE:
+  case GLSL_TYPE_UINT64:
+  case GLSL_TYPE_INT64:
  break;
   default:
  _mesa_glsl_error(loc, state, "illegal type for a varying variable");
@@ -4807,6 +4809,9 @@ ast_declarator_list::hir(exec_list *instructions,
 switch (check_type->base_type) {
 case GLSL_TYPE_FLOAT:
 break;
+case GLSL_TYPE_UINT64:
+case GLSL_TYPE_INT64:
+   break;
 case GLSL_TYPE_UINT:
 case GLSL_TYPE_INT:
if (state->is_version(120, 300))
-- 
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 20/23] st/glsl_to_tgsi: add support for 64-bit integers.

2016-06-08 Thread Dave Airlie
From: Dave Airlie 

Support for the 64-bit integer conversions is still incomplete.

Signed-off-by: Dave Airlie 
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 105 -
 1 file changed, 87 insertions(+), 18 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 2216bdf..0d05872 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -876,6 +876,10 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, 
unsigned op,
 
if (is_resource_instruction(op))
   type = src1.type;
+   else if (src0.type == GLSL_TYPE_INT64 || src1.type == GLSL_TYPE_INT64)
+  type = GLSL_TYPE_INT64;
+   else if (src0.type == GLSL_TYPE_UINT64 || src1.type == GLSL_TYPE_UINT64)
+  type = GLSL_TYPE_UINT64;
else if (src0.type == GLSL_TYPE_DOUBLE || src1.type == GLSL_TYPE_DOUBLE)
   type = GLSL_TYPE_DOUBLE;
else if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
@@ -883,6 +887,21 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, 
unsigned op,
else if (native_integers)
   type = src0.type == GLSL_TYPE_BOOL ? GLSL_TYPE_INT : src0.type;
 
+#define case7(c, f, i, u, d, i64, ui64) \
+   case TGSI_OPCODE_##c: \
+  if (type == GLSL_TYPE_UINT64)   \
+ op = TGSI_OPCODE_##ui64; \
+  else if (type == GLSL_TYPE_INT64)   \
+ op = TGSI_OPCODE_##i64; \
+  else if (type == GLSL_TYPE_DOUBLE)   \
+ op = TGSI_OPCODE_##d; \
+  else if (type == GLSL_TYPE_INT)   \
+ op = TGSI_OPCODE_##i; \
+  else if (type == GLSL_TYPE_UINT) \
+ op = TGSI_OPCODE_##u; \
+  else \
+ op = TGSI_OPCODE_##f; \
+  break;
 #define case5(c, f, i, u, d)\
case TGSI_OPCODE_##c: \
   if (type == GLSL_TYPE_DOUBLE)   \
@@ -906,14 +925,22 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, 
unsigned op,
   break;
 
 #define case3(f, i, u)  case4(f, f, i, u)
-#define case4d(f, i, u, d)  case5(f, f, i, u, d)
+#define case6d(f, i, u, d, i64, u64)  case7(f, f, i, u, d, i64, u64)
 #define case3fid(f, i, d) case5(f, f, i, i, d)
+#define case3fid64(f, i, d, i64) case7(f, f, i, i, d, i64, i64)
 #define case2fi(f, i)   case4(f, f, i, i)
 #define case2iu(i, u)   case4(i, LAST, i, u)
 
-#define casecomp(c, f, i, u, d)   \
+#define case2iu64(i, i64)   case7(i, LAST, i, i, LAST, i64, i64)
+#define case4iu64(i, u, i64, u64)   case7(i, LAST, i, u, LAST, i64, u64)
+
+#define casecomp(c, f, i, u, d, i64, ui64)   \
case TGSI_OPCODE_##c: \
-  if (type == GLSL_TYPE_DOUBLE) \
+  if (type == GLSL_TYPE_INT64) \
+ op = TGSI_OPCODE_##i64; \
+  else if (type == GLSL_TYPE_UINT64)\
+ op = TGSI_OPCODE_##ui64; \
+  else if (type == GLSL_TYPE_DOUBLE)   \
  op = TGSI_OPCODE_##d; \
   else if (type == GLSL_TYPE_INT || type == GLSL_TYPE_SUBROUTINE)   \
  op = TGSI_OPCODE_##i; \
@@ -926,24 +953,25 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, 
unsigned op,
   break;
 
switch(op) {
-  case3fid(ADD, UADD, DADD);
-  case3fid(MUL, UMUL, DMUL);
+  case3fid64(ADD, UADD, DADD, U64ADD);
+  case3fid64(MUL, UMUL, DMUL, U64MUL);
   case3fid(MAD, UMAD, DMAD);
   case3fid(FMA, UMAD, DFMA);
-  case3(DIV, IDIV, UDIV);
-  case4d(MAX, IMAX, UMAX, DMAX);
-  case4d(MIN, IMIN, UMIN, DMIN);
-  case2iu(MOD, UMOD);
+  case6d(DIV, IDIV, UDIV, LAST, I64DIV, U64DIV);
+  case6d(MAX, IMAX, UMAX, DMAX, I64MAX, U64MAX);
+  case6d(MIN, IMIN, UMIN, DMIN, I64MIN, U64MIN);
+  case4iu64(MOD, UMOD, I64MOD, U64MOD);
 
-  casecomp(SEQ, FSEQ, USEQ, USEQ, DSEQ);
-  casecomp(SNE, FSNE, USNE, USNE, DSNE);
-  casecomp(SGE, FSGE, ISGE, USGE, DSGE);
-  casecomp(SLT, FSLT, ISLT, USLT, DSLT);
+  casecomp(SEQ, FSEQ, USEQ, USEQ, DSEQ, U64SEQ, U64SEQ);
+  casecomp(SNE, FSNE, USNE, USNE, DSNE, U64SNE, U64SNE);
+  casecomp(SGE, FSGE, ISGE, USGE, DSGE, I64SGE, U64SGE);
+  casecomp(SLT, FSLT, ISLT, USLT, DSLT, I64SLT, U64SLT);
 
-  case2iu(ISHR, USHR);
+  case2iu64(SHL, U64SHL);
+  case4iu64(ISHR, USHR, I64SHR, U64SHR);
 
-  case3fid(SSG, ISSG, DSSG);
-  case3fid(ABS, IABS, DABS);
+  case3fid64(SSG, ISSG, DSSG, I64SSG);
+  case3fid64(ABS, IABS, DABS, I64ABS);
 
   case2iu(IBFE, UBFE);
   case2iu(IMSB, UMSB);
@@ -1075,7 +1103,9 @@ glsl_to_tgsi_visitor::add_constant(gl_register_file file,
 
int index = 0;
immediate_storage *entry;
-   int size32 = size * (datatype == GL_DOUBLE ? 2 : 1);
+   int size32 = size * ((datatype == GL_DOUBLE ||
+ datatype == GL_INT64_ARB ||
+ datatype == GL_UNSIGNED_INT64_ARB)? 2 : 1);
int i;
 
/* Search immediate storage to see if we already have an identical
@@ -1566,7 +1596,9 

[Mesa-dev] [PATCH 16/23] glsl: add 64-bit integer functions.

2016-06-08 Thread Dave Airlie
From: Dave Airlie 

These are all the allowed 64-bit functions from the ARB_gpu_shader_int64
spec.

Signed-off-by: Dave Airlie 
---
 src/compiler/glsl/builtin_functions.cpp | 173 +++-
 1 file changed, 170 insertions(+), 3 deletions(-)

diff --git a/src/compiler/glsl/builtin_functions.cpp 
b/src/compiler/glsl/builtin_functions.cpp
index db0dcb6..2bb 100644
--- a/src/compiler/glsl/builtin_functions.cpp
+++ b/src/compiler/glsl/builtin_functions.cpp
@@ -510,6 +510,12 @@ fp64(const _mesa_glsl_parse_state *state)
 }
 
 static bool
+int64(const _mesa_glsl_parse_state *state)
+{
+   return state->has_int64();
+}
+
+static bool
 compute_shader(const _mesa_glsl_parse_state *state)
 {
return state->stage == MESA_SHADER_COMPUTE;
@@ -704,6 +710,12 @@ private:
B1(floatBitsToUint)
B1(intBitsToFloat)
B1(uintBitsToFloat)
+
+   B1(doubleBitsToInt64)
+   B1(doubleBitsToUint64)
+   B1(int64BitsToDouble)
+   B1(uint64BitsToDouble)
+
ir_function_signature *_packUnorm2x16(builtin_available_predicate avail);
ir_function_signature *_packSnorm2x16(builtin_available_predicate avail);
ir_function_signature *_packUnorm4x8(builtin_available_predicate avail);
@@ -716,6 +728,10 @@ private:
ir_function_signature *_unpackHalf2x16(builtin_available_predicate avail);
ir_function_signature *_packDouble2x32(builtin_available_predicate avail);
ir_function_signature *_unpackDouble2x32(builtin_available_predicate avail);
+   ir_function_signature *_packInt2x32(builtin_available_predicate avail);
+   ir_function_signature *_unpackInt2x32(builtin_available_predicate avail);
+   ir_function_signature *_packUint2x32(builtin_available_predicate avail);
+   ir_function_signature *_unpackUint2x32(builtin_available_predicate avail);
 
BA1(length)
BA1(distance);
@@ -1140,7 +1156,7 @@ builtin_builder::create_builtins()
 _##NAME(glsl_type::ivec4_type), \
 NULL);
 
-#define FID(NAME)\
+#define FI64(NAME)\
add_function(#NAME,  \
 _##NAME(always_available, glsl_type::float_type), \
 _##NAME(always_available, glsl_type::vec2_type),  \
@@ -1154,6 +1170,10 @@ builtin_builder::create_builtins()
 _##NAME(fp64, glsl_type::dvec2_type),  \
 _##NAME(fp64, glsl_type::dvec3_type),  \
 _##NAME(fp64, glsl_type::dvec4_type),  \
+_##NAME(int64, glsl_type::int64_t_type), \
+_##NAME(int64, glsl_type::i64vec2_type),  \
+_##NAME(int64, glsl_type::i64vec3_type),  \
+_##NAME(int64, glsl_type::i64vec4_type),  \
 NULL);
 
 #define FIUD(NAME) \
@@ -1176,6 +1196,14 @@ builtin_builder::create_builtins()
 _##NAME(fp64, glsl_type::dvec2_type),  \
 _##NAME(fp64, glsl_type::dvec3_type),  \
 _##NAME(fp64, glsl_type::dvec4_type),  \
+_##NAME(int64, glsl_type::int64_t_type), \
+_##NAME(int64, glsl_type::i64vec2_type),  \
+_##NAME(int64, glsl_type::i64vec3_type),  \
+_##NAME(int64, glsl_type::i64vec4_type),  \
+_##NAME(int64, glsl_type::uint64_t_type), \
+_##NAME(int64, glsl_type::u64vec2_type),  \
+_##NAME(int64, glsl_type::u64vec3_type),  \
+_##NAME(int64, glsl_type::u64vec4_type),  \
 NULL);
 
 #define IU(NAME)\
@@ -1217,6 +1245,14 @@ builtin_builder::create_builtins()
 _##NAME(fp64, glsl_type::dvec2_type), \
 _##NAME(fp64, glsl_type::dvec3_type), \
 _##NAME(fp64, glsl_type::dvec4_type), \
+_##NAME(int64, glsl_type::int64_t_type), \
+_##NAME(int64, glsl_type::i64vec2_type),  \
+_##NAME(int64, glsl_type::i64vec3_type),  \
+_##NAME(int64, glsl_type::i64vec4_type),  \
+_##NAME(int64, glsl_type::uint64_t_type), \
+_##NAME(int64, glsl_type::u64vec2_type),  \
+_##NAME(int64, glsl_type::u64vec3_type),  \
+_##NAME(int64, glsl_type::u64vec4_type),  \
 NULL);
 
 #define FIUD2_MIXED(NAME)  
   \
@@ -1255,6 +1291,21 @@ builtin_builder::create_builtins()
 _##NAME(fp64, glsl_type::dvec2_type, glsl_type::dvec2_type),   
\
 _##NAME(fp64, glsl_type::dvec3_type, glsl_type::dvec3_type),   
\
 _##NAME(fp64, glsl_type::dvec4_type, glsl_type::dvec4_type),   
\
+\
+_##NAME(int64, glsl_type::int64_t_type, 
glsl_type::int64_t_type),   \
+  

[Mesa-dev] [PATCH 05/23] mesa: add support for 64-bit integer uniforms.

2016-06-08 Thread Dave Airlie
From: Dave Airlie 

This hooks up the API to the internals for 64-bit integer uniforms.

Signed-off-by: Dave Airlie 
---
 src/mesa/main/uniform_query.cpp |  60 +-
 src/mesa/main/uniforms.c| 170 +++-
 2 files changed, 226 insertions(+), 4 deletions(-)

diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp
index eea611b..169db46 100644
--- a/src/mesa/main/uniform_query.cpp
+++ b/src/mesa/main/uniform_query.cpp
@@ -332,7 +332,8 @@ _mesa_get_uniform(struct gl_context *ctx, GLuint program, 
GLint location,
 &uni->storage[offset * elements * dmul];
 
   assert(returnType == GLSL_TYPE_FLOAT || returnType == GLSL_TYPE_INT ||
- returnType == GLSL_TYPE_UINT || returnType == GLSL_TYPE_DOUBLE);
+ returnType == GLSL_TYPE_UINT || returnType == GLSL_TYPE_DOUBLE ||
+ returnType == GLSL_TYPE_UINT64 || returnType == GLSL_TYPE_INT64);
 
   /* doubles have a different size than the other 3 types */
   unsigned bytes = sizeof(src[0]) * elements * rmul;
@@ -354,7 +355,11 @@ _mesa_get_uniform(struct gl_context *ctx, GLuint program, 
GLint location,
  (uni->type->base_type == GLSL_TYPE_INT
   || uni->type->base_type == GLSL_TYPE_UINT
|| uni->type->base_type == GLSL_TYPE_SAMPLER
-   || uni->type->base_type == GLSL_TYPE_IMAGE))) {
+   || uni->type->base_type == GLSL_TYPE_IMAGE))
+  || ((returnType == GLSL_TYPE_UINT64 ||
+   returnType == GLSL_TYPE_INT64 ) &&
+  (uni->type->base_type == GLSL_TYPE_UINT64 ||
+   uni->type->base_type == GLSL_TYPE_INT64))) {
 memcpy(paramsOut, src, bytes);
   } else {
 union gl_constant_value *const dst =
@@ -384,6 +389,12 @@ _mesa_get_uniform(struct gl_context *ctx, GLuint program, 
GLint location,
   case GLSL_TYPE_DOUBLE:
  dst[didx].f = *(double *)&src[sidx].f;
  break;
+   case GLSL_TYPE_UINT64:
+ dst[didx].f = *(uint64_t *)&src[sidx].u;
+ break;
+   case GLSL_TYPE_INT64:
+  dst[didx].f = *(int64_t *)&src[sidx].i;
+ break;
   default:
  assert(!"Should not get here.");
  break;
@@ -405,6 +416,12 @@ _mesa_get_uniform(struct gl_context *ctx, GLuint program, 
GLint location,
   case GLSL_TYPE_FLOAT:
  *(double *)&dst[didx].f = (double) src[sidx].f;
  break;
+   case GLSL_TYPE_UINT64:
+ *(double *)&dst[didx].f = *(uint64_t *)&src[sidx].u;
+  break;
+   case GLSL_TYPE_INT64:
+ *(double *)&dst[didx].f = *(int64_t *)&src[sidx].i;
+  break;
   default:
  assert(!"Should not get here.");
  break;
@@ -439,12 +456,41 @@ _mesa_get_uniform(struct gl_context *ctx, GLuint program, 
GLint location,
   case GLSL_TYPE_DOUBLE:
  dst[didx].i = IROUNDD(*(double *)&src[sidx].f);
  break;
+   case GLSL_TYPE_UINT64:
+  dst[didx].i = *(uint64_t *)&src[sidx].u;
+  break;
+   case GLSL_TYPE_INT64:
+  dst[didx].i = *(int64_t *)&src[sidx].i;
+  break;
   default:
  assert(!"Should not get here.");
  break;
   }
   break;
+case GLSL_TYPE_INT64:
+case GLSL_TYPE_UINT64:
+   switch (uni->type->base_type) {
+  case GLSL_TYPE_UINT:
+ *(int64_t *)&dst[didx].u = (int64_t) src[sidx].u;
+ break;
+  case GLSL_TYPE_INT:
+  case GLSL_TYPE_SAMPLER:
+  case GLSL_TYPE_IMAGE:
+ *(int64_t *)&dst[didx].u = (int64_t) src[sidx].i;
+ break;
+  case GLSL_TYPE_BOOL:
+ *(int64_t *)&dst[didx].u = src[sidx].i ? 1.0f : 0.0f;
+ break;
+  case GLSL_TYPE_FLOAT:
+ *(int64_t *)&dst[didx].u = (int64_t) src[sidx].f;
+ break;
+  default:
+ assert(!"Should not get here.");
+ break;
+  }
+   break;
 
+   break;
default:
   assert(!"Should not get here.");
   break;
@@ -482,6 +528,12 @@ log_uniform(const void *values, enum glsl_base_type 
basicType,
   case GLSL_TYPE_INT:
 printf("%d ", v[i].i);
 break;
+  case GLSL_TYPE_UINT64:
+printf("%lu ", *(uint64_t* )&v[i * 2].u);
+break;
+  case GLSL_TYPE_INT64:
+printf("%ld ", *(int64_t* )&v[i * 2].u);
+break;
   case GLSL_TYPE_FLOAT:
 printf("%g ", v[i].f);
 break;
@@ -633,6 +685,10 @@ glsl_type_name(enum glsl_base_type 

[Mesa-dev] [PATCH 10/23] glsl: add 64-bit integer support for constant expressions

2016-06-08 Thread Dave Airlie
From: Dave Airlie 

This just adds the new operations and add 64-bit integer
support to all the existing cases where it is needed.

Signed-off-by: Dave Airlie 
---
 src/compiler/glsl/ir_constant_expression.cpp | 324 +++
 1 file changed, 324 insertions(+)

diff --git a/src/compiler/glsl/ir_constant_expression.cpp 
b/src/compiler/glsl/ir_constant_expression.cpp
index fbbf779..e6f5668 100644
--- a/src/compiler/glsl/ir_constant_expression.cpp
+++ b/src/compiler/glsl/ir_constant_expression.cpp
@@ -88,6 +88,42 @@ bitcast_f2u(float f)
return u;
 }
 
+static double
+bitcast_u642d(uint64_t u)
+{
+   assert(sizeof(double) == sizeof(uint64_t));
+   double d;
+   memcpy(&d, &u, sizeof(d));
+   return d;
+}
+
+static double
+bitcast_i642d(int64_t i)
+{
+   assert(sizeof(double) == sizeof(int64_t));
+   double d;
+   memcpy(&d, &i, sizeof(d));
+   return d;
+}
+
+static double
+bitcast_d2u64(double d)
+{
+   assert(sizeof(double) == sizeof(uint64_t));
+   uint64_t u;
+   memcpy(&u, &d, sizeof(d));
+   return u;
+}
+
+static double
+bitcast_d2i64(double d)
+{
+   assert(sizeof(double) == sizeof(int64_t));
+   int64_t i;
+   memcpy(&i, &d, sizeof(d));
+   return i;
+}
+
 /**
  * Evaluate one component of a floating-point 4x8 unpacking function.
  */
@@ -690,6 +726,162 @@ ir_expression::constant_expression_value(struct 
hash_table *variable_context)
  data.b[c] = op[0]->value.d[c] != 0.0;
   }
   break;
+   case ir_unop_bitcast_u642d:
+  assert(op[0]->type->base_type == GLSL_TYPE_UINT64);
+  for (unsigned c = 0; c < op[0]->type->components(); c++) {
+data.d[c] = bitcast_u642d(op[0]->value.u64[c]);
+  }
+  break;
+   case ir_unop_bitcast_i642d:
+  assert(op[0]->type->base_type == GLSL_TYPE_INT64);
+  for (unsigned c = 0; c < op[0]->type->components(); c++) {
+data.d[c] = bitcast_i642d(op[0]->value.i64[c]);
+  }
+  break;
+   case ir_unop_bitcast_d2u64:
+  assert(op[0]->type->base_type == GLSL_TYPE_DOUBLE);
+  for (unsigned c = 0; c < op[0]->type->components(); c++) {
+data.u64[c] = bitcast_d2u64(op[0]->value.d[c]);
+  }
+  break;
+   case ir_unop_bitcast_d2i64:
+  assert(op[0]->type->base_type == GLSL_TYPE_DOUBLE);
+  for (unsigned c = 0; c < op[0]->type->components(); c++) {
+data.i64[c] = bitcast_d2i64(op[0]->value.d[c]);
+  }
+  break;
+   case ir_unop_i642i:
+  assert(op[0]->type->base_type == GLSL_TYPE_INT64);
+  for (unsigned c = 0; c < op[0]->type->components(); c++) {
+data.i[c] = op[0]->value.i64[c];
+  }
+  break;
+   case ir_unop_u642i:
+  assert(op[0]->type->base_type == GLSL_TYPE_UINT64);
+  for (unsigned c = 0; c < op[0]->type->components(); c++) {
+data.i[c] = op[0]->value.u64[c];
+  }
+  break;
+   case ir_unop_i642u:
+  assert(op[0]->type->base_type == GLSL_TYPE_INT64);
+  for (unsigned c = 0; c < op[0]->type->components(); c++) {
+data.u[c] = op[0]->value.i64[c];
+  }
+  break;
+   case ir_unop_u642u:
+  assert(op[0]->type->base_type == GLSL_TYPE_UINT64);
+  for (unsigned c = 0; c < op[0]->type->components(); c++) {
+data.u[c] = op[0]->value.u64[c];
+  }
+  break;
+   case ir_unop_i642b:
+  assert(op[0]->type->base_type == GLSL_TYPE_INT64);
+  for (unsigned c = 0; c < op[0]->type->components(); c++) {
+data.b[c] = op[0]->value.i64[c] != 0;
+  }
+  break;
+   case ir_unop_u642b:
+  assert(op[0]->type->base_type == GLSL_TYPE_UINT64);
+  for (unsigned c = 0; c < op[0]->type->components(); c++) {
+data.b[c] = op[0]->value.u64[c] != 0;
+  }
+  break;
+   case ir_unop_i642f:
+  assert(op[0]->type->base_type == GLSL_TYPE_INT64);
+  for (unsigned c = 0; c < op[0]->type->components(); c++) {
+data.f[c] = op[0]->value.i64[c];
+  }
+  break;
+   case ir_unop_u642f:
+  assert(op[0]->type->base_type == GLSL_TYPE_UINT64);
+  for (unsigned c = 0; c < op[0]->type->components(); c++) {
+data.f[c] = op[0]->value.u64[c];
+  }
+  break;
+   case ir_unop_i642d:
+  assert(op[0]->type->base_type == GLSL_TYPE_INT64);
+  for (unsigned c = 0; c < op[0]->type->components(); c++) {
+data.d[c] = op[0]->value.i64[c];
+  }
+  break;
+   case ir_unop_u642d:
+  assert(op[0]->type->base_type == GLSL_TYPE_UINT64);
+  for (unsigned c = 0; c < op[0]->type->components(); c++) {
+data.d[c] = op[0]->value.u64[c];
+  }
+  break;
+   case ir_unop_i2i64:
+  assert(op[0]->type->base_type == GLSL_TYPE_INT);
+  for (unsigned c = 0; c < op[0]->type->components(); c++) {
+data.i64[c] = op[0]->value.i[c];
+  }
+  break;
+   case ir_unop_u2i64:
+  assert(op[0]->type->base_type == GLSL_TYPE_UINT);
+  for (unsigned c = 0; c < op[0]->type->components(); c++) {
+data.i64[c] = op[0]->value.u[c];
+  }
+  break;
+   

[Mesa-dev] [PATCH 21/23] st/mesa: add support for enabling ARB_gpu_shader_int64.

2016-06-08 Thread Dave Airlie
From: Dave Airlie 

Signed-off-by: Dave Airlie 
---
 src/mesa/state_tracker/st_extensions.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/src/mesa/state_tracker/st_extensions.c 
b/src/mesa/state_tracker/st_extensions.c
index 13b0acc..b94a8cb 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -1147,6 +1147,13 @@ void st_init_extensions(struct pipe_screen *screen,
   extensions->ARB_vertex_attrib_64bit = GL_TRUE;
}
 
+   if (screen->get_shader_param(screen, PIPE_SHADER_VERTEX,
+PIPE_SHADER_CAP_INT64) &&
+   screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
+PIPE_SHADER_CAP_INT64)) {
+  extensions->ARB_gpu_shader_int64 = GL_TRUE;
+   }
+
if ((ST_DEBUG & DEBUG_GREMEDY) &&
screen->get_param(screen, PIPE_CAP_STRING_MARKER))
   extensions->GREMEDY_string_marker = GL_TRUE;
-- 
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/23] mesa: add ARB_gpu_shader_int64 extension bits

2016-06-08 Thread Dave Airlie
From: Dave Airlie 

This just adds the usual boilerplate in mesa core.

Signed-off-by: Dave Airlie 
---
 src/mesa/main/extensions_table.h | 1 +
 src/mesa/main/mtypes.h   | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
index ea8eb30..d8e7903 100644
--- a/src/mesa/main/extensions_table.h
+++ b/src/mesa/main/extensions_table.h
@@ -71,6 +71,7 @@ EXT(ARB_get_program_binary  , dummy_true
 EXT(ARB_get_texture_sub_image   , dummy_true   
  , GLL, GLC,  x ,  x , 2014)
 EXT(ARB_gpu_shader5 , ARB_gpu_shader5  
  ,  x , GLC,  x ,  x , 2010)
 EXT(ARB_gpu_shader_fp64 , ARB_gpu_shader_fp64  
  ,  x , GLC,  x ,  x , 2010)
+EXT(ARB_gpu_shader_int64, ARB_gpu_shader_int64 
  ,  x , GLC,  x ,  x , 2015)
 EXT(ARB_half_float_pixel, dummy_true   
  , GLL, GLC,  x ,  x , 2003)
 EXT(ARB_half_float_vertex   , ARB_half_float_vertex
  , GLL, GLC,  x ,  x , 2008)
 EXT(ARB_indirect_parameters , ARB_indirect_parameters  
  ,  x , GLC,  x ,  x , 2013)
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 471d41d..85559b2 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -3815,6 +3815,7 @@ struct gl_extensions
GLboolean ARB_explicit_uniform_location;
GLboolean ARB_gpu_shader5;
GLboolean ARB_gpu_shader_fp64;
+   GLboolean ARB_gpu_shader_int64;
GLboolean ARB_half_float_vertex;
GLboolean ARB_indirect_parameters;
GLboolean ARB_instanced_arrays;
-- 
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/23] glsl: add basic arb_gpu_shader_int64 types

2016-06-08 Thread Dave Airlie
From: Dave Airlie 

This adds the builtins and the lexer support.

To avoid too many warnings, it adds basic
support to the type in a few other places
in mesa, mostly in the trivial places.

It also adds a query to be used later for
if a type is an integer 32 or 64.

Signed-off-by: Dave Airlie 
---
 src/compiler/builtin_type_macros.h  | 10 ++
 src/compiler/glsl/ast_to_hir.cpp|  2 ++
 src/compiler/glsl/builtin_types.cpp | 12 +++
 src/compiler/glsl/glsl_lexer.ll | 10 ++
 src/compiler/glsl/glsl_parser.yy|  9 +
 src/compiler/glsl/ir_clone.cpp  |  2 ++
 src/compiler/glsl/link_uniform_initializers.cpp |  2 ++
 src/compiler/glsl_types.cpp | 45 +
 src/compiler/glsl_types.h   | 19 +--
 src/mesa/program/ir_to_mesa.cpp | 14 
 src/mesa/state_tracker/st_glsl_types.cpp|  6 
 11 files changed, 129 insertions(+), 2 deletions(-)

diff --git a/src/compiler/builtin_type_macros.h 
b/src/compiler/builtin_type_macros.h
index da3f19e..1986684 100644
--- a/src/compiler/builtin_type_macros.h
+++ b/src/compiler/builtin_type_macros.h
@@ -78,6 +78,16 @@ DECL_TYPE(dmat3x4, GL_DOUBLE_MAT3x4, GLSL_TYPE_DOUBLE, 4, 3)
 DECL_TYPE(dmat4x2, GL_DOUBLE_MAT4x2, GLSL_TYPE_DOUBLE, 2, 4)
 DECL_TYPE(dmat4x3, GL_DOUBLE_MAT4x3, GLSL_TYPE_DOUBLE, 3, 4)
 
+DECL_TYPE(int64_t,  GL_INT64_ARB,  GLSL_TYPE_INT64,   1, 1)
+DECL_TYPE(i64vec2,  GL_INT64_VEC2_ARB, GLSL_TYPE_INT64,   2, 1)
+DECL_TYPE(i64vec3,  GL_INT64_VEC3_ARB, GLSL_TYPE_INT64,   3, 1)
+DECL_TYPE(i64vec4,  GL_INT64_VEC4_ARB, GLSL_TYPE_INT64,   4, 1)
+
+DECL_TYPE(uint64_t, GL_UNSIGNED_INT64_ARB,  GLSL_TYPE_UINT64, 1, 1)
+DECL_TYPE(u64vec2,  GL_UNSIGNED_INT64_VEC2_ARB, GLSL_TYPE_UINT64, 2, 1)
+DECL_TYPE(u64vec3,  GL_UNSIGNED_INT64_VEC3_ARB, GLSL_TYPE_UINT64, 3, 1)
+DECL_TYPE(u64vec4,  GL_UNSIGNED_INT64_VEC4_ARB, GLSL_TYPE_UINT64, 4, 1)
+
 DECL_TYPE(sampler,   GL_SAMPLER_1D,   
GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_1D,   0, 0, GLSL_TYPE_VOID)
 DECL_TYPE(sampler1D, GL_SAMPLER_1D,   
GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_1D,   0, 0, GLSL_TYPE_FLOAT)
 DECL_TYPE(sampler2D, GL_SAMPLER_2D,   
GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_2D,   0, 0, GLSL_TYPE_FLOAT)
diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp
index 1c751f6..b75ddbd 100644
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -1089,6 +1089,8 @@ do_comparison(void *mem_ctx, int operation, ir_rvalue 
*op0, ir_rvalue *op1)
case GLSL_TYPE_INT:
case GLSL_TYPE_BOOL:
case GLSL_TYPE_DOUBLE:
+   case GLSL_TYPE_UINT64:
+   case GLSL_TYPE_INT64:
   return new(mem_ctx) ir_expression(operation, op0, op1);
 
case GLSL_TYPE_ARRAY: {
diff --git a/src/compiler/glsl/builtin_types.cpp 
b/src/compiler/glsl/builtin_types.cpp
index 5f208f8..1a9292a 100644
--- a/src/compiler/glsl/builtin_types.cpp
+++ b/src/compiler/glsl/builtin_types.cpp
@@ -402,5 +402,17 @@ _mesa_glsl_initialize_types(struct _mesa_glsl_parse_state 
*state)
   add_type(symbols, glsl_type::dmat4x2_type);
   add_type(symbols, glsl_type::dmat4x3_type);
}
+
+   if (state->ARB_gpu_shader_int64_enable) {
+  add_type(symbols, glsl_type::int64_t_type);
+  add_type(symbols, glsl_type::i64vec2_type);
+  add_type(symbols, glsl_type::i64vec3_type);
+  add_type(symbols, glsl_type::i64vec4_type);
+
+  add_type(symbols, glsl_type::uint64_t_type);
+  add_type(symbols, glsl_type::u64vec2_type);
+  add_type(symbols, glsl_type::u64vec3_type);
+  add_type(symbols, glsl_type::u64vec4_type);
+   }
 }
 /** @} */
diff --git a/src/compiler/glsl/glsl_lexer.ll b/src/compiler/glsl/glsl_lexer.ll
index 11711ee..9c6d943 100644
--- a/src/compiler/glsl/glsl_lexer.ll
+++ b/src/compiler/glsl/glsl_lexer.ll
@@ -587,6 +587,16 @@ resource   KEYWORD(420, 300, 0, 0, RESOURCE);
 sample KEYWORD_WITH_ALT(400, 300, 400, 320, 
yyextra->ARB_gpu_shader5_enable || 
yyextra->OES_shader_multisample_interpolation_enable, SAMPLE);
 subroutine KEYWORD_WITH_ALT(400, 300, 400, 0, 
yyextra->ARB_shader_subroutine_enable, SUBROUTINE);
 
+/* Additional words for ARB_gpu_shader_int64 */
+int64_tKEYWORD_WITH_ALT(110, 100, 0, 0, 
yyextra->ARB_gpu_shader_int64_enable, INT64);
+i64vec2KEYWORD_WITH_ALT(110, 100, 0, 0, 
yyextra->ARB_gpu_shader_int64_enable, I64VEC2);
+i64vec3KEYWORD_WITH_ALT(110, 100, 0, 0, 
yyextra->ARB_gpu_shader_int64_enable, I64VEC3);
+i64vec4KEYWORD_WITH_ALT(110, 100, 0, 0, 
yyextra->ARB_gpu_shader_int64_enable, I64VEC4);
+
+uint64_t   KEYWORD_WITH_ALT(110, 100, 0, 0, 
yyextra->ARB_gpu_shader_int64_enable, UINT64);
+u64vec2KEYWORD_WITH_ALT(110, 100, 0, 0, 

[Mesa-dev] [PATCH 15/23] glsl/varying_packing: add 64-bit integer support

2016-06-08 Thread Dave Airlie
From: Dave Airlie 

As for the double code, but using the 64-bit integer conversions.

Signed-off-by: Dave Airlie 
---
 src/compiler/glsl/lower_packed_varyings.cpp | 62 +
 1 file changed, 62 insertions(+)

diff --git a/src/compiler/glsl/lower_packed_varyings.cpp 
b/src/compiler/glsl/lower_packed_varyings.cpp
index 130b8f6..18784fd 100644
--- a/src/compiler/glsl/lower_packed_varyings.cpp
+++ b/src/compiler/glsl/lower_packed_varyings.cpp
@@ -345,6 +345,38 @@ 
lower_packed_varyings_visitor::bitwise_assign_pack(ir_rvalue *lhs,
 rhs = u2i(expr(ir_unop_unpack_double_2x32, rhs));
  }
  break;
+  case GLSL_TYPE_INT64:
+ assert(rhs->type->vector_elements <= 2);
+ if (rhs->type->vector_elements == 2) {
+ir_variable *t = new(mem_ctx) ir_variable(lhs->type, "pack", 
ir_var_temporary);
+
+assert(lhs->type->vector_elements == 4);
+this->out_variables->push_tail(t);
+this->out_instructions->push_tail(
+  assign(t, u2i(expr(ir_unop_unpack_int_2x32, 
swizzle_x(rhs->clone(mem_ctx, NULL)))), 0x3));
+this->out_instructions->push_tail(
+  assign(t,  u2i(expr(ir_unop_unpack_int_2x32, 
swizzle_y(rhs))), 0xc));
+rhs = deref(t).val;
+ } else {
+rhs = u2i(expr(ir_unop_unpack_int_2x32, rhs));
+ }
+ break;
+  case GLSL_TYPE_UINT64:
+ assert(rhs->type->vector_elements <= 2);
+ if (rhs->type->vector_elements == 2) {
+ir_variable *t = new(mem_ctx) ir_variable(lhs->type, "pack", 
ir_var_temporary);
+
+assert(lhs->type->vector_elements == 4);
+this->out_variables->push_tail(t);
+this->out_instructions->push_tail(
+  assign(t, u2i(expr(ir_unop_unpack_uint_2x32, 
swizzle_x(rhs->clone(mem_ctx, NULL)))), 0x3));
+this->out_instructions->push_tail(
+  assign(t,  u2i(expr(ir_unop_unpack_uint_2x32, 
swizzle_y(rhs))), 0xc));
+rhs = deref(t).val;
+ } else {
+rhs = u2i(expr(ir_unop_unpack_uint_2x32, rhs));
+ }
+ break;
   default:
  assert(!"Unexpected type conversion while lowering varyings");
  break;
@@ -394,6 +426,36 @@ 
lower_packed_varyings_visitor::bitwise_assign_unpack(ir_rvalue *lhs,
 rhs = expr(ir_unop_pack_double_2x32, i2u(rhs));
  }
  break;
+  case GLSL_TYPE_INT64:
+ assert(lhs->type->vector_elements <= 2);
+ if (lhs->type->vector_elements == 2) {
+ir_variable *t = new(mem_ctx) ir_variable(lhs->type, "unpack", 
ir_var_temporary);
+assert(rhs->type->vector_elements == 4);
+this->out_variables->push_tail(t);
+this->out_instructions->push_tail(
+  assign(t, expr(ir_unop_pack_int_2x32, 
i2u(swizzle_xy(rhs->clone(mem_ctx, NULL)))), 0x1));
+this->out_instructions->push_tail(
+  assign(t, expr(ir_unop_pack_int_2x32, 
i2u(swizzle(rhs->clone(mem_ctx, NULL), SWIZZLE_ZWZW, 2))), 0x2));
+rhs = deref(t).val;
+ } else {
+rhs = expr(ir_unop_pack_int_2x32, i2u(rhs));
+ }
+ break;
+  case GLSL_TYPE_UINT64:
+ assert(lhs->type->vector_elements <= 2);
+ if (lhs->type->vector_elements == 2) {
+ir_variable *t = new(mem_ctx) ir_variable(lhs->type, "unpack", 
ir_var_temporary);
+assert(rhs->type->vector_elements == 4);
+this->out_variables->push_tail(t);
+this->out_instructions->push_tail(
+  assign(t, expr(ir_unop_pack_uint_2x32, 
i2u(swizzle_xy(rhs->clone(mem_ctx, NULL)))), 0x1));
+this->out_instructions->push_tail(
+  assign(t, expr(ir_unop_pack_uint_2x32, 
i2u(swizzle(rhs->clone(mem_ctx, NULL), SWIZZLE_ZWZW, 2))), 0x2));
+rhs = deref(t).val;
+ } else {
+rhs = expr(ir_unop_pack_uint_2x32, i2u(rhs));
+ }
+ break;
   default:
  assert(!"Unexpected type conversion while lowering varyings");
  break;
-- 
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 11/23] glsl/ast: add 64-bit integer support to conversion functions

2016-06-08 Thread Dave Airlie
From: Dave Airlie 

This adds support to call the new operations on conversions.

Signed-off-by: Dave Airlie 
---
 src/compiler/glsl/ast_function.cpp | 75 ++
 src/compiler/glsl/ast_to_hir.cpp   | 20 ++
 2 files changed, 95 insertions(+)

diff --git a/src/compiler/glsl/ast_function.cpp 
b/src/compiler/glsl/ast_function.cpp
index f74394f..399a142 100644
--- a/src/compiler/glsl/ast_function.cpp
+++ b/src/compiler/glsl/ast_function.cpp
@@ -757,6 +757,12 @@ convert_component(ir_rvalue *src, const glsl_type 
*desired_type)
   case GLSL_TYPE_DOUBLE:
 result = new(ctx) ir_expression(ir_unop_d2u, src);
 break;
+  case GLSL_TYPE_UINT64:
+result = new(ctx) ir_expression(ir_unop_u642u, src);
+break;
+  case GLSL_TYPE_INT64:
+result = new(ctx) ir_expression(ir_unop_i642u, src);
+break;
   }
   break;
case GLSL_TYPE_INT:
@@ -773,6 +779,12 @@ convert_component(ir_rvalue *src, const glsl_type 
*desired_type)
   case GLSL_TYPE_DOUBLE:
 result = new(ctx) ir_expression(ir_unop_d2i, src);
 break;
+  case GLSL_TYPE_UINT64:
+result = new(ctx) ir_expression(ir_unop_u642i, src);
+break;
+  case GLSL_TYPE_INT64:
+result = new(ctx) ir_expression(ir_unop_i642i, src);
+break;
   }
   break;
case GLSL_TYPE_FLOAT:
@@ -789,6 +801,12 @@ convert_component(ir_rvalue *src, const glsl_type 
*desired_type)
   case GLSL_TYPE_DOUBLE:
 result = new(ctx) ir_expression(ir_unop_d2f, desired_type, src, NULL);
 break;
+  case GLSL_TYPE_UINT64:
+result = new(ctx) ir_expression(ir_unop_u642f, desired_type, src, 
NULL);
+break;
+  case GLSL_TYPE_INT64:
+result = new(ctx) ir_expression(ir_unop_i642f, desired_type, src, 
NULL);
+break;
   }
   break;
case GLSL_TYPE_BOOL:
@@ -806,6 +824,12 @@ convert_component(ir_rvalue *src, const glsl_type 
*desired_type)
   case GLSL_TYPE_DOUBLE:
  result = new(ctx) ir_expression(ir_unop_d2b, desired_type, src, NULL);
  break;
+  case GLSL_TYPE_UINT64:
+result = new(ctx) ir_expression(ir_unop_u642b, desired_type, src, 
NULL);
+break;
+  case GLSL_TYPE_INT64:
+result = new(ctx) ir_expression(ir_unop_i642b, desired_type, src, 
NULL);
+break;
   }
   break;
case GLSL_TYPE_DOUBLE:
@@ -823,7 +847,58 @@ convert_component(ir_rvalue *src, const glsl_type 
*desired_type)
   case GLSL_TYPE_FLOAT:
  result = new(ctx) ir_expression(ir_unop_f2d, desired_type, src, NULL);
  break;
+  case GLSL_TYPE_UINT64:
+result = new(ctx) ir_expression(ir_unop_u642d, desired_type, src, 
NULL);
+break;
+  case GLSL_TYPE_INT64:
+result = new(ctx) ir_expression(ir_unop_i642d, desired_type, src, 
NULL);
+break;
   }
+  break;
+   case GLSL_TYPE_UINT64:
+  switch (b) {
+  case GLSL_TYPE_INT:
+ result = new(ctx) ir_expression(ir_unop_i2u64, src);
+ break;
+  case GLSL_TYPE_UINT:
+ result = new(ctx) ir_expression(ir_unop_u2u64, src);
+ break;
+  case GLSL_TYPE_BOOL:
+ result = new(ctx) ir_expression(ir_unop_b2u64, src);
+ break;
+  case GLSL_TYPE_FLOAT:
+ result = new(ctx) ir_expression(ir_unop_f2u64, src);
+ break;
+  case GLSL_TYPE_DOUBLE:
+ result = new(ctx) ir_expression(ir_unop_d2u64, src);
+ break;
+  case GLSL_TYPE_INT64:
+ result = new(ctx) ir_expression(ir_unop_i642u64, src);
+ break;
+  }
+  break;
+   case GLSL_TYPE_INT64:
+  switch (b) {
+  case GLSL_TYPE_INT:
+ result = new(ctx) ir_expression(ir_unop_i2i64, src);
+ break;
+  case GLSL_TYPE_UINT:
+ result = new(ctx) ir_expression(ir_unop_u2i64, src);
+ break;
+  case GLSL_TYPE_BOOL:
+ result = new(ctx) ir_expression(ir_unop_b2i64, src);
+ break;
+  case GLSL_TYPE_FLOAT:
+ result = new(ctx) ir_expression(ir_unop_f2i64, src);
+ break;
+  case GLSL_TYPE_DOUBLE:
+ result = new(ctx) ir_expression(ir_unop_d2i64, src);
+ break;
+  case GLSL_TYPE_UINT64:
+ result = new(ctx) ir_expression(ir_unop_u642i64, src);
+ break;
+  }
+  break;
}
 
assert(result != NULL);
diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp
index 0cf0941..abcd491 100644
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -258,8 +258,28 @@ get_implicit_conversion_operation(const glsl_type *to, 
const glsl_type *from,
   case GLSL_TYPE_INT: return ir_unop_i2d;
   case GLSL_TYPE_UINT: return ir_unop_u2d;
   case GLSL_TYPE_FLOAT: return ir_unop_f2d;
+  case GLSL_TYPE_INT64: return ir_unop_i642d;
+  case GLSL_TYPE_UINT64: return ir_unop_u642d;
   

[Mesa-dev] [PATCH 09/23] glsl/ir: add support for 64-bit integer conversions.

2016-06-08 Thread Dave Airlie
From: Dave Airlie 

This adds all the conversions in the world, I'm not 100%
sure of all of these are needed, but add all of them and
we can cut them down later.

Signed-off-by: Dave Airlie 
---
 src/compiler/glsl/ir.cpp  |  82 +
 src/compiler/glsl/ir.h|  38 +++-
 src/compiler/glsl/ir_validate.cpp | 124 ++
 3 files changed, 242 insertions(+), 2 deletions(-)

diff --git a/src/compiler/glsl/ir.cpp b/src/compiler/glsl/ir.cpp
index a7c0eac..47846f7 100644
--- a/src/compiler/glsl/ir.cpp
+++ b/src/compiler/glsl/ir.cpp
@@ -261,6 +261,8 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
case ir_unop_find_msb:
case ir_unop_find_lsb:
case ir_unop_subroutine_to_int:
+   case ir_unop_i642i:
+   case ir_unop_u642i:
   this->type = glsl_type::get_instance(GLSL_TYPE_INT,
   op0->type->vector_elements, 1);
   break;
@@ -271,6 +273,8 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
case ir_unop_d2f:
case ir_unop_bitcast_i2f:
case ir_unop_bitcast_u2f:
+   case ir_unop_i642f:
+   case ir_unop_u642f:
   this->type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
   op0->type->vector_elements, 1);
   break;
@@ -278,6 +282,8 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
case ir_unop_f2b:
case ir_unop_i2b:
case ir_unop_d2b:
+   case ir_unop_i642b:
+   case ir_unop_u642b:
   this->type = glsl_type::get_instance(GLSL_TYPE_BOOL,
   op0->type->vector_elements, 1);
   break;
@@ -285,6 +291,8 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
case ir_unop_f2d:
case ir_unop_i2d:
case ir_unop_u2d:
+   case ir_unop_i642d:
+   case ir_unop_u642d:
   this->type = glsl_type::get_instance(GLSL_TYPE_DOUBLE,
   op0->type->vector_elements, 1);
   break;
@@ -293,15 +301,36 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
case ir_unop_f2u:
case ir_unop_d2u:
case ir_unop_bitcast_f2u:
+   case ir_unop_i642u:
+   case ir_unop_u642u:
   this->type = glsl_type::get_instance(GLSL_TYPE_UINT,
   op0->type->vector_elements, 1);
   break;
 
+   case ir_unop_i2i64:
+   case ir_unop_u2i64:
+   case ir_unop_b2i64:
+   case ir_unop_f2i64:
+   case ir_unop_d2i64:
+  this->type = glsl_type::get_instance(GLSL_TYPE_INT64,
+  op0->type->vector_elements, 1);
+  break;
+
+   case ir_unop_i2u64:
+   case ir_unop_u2u64:
+   case ir_unop_b2u64:
+   case ir_unop_f2u64:
+   case ir_unop_d2u64:
+  this->type = glsl_type::get_instance(GLSL_TYPE_UINT64,
+  op0->type->vector_elements, 1);
+  break;
case ir_unop_noise:
   this->type = glsl_type::float_type;
   break;
 
case ir_unop_unpack_double_2x32:
+   case ir_unop_unpack_int_2x32:
+   case ir_unop_unpack_uint_2x32:
   this->type = glsl_type::uvec2_type;
   break;
 
@@ -317,6 +346,14 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
   this->type = glsl_type::double_type;
   break;
 
+   case ir_unop_pack_int_2x32:
+  this->type = glsl_type::int64_t_type;
+  break;
+
+   case ir_unop_pack_uint_2x32:
+  this->type = glsl_type::uint64_t_type;
+  break;
+
case ir_unop_unpack_snorm_2x16:
case ir_unop_unpack_unorm_2x16:
case ir_unop_unpack_half_2x16:
@@ -347,6 +384,21 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
   this->type = glsl_type::bool_type;
   break;
 
+   case ir_unop_bitcast_i642d:
+   case ir_unop_bitcast_u642d:
+  this->type = glsl_type::get_instance(GLSL_TYPE_DOUBLE,
+   op0->type->vector_elements, 1);
+  break;
+
+   case ir_unop_bitcast_d2i64:
+  this->type = glsl_type::get_instance(GLSL_TYPE_INT64,
+   op0->type->vector_elements, 1);
+  break;
+   case ir_unop_bitcast_d2u64:
+  this->type = glsl_type::get_instance(GLSL_TYPE_UINT64,
+   op0->type->vector_elements, 1);
+  break;
+
default:
   assert(!"not reached: missing automatic type setup for ir_expression");
   this->type = op0->type;
@@ -532,6 +584,32 @@ static const char *const operator_strs[] = {
"bitcast_f2i",
"bitcast_u2f",
"bitcast_f2u",
+   "bitcast_u642d",
+   "bitcast_i642d",
+   "bitcast_d2u64",
+   "bitcast_d2i64",
+   "i642i",
+   "u642i",
+   "i642u",
+   "u642u",
+   "i642b",
+   "u642b",
+   "i642f",
+   "u642f",
+   "i642d",
+   "u642d",
+   "i2i64",
+   "u2i64",
+   "b2i64",
+   "f2i64",
+   "d2i64",
+   "i2u64",
+   "u2u64",
+   "b2u64",
+   "f2u64",
+   "d2u64",
+   "i642u64",
+   "u642i64",
"trunc",
"ceil",
"floor",
@@ -572,6 +650,10 @@ 

[Mesa-dev] [PATCH 03/23] glsl: add arb_gpu_shader_int64 boilerplate.

2016-06-08 Thread Dave Airlie
From: Dave Airlie 

This just adds the basic boilerplate support.

Signed-off-by: Dave Airlie 
---
 src/compiler/glsl/glcpp/glcpp-parse.y| 3 +++
 src/compiler/glsl/glsl_parser_extras.cpp | 1 +
 src/compiler/glsl/glsl_parser_extras.h   | 7 +++
 3 files changed, 11 insertions(+)

diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y 
b/src/compiler/glsl/glcpp/glcpp-parse.y
index 2cfa6a6..5869c3a 100644
--- a/src/compiler/glsl/glcpp/glcpp-parse.y
+++ b/src/compiler/glsl/glcpp/glcpp-parse.y
@@ -2408,6 +2408,9 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t 
*parser, intmax_t versio
  if (extensions->ARB_gpu_shader_fp64)
 add_builtin_define(parser, "GL_ARB_gpu_shader_fp64", 1);
 
+ if (extensions->ARB_gpu_shader_int64)
+add_builtin_define(parser, "GL_ARB_gpu_shader_int64", 1);
+
  if (extensions->ARB_vertex_attrib_64bit)
 add_builtin_define(parser, "GL_ARB_vertex_attrib_64bit", 1);
 
diff --git a/src/compiler/glsl/glsl_parser_extras.cpp 
b/src/compiler/glsl/glsl_parser_extras.cpp
index 09f7477..e2b58a9 100644
--- a/src/compiler/glsl/glsl_parser_extras.cpp
+++ b/src/compiler/glsl/glsl_parser_extras.cpp
@@ -587,6 +587,7 @@ static const _mesa_glsl_extension 
_mesa_glsl_supported_extensions[] = {
EXT(ARB_fragment_layer_viewport,  true,  false, 
ARB_fragment_layer_viewport),
EXT(ARB_gpu_shader5,  true,  false, ARB_gpu_shader5),
EXT(ARB_gpu_shader_fp64,  true,  false, 
ARB_gpu_shader_fp64),
+   EXT(ARB_gpu_shader_int64, true,  false, 
ARB_gpu_shader_int64),
EXT(ARB_sample_shading,   true,  false, ARB_sample_shading),
EXT(ARB_separate_shader_objects,  true,  false, dummy_true),
EXT(ARB_shader_atomic_counter_ops,true,  false, 
ARB_shader_atomic_counter_ops),
diff --git a/src/compiler/glsl/glsl_parser_extras.h 
b/src/compiler/glsl/glsl_parser_extras.h
index 8c43292..b2bb064 100644
--- a/src/compiler/glsl/glsl_parser_extras.h
+++ b/src/compiler/glsl/glsl_parser_extras.h
@@ -250,6 +250,11 @@ struct _mesa_glsl_parse_state {
   return ARB_gpu_shader_fp64_enable || is_version(400, 0);
}
 
+   bool has_int64() const
+   {
+  return ARB_gpu_shader_int64_enable;
+   }
+
bool has_420pack() const
{
   return ARB_shading_language_420pack_enable || is_version(420, 0);
@@ -561,6 +566,8 @@ struct _mesa_glsl_parse_state {
bool ARB_gpu_shader5_warn;
bool ARB_gpu_shader_fp64_enable;
bool ARB_gpu_shader_fp64_warn;
+   bool ARB_gpu_shader_int64_enable;
+   bool ARB_gpu_shader_int64_warn;
bool ARB_sample_shading_enable;
bool ARB_sample_shading_warn;
bool ARB_separate_shader_objects_enable;
-- 
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/23] mapi: add support for ARB_gpu_shader_int64.

2016-06-08 Thread Dave Airlie
From: Dave Airlie 

Just add the boilerplate xml code.

Signed-off-by: Dave Airlie 
---
 src/mapi/glapi/gen/ARB_gpu_shader_int64.xml | 262 
 src/mapi/glapi/gen/Makefile.am  |   1 +
 src/mapi/glapi/gen/gl_API.xml   |   2 +
 src/mesa/main/uniforms.c| 182 +++
 src/mesa/main/uniforms.h|  82 +
 5 files changed, 529 insertions(+)
 create mode 100644 src/mapi/glapi/gen/ARB_gpu_shader_int64.xml

diff --git a/src/mapi/glapi/gen/ARB_gpu_shader_int64.xml 
b/src/mapi/glapi/gen/ARB_gpu_shader_int64.xml
new file mode 100644
index 000..d0e4c54
--- /dev/null
+++ b/src/mapi/glapi/gen/ARB_gpu_shader_int64.xml
@@ -0,0 +1,262 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+   
+
+
+
+
+
+
+   
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+   
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/mapi/glapi/gen/Makefile.am b/src/mapi/glapi/gen/Makefile.am
index c511de9..712cda8 100644
--- a/src/mapi/glapi/gen/Makefile.am
+++ b/src/mapi/glapi/gen/Makefile.am
@@ -135,6 +135,7 @@ API_XML = \
ARB_get_program_binary.xml \
ARB_get_texture_sub_image.xml \
ARB_gpu_shader_fp64.xml \
+   ARB_gpu_shader_int64.xml \
ARB_gpu_shader5.xml \
ARB_indirect_parameters.xml \
ARB_instanced_arrays.xml \
diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml
index 766fe3c..8e1f3a6 100644
--- a/src/mapi/glapi/gen/gl_API.xml
+++ b/src/mapi/glapi/gen/gl_API.xml
@@ -8294,6 +8294,8 @@
 
 http://www.w3.org/2001/XInclude"/>
 
+http://www.w3.org/2001/XInclude"/>
+
 
 
 
diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c
index 35b93d3..13577ae 100644
--- a/src/mesa/main/uniforms.c
+++ b/src/mesa/main/uniforms.c
@@ -897,6 +897,27 @@ _mesa_GetUniformdv(GLuint program, GLint location, 
GLdouble *params)
_mesa_GetnUniformdvARB(program, location, INT_MAX, params);
 }
 
+void GLAPIENTRY
+_mesa_GetnUniformi64vARB(GLuint program, GLint location,
+ GLsizei bufSize, GLint64 *params)
+{
+
+}
+void GLAPIENTRY
+_mesa_GetUniformi64vARB(GLuint program, GLint location, GLint64 *params)
+{
+}
+
+void GLAPIENTRY
+_mesa_GetnUniformui64vARB(GLuint program, GLint location,
+ GLsizei bufSize, GLuint64 *params)
+{
+}
+void GLAPIENTRY
+_mesa_GetUniformui64vARB(GLuint program, GLint location, GLuint64 *params)
+{
+}
+
 
 GLint GLAPIENTRY
 _mesa_GetUniformLocation(GLuint programObj, const GLcharARB *name)
@@ -1610,3 +1631,164 @@ _mesa_ProgramUniformMatrix4x3dv(GLuint program, GLint 
location, GLsizei count,
 "glProgramUniformMatrix4x3dv");
_mesa_uniform_matrix(ctx, shProg, 4, 3, location, count, transpose, value, 
GLSL_TYPE_DOUBLE);
 }
+
+void GLAPIENTRY
+_mesa_Uniform1i64ARB(GLint location, GLint64 v0)
+{
+}
+
+void GLAPIENTRY
+_mesa_Uniform2i64ARB(GLint location, GLint64 v0, GLint64 v1)
+{
+}
+
+void GLAPIENTRY
+_mesa_Uniform3i64ARB(GLint location, GLint64 v0, GLint64 v1, GLint64 v2)
+{
+}
+
+void GLAPIENTRY
+_mesa_Uniform4i64ARB(GLint location,  GLint64 v0, GLint64 v1, GLint64 v2, 
GLint64 v3)
+{
+}
+
+void GLAPIENTRY
+_mesa_Uniform1i64vARB(GLint 

[Mesa-dev] [rfc] ARB_gpu_shader_int64 support (softpipe/llvmpipe)

2016-06-08 Thread Dave Airlie
I started this quite a while back, and decided to spend some
time on it again, since it's the basis for a bunch of the newer
GL extensions.

So this is my first pass at it, it's fairly complete, but it
definitely is missing some pieces before it's finished.

The patchset adds core mesa, gallium, softpipe and llvmpipe support.
I've got a few tests in piglit using a generator for all the built-ins
and operations that all pass.

So the big thing here was OMG the conversions, and is also
the biggest missing thing in the gallium code, I haven't plumbed
through all the conversions into opcodes yet, I'm not really sure
if we need all of them, but it'll be a quite large opcode set,
and I wasn't feeling brave enough yet.

The other thing is I've implemented MOD/DIV opcodes that may or
may not be useful on real hardware, if gallium drivers are okay
with lowering this in the backends then I'm fine with how it is.
(and gallium docs do need updating).

Dave.



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] isl: Replace bash generator with python generator

2016-06-08 Thread Dylan Baker
[snip]

Quoting Jason Ekstrand (2016-06-08 16:20:33)
> gen_format_layout.c.mako
> 
> I made this comment in the office today but I think the mako here is simple
> enough that we might be better off just putting it all in one file.
> 

I know you're not a fan of separate templates, and if the consensus is
to put the template in the python file I'll do that, but my experience
working with mako makes me wary of doing that for templates over a few
lines. I think they're very hard to read (remember that the copyright
header is part of the template), and you don't get syntax highlighting
which makes them much harder to work with.

[snip]

> > +def reader():
> > +    """Wrapper around csv.reader that skips comments and blanks."""
> > +    # csv.reader actually reads the file one line at a time (it was 
> > designed
> to
> > +    # open excel generated sheets), so hold the file until all of the lines
> are
> > +    # read.
> > +    with open('isl_format_layout.csv', 'r') as f:
> 
> I'm not so sure this works.  It's probably better to pass the file name in.  
> Maybe this is safe but I'm skeptical.

I don't know, I can change it, it just seemed kinda silly to add command
parsing to a generator that reads exactly one file.

Maybe someone with more autotools experience could say whether this is a
good idea or not?

> 
> Other than those two comments, this seems perfectly reasonable.
> 

Okay, I'll wait for some more feedback and send out a v2.

[snip]

Dylan


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeonsi: Reinitialize all descriptors in CE preamble.

2016-06-08 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Mon, Jun 6, 2016 at 10:49 PM, Bas Nieuwenhuizen
 wrote:
> This fixes a problem with the CE preamble and restoring only stuff in the
> preamble when needed.
>
> To illustrate suppose we have two graphics IB's 1 and 2, which  are submitted 
> in
> that order. Furthermore suppose IB 1 does not use CE ram, but IB 2 does, and 
> we
> have a context switch at the start of IB 1, but not between IB 1 and IB 2.
>
> The old code put the CE RAM loads in the preamble of IB 2. As the preamble of
> IB 1 does not have the loads and the preamble of IB 2 does not get executed, 
> the
> old values are not loaded into CE RAM.
>
> Fix this by always restoring the entire CE RAM.
>
> v2: - Just load all descriptor set buffers instead of load and store the 
> entire
>   CE RAM.
> - Leave the ce_ram_dirty tracking in place for the non-preamble case.
>
> Signed-off-by: Bas Nieuwenhuizen 
> Cc: "12.0" 
> ---
>
> Replaces "radeonsi: Save and restore entire CE RAM."
>
>  src/gallium/drivers/radeonsi/si_descriptors.c | 15 +--
>  src/gallium/drivers/radeonsi/si_hw_context.c  |  3 +++
>  src/gallium/drivers/radeonsi/si_state.h   |  1 +
>  3 files changed, 17 insertions(+), 2 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
> b/src/gallium/drivers/radeonsi/si_descriptors.c
> index baddc5f..b32032e 100644
> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
> @@ -160,7 +160,7 @@ static bool si_ce_upload(struct si_context *sctx, 
> unsigned ce_offset, unsigned s
> return true;
>  }
>
> -static void si_reinitialize_ce_ram(struct si_context *sctx,
> +static void si_ce_reinitialize_descriptors(struct si_context *sctx,
>  struct si_descriptors *desc)
>  {
> if (desc->buffer) {
> @@ -186,6 +186,17 @@ static void si_reinitialize_ce_ram(struct si_context 
> *sctx,
> desc->ce_ram_dirty = false;
>  }
>
> +void si_ce_reinitialize_all_descriptors(struct si_context *sctx)
> +{
> +   for (int i = 0; i < SI_NUM_SHADERS; i++) {
> +si_ce_reinitialize_descriptors(sctx, 
> >const_buffers[i].desc);
> +si_ce_reinitialize_descriptors(sctx, 
> >shader_buffers[i].desc);
> +si_ce_reinitialize_descriptors(sctx, 
> >samplers[i].views.desc);
> +si_ce_reinitialize_descriptors(sctx, >images[i].desc);
> +   }
> +si_ce_reinitialize_descriptors(sctx, >rw_buffers.desc);
> +}
> +
>  void si_ce_enable_loads(struct radeon_winsys_cs *ib)
>  {
> radeon_emit(ib, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
> @@ -207,7 +218,7 @@ static bool si_upload_descriptors(struct si_context *sctx,
> uint32_t const* list = (uint32_t const*)desc->list;
>
> if (desc->ce_ram_dirty)
> -   si_reinitialize_ce_ram(sctx, desc);
> +   si_ce_reinitialize_descriptors(sctx, desc);
>
> while(desc->dirty_mask) {
> int begin, count;
> diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c 
> b/src/gallium/drivers/radeonsi/si_hw_context.c
> index fa6a2cb..d1b9851 100644
> --- a/src/gallium/drivers/radeonsi/si_hw_context.c
> +++ b/src/gallium/drivers/radeonsi/si_hw_context.c
> @@ -213,6 +213,9 @@ void si_begin_new_cs(struct si_context *ctx)
> else if (ctx->ce_ib)
> si_ce_enable_loads(ctx->ce_ib);
>
> +   if (ctx->ce_preamble_ib)
> +   si_ce_reinitialize_all_descriptors(ctx);
> +
> ctx->framebuffer.dirty_cbufs = (1 << 8) - 1;
> ctx->framebuffer.dirty_zsbuf = true;
> si_mark_atom_dirty(ctx, >framebuffer.atom);
> diff --git a/src/gallium/drivers/radeonsi/si_state.h 
> b/src/gallium/drivers/radeonsi/si_state.h
> index e5795eb..c1c3ca5 100644
> --- a/src/gallium/drivers/radeonsi/si_state.h
> +++ b/src/gallium/drivers/radeonsi/si_state.h
> @@ -250,6 +250,7 @@ struct si_buffer_resources {
> } while(0)
>
>  /* si_descriptors.c */
> +void si_ce_reinitialize_all_descriptors(struct si_context *sctx);
>  void si_ce_enable_loads(struct radeon_winsys_cs *ib);
>  void si_set_mutable_tex_desc_fields(struct r600_texture *tex,
> const struct radeon_surf_level 
> *base_level_info,
> --
> 2.8.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] isl: Replace bash generator with python generator

2016-06-08 Thread Jason Ekstrand
On Jun 7, 2016 5:32 PM, "Dylan Baker"  wrote:
>
> This replaces the current bash generator with a python based generator
> using mako. It's quite fast and works with both python 2.7 and python
> 3.5, and should work with 3.3+ and maybe even 3.2.
>
> It produces an almost identical file except for minor layout changes,
> and the addition of a "generated file, do not edit" warning.
>
> Signed-off-by: Dylan Baker 
> ---
>  src/intel/isl/Makefile.am|   7 +-
>  src/intel/isl/gen_format_layout.c.mako   |  77 +
>  src/intel/isl/gen_format_layout.py   | 143
+++
>  src/intel/isl/isl_format_layout_gen.bash | 129

>  4 files changed, 224 insertions(+), 132 deletions(-)
>  create mode 100644 src/intel/isl/gen_format_layout.c.mako
>  create mode 100644 src/intel/isl/gen_format_layout.py
>  delete mode 100755 src/intel/isl/isl_format_layout_gen.bash
>
> diff --git a/src/intel/isl/Makefile.am b/src/intel/isl/Makefile.am
> index 74f863a..ee75bbd 100644
> --- a/src/intel/isl/Makefile.am
> +++ b/src/intel/isl/Makefile.am
> @@ -66,10 +66,11 @@ libisl_gen9_la_CFLAGS = $(libisl_la_CFLAGS)
-DGEN_VERSIONx10=90
>
>  BUILT_SOURCES = $(ISL_GENERATED_FILES)
>
> -isl_format_layout.c: isl_format_layout_gen.bash \
> +isl_format_layout.c: gen_format_layout.py \
> + gen_format_layout.c.mako \
>   isl_format_layout.csv
> -   $(AM_V_GEN)$(srcdir)/isl_format_layout_gen.bash \
> -   <$(srcdir)/isl_format_layout.csv >$@
> +   $(PYTHON_GEN) $(AM_V_GEN)$(srcdir)/gen_format_layout.py \
> +   <$(srcdir)/isl_format_layout.csv
>
>  #

>  #  Tests
> diff --git a/src/intel/isl/gen_format_layout.c.mako
b/src/intel/isl/gen_format_layout.c.mako

I made this comment in the office today but I think the mako here is simple
enough that we might be better off just putting it all in one file.

> new file mode 100644
> index 000..241a923
> --- /dev/null
> +++ b/src/intel/isl/gen_format_layout.c.mako
> @@ -0,0 +1,77 @@
> +## encoding=utf-8
> +## Copyright © 2016 Intel Corporation
> +##
> +## Permission is hereby granted, free of charge, to any person obtaining
a copy
> +## of this software and associated documentation files (the "Software"),
to deal
> +## in the Software without restriction, including without limitation the
rights
> +## to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell
> +## copies of the Software, and to permit persons to whom the Software is
> +## furnished to do so, subject to the following conditions:
> +##
> +## The above copyright notice and this permission notice shall be
included in
> +## all copies or substantial portions of the Software.
> +##
> +## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR
> +## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY,
> +## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE
> +## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> +## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM,
> +## OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE
> +## SOFTWARE.
> +##
> +## This is the mako template companion to gen_format_layout.py
> +##
> +/* This file is autogenerated by gen_format_layout.{c.mako,py}. DO NOT
EDIT! */
> +
> +/*
> + * Copyright 2015 Intel Corporation
> + *
> + *  Permission is hereby granted, free of charge, to any person
obtaining a
> + *  copy of this software and associated documentation files (the
"Software"),
> + *  to deal in the Software without restriction, including without
limitation
> + *  the rights to use, copy, modify, merge, publish, distribute,
sublicense,
> + *  and/or sell copies of the Software, and to permit persons to whom the
> + *  Software is furnished to do so, subject to the following conditions:
> + *
> + *  The above copyright notice and this permission notice (including the
next
> + *  paragraph) shall be included in all copies or substantial portions
of the
> + *  Software.
> + *
> + *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR
> + *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY,
> + *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
SHALL
> + *  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER
> + *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING
> + *  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS
> + *  IN THE SOFTWARE.
> + */
> +
> +#include "isl.h"
> +
> +const struct isl_format_layout
> +isl_format_layouts[] = {
> +% for format in formats:
> +  [ISL_FORMAT_${format.name}] = {
> +.format = ISL_FORMAT_${format.name},
> +.name = 

Re: [Mesa-dev] [PATCH 2/2] clover: fix getting struct args api size

2016-06-08 Thread Serge Martin
On Thursday 09 June 2016 00:47:27 Serge Martin wrote:
> This fixes getting the size of a struct arg. vec3 types still work ok.
> Only built-in args need to have power of two alignment, getTypeAllocSize
> reports the correct size.

M. I forgot to put V2 and to fix the commit message
s/getTypeAllocSize reports the correct size//

> ---
>  src/gallium/state_trackers/clover/llvm/invocation.cpp | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp
> b/src/gallium/state_trackers/clover/llvm/invocation.cpp index
> 03487d6..9af51539 100644
> --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
> +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
> @@ -472,7 +472,8 @@ namespace {
>   // aligned to the next larger power of two".  We need this
>   // alignment for three element vectors, which have
>   // non-power-of-2 store size.
> - const unsigned arg_api_size =
> util_next_power_of_two(arg_store_size); + const unsigned
> arg_api_size = arg_type->isStructTy() ?
> +   arg_store_size : util_next_power_of_two(arg_store_size);
> 
>   llvm::Type *target_type = arg_type->isIntegerTy() ?
> TD.getSmallestLegalIntType(mod->getContext(), arg_store_size
> * 8)

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] clover: fix getting struct args api size

2016-06-08 Thread Serge Martin
This fixes getting the size of a struct arg. vec3 types still work ok.
Only built-in args need to have power of two alignment, getTypeAllocSize
reports the correct size.
---
 src/gallium/state_trackers/clover/llvm/invocation.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp 
b/src/gallium/state_trackers/clover/llvm/invocation.cpp
index 03487d6..9af51539 100644
--- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
+++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
@@ -472,7 +472,8 @@ namespace {
  // aligned to the next larger power of two".  We need this
  // alignment for three element vectors, which have
  // non-power-of-2 store size.
- const unsigned arg_api_size = util_next_power_of_two(arg_store_size);
+ const unsigned arg_api_size = arg_type->isStructTy() ?
+   arg_store_size : util_next_power_of_two(arg_store_size);
 
  llvm::Type *target_type = arg_type->isIntegerTy() ?
TD.getSmallestLegalIntType(mod->getContext(), arg_store_size * 
8)
-- 
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: Add a helper function for shared code in get_tex_rgba_{un}compressed

2016-06-08 Thread Anuj Phogat
On Wed, Jun 8, 2016 at 3:34 PM, Brian Paul  wrote:
> On 06/08/2016 12:15 PM, Anuj Phogat wrote:
>>
>> On Mon, Dec 28, 2015 at 10:46 AM, Anuj Phogat 
>> wrote:
>>>
>>> Signed-off-by: Anuj Phogat 
>>> ---
>>>   src/mesa/main/texgetimage.c | 83
>>> +
>>>   1 file changed, 38 insertions(+), 45 deletions(-)
>>>
>>> diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c
>>> index b273aaa..4399803 100644
>>> --- a/src/mesa/main/texgetimage.c
>>> +++ b/src/mesa/main/texgetimage.c
>>> @@ -265,6 +265,40 @@ get_tex_ycbcr(struct gl_context *ctx, GLuint
>>> dimensions,
>>>  }
>>>   }
>>>
>>> +/* Depending on the base format involved we may need to apply a rebase
>>> + * transform (for example: if we download to a Luminance format we want
>>> + * G=0 and B=0).
>>> + */
>>> +
>
>
> How about a doxygen-style comment:
>
> /**
>  * Depending...
>  */
>
> And remove the empty line between the comment and the function.
>
>>> +static bool
>>> +teximage_needs_rebase(mesa_format texFormat, GLenum baseFormat,
>>> +  bool is_compressed, uint8_t *rebaseSwizzle)
>>> +{
>>> +   bool needsRebase = false;
>>> +
>>> +   if (baseFormat == GL_LUMINANCE ||
>>> +   baseFormat == GL_INTENSITY) {
>>> +  needsRebase = true;
>>> +  rebaseSwizzle[0] = MESA_FORMAT_SWIZZLE_X;
>>> +  rebaseSwizzle[1] = MESA_FORMAT_SWIZZLE_ZERO;
>>> +  rebaseSwizzle[2] = MESA_FORMAT_SWIZZLE_ZERO;
>>> +  rebaseSwizzle[3] = MESA_FORMAT_SWIZZLE_ONE;
>>> +   } else if (baseFormat == GL_LUMINANCE_ALPHA) {
>>> +  needsRebase = true;
>>> +  rebaseSwizzle[0] = MESA_FORMAT_SWIZZLE_X;
>>> +  rebaseSwizzle[1] = MESA_FORMAT_SWIZZLE_ZERO;
>>> +  rebaseSwizzle[2] = MESA_FORMAT_SWIZZLE_ZERO;
>>> +  rebaseSwizzle[3] = MESA_FORMAT_SWIZZLE_W;
>>> +   } else if (!is_compressed && (baseFormat !=
>>> +   _mesa_get_format_base_format(texFormat))) {
>
>
> I think I'd break the line after the && instead of across the !=
>
>
>
>>> +  needsRebase =
>>> + _mesa_compute_rgba2base2rgba_component_mapping(baseFormat,
>>> +rebaseSwizzle);
>>> +   }
>>> +
>>> +   return needsRebase;
>>> +}
>>> +
>>>
>>>   /**
>>>* Get a color texture image with decompression.
>>> @@ -319,26 +353,8 @@ get_tex_rgba_compressed(struct gl_context *ctx,
>>> GLuint dimensions,
>>> }
>>>  }
>>>
>>> -   /* Depending on the base format involved we may need to apply a
>>> rebase
>>> -* transform (for example: if we download to a Luminance format we
>>> want
>>> -* G=0 and B=0).
>>> -*/
>>> -   if (baseFormat == GL_LUMINANCE ||
>>> -   baseFormat == GL_INTENSITY) {
>>> -  needsRebase = true;
>>> -  rebaseSwizzle[0] = MESA_FORMAT_SWIZZLE_X;
>>> -  rebaseSwizzle[1] = MESA_FORMAT_SWIZZLE_ZERO;
>>> -  rebaseSwizzle[2] = MESA_FORMAT_SWIZZLE_ZERO;
>>> -  rebaseSwizzle[3] = MESA_FORMAT_SWIZZLE_ONE;
>>> -   } else if (baseFormat == GL_LUMINANCE_ALPHA) {
>>> -  needsRebase = true;
>>> -  rebaseSwizzle[0] = MESA_FORMAT_SWIZZLE_X;
>>> -  rebaseSwizzle[1] = MESA_FORMAT_SWIZZLE_ZERO;
>>> -  rebaseSwizzle[2] = MESA_FORMAT_SWIZZLE_ZERO;
>>> -  rebaseSwizzle[3] = MESA_FORMAT_SWIZZLE_W;
>>> -   } else {
>>> -  needsRebase = false;
>>> -   }
>>> +   needsRebase = teximage_needs_rebase(texFormat, baseFormat, true,
>>> +   rebaseSwizzle);
>>>
>>>  srcStride = 4 * width * sizeof(GLfloat);
>>>  dstStride = _mesa_image_row_stride(>Pack, width, format, type);
>>> @@ -423,31 +439,8 @@ get_tex_rgba_uncompressed(struct gl_context *ctx,
>>> GLuint dimensions,
>>>  bool needsRebase;
>>>  void *rgba = NULL;
>>>
>>> -   /* Depending on the base format involved we may need to apply a
>>> rebase
>>> -* transform (for example: if we download to a Luminance format we
>>> want
>>> -* G=0 and B=0).
>>> -*/
>>> -   if (texImage->_BaseFormat == GL_LUMINANCE ||
>>> -   texImage->_BaseFormat == GL_INTENSITY) {
>>> -  needsRebase = true;
>>> -  rebaseSwizzle[0] = MESA_FORMAT_SWIZZLE_X;
>>> -  rebaseSwizzle[1] = MESA_FORMAT_SWIZZLE_ZERO;
>>> -  rebaseSwizzle[2] = MESA_FORMAT_SWIZZLE_ZERO;
>>> -  rebaseSwizzle[3] = MESA_FORMAT_SWIZZLE_ONE;
>>> -   } else if (texImage->_BaseFormat == GL_LUMINANCE_ALPHA) {
>>> -  needsRebase = true;
>>> -  rebaseSwizzle[0] = MESA_FORMAT_SWIZZLE_X;
>>> -  rebaseSwizzle[1] = MESA_FORMAT_SWIZZLE_ZERO;
>>> -  rebaseSwizzle[2] = MESA_FORMAT_SWIZZLE_ZERO;
>>> -  rebaseSwizzle[3] = MESA_FORMAT_SWIZZLE_W;
>>> -} else if (texImage->_BaseFormat !=
>>> -   _mesa_get_format_base_format(texFormat)) {
>>> -  needsRebase =
>>> -
>>> _mesa_compute_rgba2base2rgba_component_mapping(texImage->_BaseFormat,
>>> -rebaseSwizzle);
>>> -   

Re: [Mesa-dev] [PATCH] mesa: Add a helper function for shared code in get_tex_rgba_{un}compressed

2016-06-08 Thread Brian Paul

On 06/08/2016 12:15 PM, Anuj Phogat wrote:

On Mon, Dec 28, 2015 at 10:46 AM, Anuj Phogat  wrote:

Signed-off-by: Anuj Phogat 
---
  src/mesa/main/texgetimage.c | 83 +
  1 file changed, 38 insertions(+), 45 deletions(-)

diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c
index b273aaa..4399803 100644
--- a/src/mesa/main/texgetimage.c
+++ b/src/mesa/main/texgetimage.c
@@ -265,6 +265,40 @@ get_tex_ycbcr(struct gl_context *ctx, GLuint dimensions,
 }
  }

+/* Depending on the base format involved we may need to apply a rebase
+ * transform (for example: if we download to a Luminance format we want
+ * G=0 and B=0).
+ */
+


How about a doxygen-style comment:

/**
 * Depending...
 */

And remove the empty line between the comment and the function.


+static bool
+teximage_needs_rebase(mesa_format texFormat, GLenum baseFormat,
+  bool is_compressed, uint8_t *rebaseSwizzle)
+{
+   bool needsRebase = false;
+
+   if (baseFormat == GL_LUMINANCE ||
+   baseFormat == GL_INTENSITY) {
+  needsRebase = true;
+  rebaseSwizzle[0] = MESA_FORMAT_SWIZZLE_X;
+  rebaseSwizzle[1] = MESA_FORMAT_SWIZZLE_ZERO;
+  rebaseSwizzle[2] = MESA_FORMAT_SWIZZLE_ZERO;
+  rebaseSwizzle[3] = MESA_FORMAT_SWIZZLE_ONE;
+   } else if (baseFormat == GL_LUMINANCE_ALPHA) {
+  needsRebase = true;
+  rebaseSwizzle[0] = MESA_FORMAT_SWIZZLE_X;
+  rebaseSwizzle[1] = MESA_FORMAT_SWIZZLE_ZERO;
+  rebaseSwizzle[2] = MESA_FORMAT_SWIZZLE_ZERO;
+  rebaseSwizzle[3] = MESA_FORMAT_SWIZZLE_W;
+   } else if (!is_compressed && (baseFormat !=
+   _mesa_get_format_base_format(texFormat))) {


I think I'd break the line after the && instead of across the !=



+  needsRebase =
+ _mesa_compute_rgba2base2rgba_component_mapping(baseFormat,
+rebaseSwizzle);
+   }
+
+   return needsRebase;
+}
+

  /**
   * Get a color texture image with decompression.
@@ -319,26 +353,8 @@ get_tex_rgba_compressed(struct gl_context *ctx, GLuint 
dimensions,
}
 }

-   /* Depending on the base format involved we may need to apply a rebase
-* transform (for example: if we download to a Luminance format we want
-* G=0 and B=0).
-*/
-   if (baseFormat == GL_LUMINANCE ||
-   baseFormat == GL_INTENSITY) {
-  needsRebase = true;
-  rebaseSwizzle[0] = MESA_FORMAT_SWIZZLE_X;
-  rebaseSwizzle[1] = MESA_FORMAT_SWIZZLE_ZERO;
-  rebaseSwizzle[2] = MESA_FORMAT_SWIZZLE_ZERO;
-  rebaseSwizzle[3] = MESA_FORMAT_SWIZZLE_ONE;
-   } else if (baseFormat == GL_LUMINANCE_ALPHA) {
-  needsRebase = true;
-  rebaseSwizzle[0] = MESA_FORMAT_SWIZZLE_X;
-  rebaseSwizzle[1] = MESA_FORMAT_SWIZZLE_ZERO;
-  rebaseSwizzle[2] = MESA_FORMAT_SWIZZLE_ZERO;
-  rebaseSwizzle[3] = MESA_FORMAT_SWIZZLE_W;
-   } else {
-  needsRebase = false;
-   }
+   needsRebase = teximage_needs_rebase(texFormat, baseFormat, true,
+   rebaseSwizzle);

 srcStride = 4 * width * sizeof(GLfloat);
 dstStride = _mesa_image_row_stride(>Pack, width, format, type);
@@ -423,31 +439,8 @@ get_tex_rgba_uncompressed(struct gl_context *ctx, GLuint 
dimensions,
 bool needsRebase;
 void *rgba = NULL;

-   /* Depending on the base format involved we may need to apply a rebase
-* transform (for example: if we download to a Luminance format we want
-* G=0 and B=0).
-*/
-   if (texImage->_BaseFormat == GL_LUMINANCE ||
-   texImage->_BaseFormat == GL_INTENSITY) {
-  needsRebase = true;
-  rebaseSwizzle[0] = MESA_FORMAT_SWIZZLE_X;
-  rebaseSwizzle[1] = MESA_FORMAT_SWIZZLE_ZERO;
-  rebaseSwizzle[2] = MESA_FORMAT_SWIZZLE_ZERO;
-  rebaseSwizzle[3] = MESA_FORMAT_SWIZZLE_ONE;
-   } else if (texImage->_BaseFormat == GL_LUMINANCE_ALPHA) {
-  needsRebase = true;
-  rebaseSwizzle[0] = MESA_FORMAT_SWIZZLE_X;
-  rebaseSwizzle[1] = MESA_FORMAT_SWIZZLE_ZERO;
-  rebaseSwizzle[2] = MESA_FORMAT_SWIZZLE_ZERO;
-  rebaseSwizzle[3] = MESA_FORMAT_SWIZZLE_W;
-} else if (texImage->_BaseFormat !=
-   _mesa_get_format_base_format(texFormat)) {
-  needsRebase =
- _mesa_compute_rgba2base2rgba_component_mapping(texImage->_BaseFormat,
-rebaseSwizzle);
-} else {
-  needsRebase = false;
-}
+   needsRebase = teximage_needs_rebase(texFormat, texImage->_BaseFormat, false,
+   rebaseSwizzle);

 /* Describe the dst format */
 dst_is_integer = _mesa_is_enum_format_integer(format);
--
2.5.0



Anyone want to review this old patch? It still applies cleanly on master.


Reviewed-by: Brian Paul 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org

Re: [Mesa-dev] [PATCH] glsl/types: rename is_dual_slot_double to is_dual_slot_64bit.

2016-06-08 Thread Ilia Mirkin
On Wed, Jun 8, 2016 at 5:51 PM, Dave Airlie  wrote:
> From: Dave Airlie 
>
> In the future int64 support will have the same requirements.
>
> Signed-off-by: Dave Airlie 
> ---
>  src/compiler/glsl/ir_set_program_inouts.cpp| 4 ++--
>  src/compiler/glsl/linker.cpp   | 4 ++--
>  src/compiler/glsl_types.h  | 4 ++--
>  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 4 ++--
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +-
>  5 files changed, 9 insertions(+), 9 deletions(-)
>
> diff --git a/src/compiler/glsl/ir_set_program_inouts.cpp 
> b/src/compiler/glsl/ir_set_program_inouts.cpp
> index 183b13b..2300da1 100644
> --- a/src/compiler/glsl/ir_set_program_inouts.cpp
> +++ b/src/compiler/glsl/ir_set_program_inouts.cpp
> @@ -119,7 +119,7 @@ mark(struct gl_program *prog, ir_variable *var, int 
> offset, int len,
>
>   /* double inputs read is only for vertex inputs */
>   if (stage == MESA_SHADER_VERTEX &&
> - var->type->without_array()->is_dual_slot_double())
> + var->type->without_array()->is_dual_slot_64bit())
>  prog->DoubleInputsRead |= bitfield;
>
>   if (stage == MESA_SHADER_FRAGMENT) {
> @@ -306,7 +306,7 @@ 
> ir_set_program_inouts_visitor::try_mark_partial_variable(ir_variable *var,
> /* double element width for double types that takes two slots */
> if (this->shader_stage != MESA_SHADER_VERTEX ||
> var->data.mode != ir_var_shader_in) {
> -  if (type->without_array()->is_dual_slot_double())
> +  if (type->without_array()->is_dual_slot_64bit())
>  elem_width *= 2;
> }
>
> diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
> index 9e65590..b82446d 100644
> --- a/src/compiler/glsl/linker.cpp
> +++ b/src/compiler/glsl/linker.cpp
> @@ -2863,7 +2863,7 @@ assign_attribute_or_color_locations(gl_shader_program 
> *prog,
>   * issue (3) of the GL_ARB_vertex_attrib_64bit behavior, this
>   * is optional behavior, but it seems preferable.
>   */
> -if (var->type->without_array()->is_dual_slot_double())
> +if (var->type->without_array()->is_dual_slot_64bit())
> double_storage_locations |= (use_mask << attr);
>  }
>
> @@ -2940,7 +2940,7 @@ assign_attribute_or_color_locations(gl_shader_program 
> *prog,
>to_assign[i].var->data.is_unmatched_generic_inout = 0;
>used_locations |= (use_mask << location);
>
> -  if (to_assign[i].var->type->without_array()->is_dual_slot_double())
> +  if (to_assign[i].var->type->without_array()->is_dual_slot_64bit())
>   double_storage_locations |= (use_mask << location);
> }
>
> diff --git a/src/compiler/glsl_types.h b/src/compiler/glsl_types.h
> index 2102132..b16ae67 100644
> --- a/src/compiler/glsl_types.h
> +++ b/src/compiler/glsl_types.h
> @@ -497,9 +497,9 @@ struct glsl_type {
> /**
>  * Query whether a double takes two slots.
>  */
> -   bool is_dual_slot_double() const
> +   bool is_dual_slot_64bit() const

I might just call this is_dual_slot(). Also adjust the comment above it.

Either way,

Reviewed-by: Ilia Mirkin 


> {
> -  return base_type == GLSL_TYPE_DOUBLE && vector_elements > 2;
> +  return is_64bit() && vector_elements > 2;
> }
>
> /**
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> index 4b5dfe6..fbaf2ba 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> @@ -585,7 +585,7 @@ type_size_xvec4(const struct glsl_type *type, bool 
> as_vec4)
>if (type->is_matrix()) {
>   const glsl_type *col_type = type->column_type();
>   unsigned col_slots =
> -(as_vec4 && col_type->is_dual_slot_double()) ? 2 : 1;
> +(as_vec4 && col_type->is_dual_slot_64bit()) ? 2 : 1;
>   return type->matrix_columns * col_slots;
>} else {
>   /* Regardless of size of vector, it gets a vec4. This is bad
> @@ -593,7 +593,7 @@ type_size_xvec4(const struct glsl_type *type, bool 
> as_vec4)
>* mess.  Hopefully a later pass over the code can pack scalars
>* down if appropriate.
>*/
> - return (as_vec4 && type->is_dual_slot_double()) ? 2 : 1;
> + return (as_vec4 && type->is_dual_slot_64bit()) ? 2 : 1;
>}
> case GLSL_TYPE_ARRAY:
>assert(type->length > 0);
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
> b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index e559e46..62bc39d 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -2822,7 +2822,7 @@ glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, 
> const struct glsl_type *
> }
> l->index++;
> r->index++;
> -   if 

Re: [Mesa-dev] [PATCH] mesa: Make TexSubImage check negative dimensions sooner.

2016-06-08 Thread Patrick Baggett
Sorry, didn't CC mesa-dev, trying again...

On Wed, Jun 8, 2016 at 4:11 PM, Kenneth Graunke  wrote:
> Two dEQP tests expect INVALID_VALUE errors for negative width/height
> parameters, but get INVALID_OPERATION because they haven't actually
> created a destination image.  This is arguably not a bug in Mesa, as
> there's no specified ordering of error conditions.
>
> However, it's also really easy to make the tests pass, and there's
> no real harm in doing these checks earlier.
>
> Fixes:
> dEQP-GLES3.functional.negative_api.texture.texsubimage3d_neg_width_height
> dEQP-GLES31.functional.debug.negative_coverage.get_error.texture.texsubimage3d_neg_width_height
>
> Signed-off-by: Kenneth Graunke 
> ---
>  src/mesa/main/teximage.c | 68 
> ++--
>  1 file changed, 49 insertions(+), 19 deletions(-)
>
> diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
> index 58b7f27..d4f8278 100644
> --- a/src/mesa/main/teximage.c
> +++ b/src/mesa/main/teximage.c
> @@ -1102,6 +1102,32 @@ _mesa_legal_texture_dimensions(struct gl_context *ctx, 
> GLenum target,
> }
>  }
>
> +static bool
> +error_check_subtexture_negative_dimensions(struct gl_context *ctx,
> +   GLuint dims,
> +   GLsizei subWidth,
> +   GLsizei subHeight,
> +   GLsizei subDepth,
> +   const char *func)
> +{
> +   /* Check size */
> +   if (subWidth < 0) {
> +  _mesa_error(ctx, GL_INVALID_VALUE, "%s(width=%d)", func, subWidth);
> +  return true;
> +   }
> +
> +   if (dims > 1 && subHeight < 0) {
> +  _mesa_error(ctx, GL_INVALID_VALUE, "%s(height=%d)", func, subHeight);
> +  return true;
> +   }
> +
> +   if (dims > 2 && subDepth < 0) {
> +  _mesa_error(ctx, GL_INVALID_VALUE, "%s(depth=%d)", func, subDepth);
> +  return true;
> +   }
> +

What do you think of a structure like:

switch(dims) {
case 3:
if(subDepth < 0) {
...
}
/* fall through */
case 2:
if(subHeight < 0) {
...
}
/* fall through */
default:
if(subWidth < 0) {
...
}
}
return true;

I think this would reduce the overall number of expressions to check.
If you just want to check whether any are < 0, you can OR the sign
bits:


int result = 0;
switch(dims) {
case 3: result |= subDepth & (1 << 31);
case 2: result |= subHeight & (1 << 31);
default: result |= subWidth & (1 << 31);
}
return (bool)(result>>31);

...then later call that function to generate a more detailed error
message about specifically which dimension was negative.

> +   return false;
> +}
>
>  /**
>   * Do error checking of xoffset, yoffset, zoffset, width, height and depth
> @@ -1119,25 +1145,6 @@ error_check_subtexture_dimensions(struct gl_context 
> *ctx, GLuint dims,
> const GLenum target = destImage->TexObject->Target;
> GLuint bw, bh, bd;
>
> -   /* Check size */
> -   if (subWidth < 0) {
> -  _mesa_error(ctx, GL_INVALID_VALUE,
> -  "%s(width=%d)", func, subWidth);
> -  return GL_TRUE;
> -   }
> -
> -   if (dims > 1 && subHeight < 0) {
> -  _mesa_error(ctx, GL_INVALID_VALUE,
> -  "%s(height=%d)", func, subHeight);
> -  return GL_TRUE;
> -   }
> -
> -   if (dims > 2 && subDepth < 0) {
> -  _mesa_error(ctx, GL_INVALID_VALUE,
> -  "%s(depth=%d)", func, subDepth);
> -  return GL_TRUE;
> -   }
> -
> /* check xoffset and width */
> if (xoffset < - (GLint) destImage->Border) {
>_mesa_error(ctx, GL_INVALID_VALUE, "%s(xoffset)", func);
> @@ -2104,6 +2111,12 @@ texsubimage_error_check(struct gl_context *ctx, GLuint 
> dimensions,
>return GL_TRUE;
> }
>
> +   if (error_check_subtexture_negative_dimensions(ctx, dimensions,
> +  width, height, depth,
> +  callerName)) {
> +  return GL_TRUE;
> +   }
> +
> texImage = _mesa_select_tex_image(texObj, target, level);
> if (!texImage) {
>/* non-existant texture level */
> @@ -2140,6 +2153,12 @@ texsubimage_error_check(struct gl_context *ctx, GLuint 
> dimensions,
>return GL_TRUE;
> }
>
> +   if (error_check_subtexture_negative_dimensions(ctx, dimensions,
> +  width, height, depth,
> +  callerName)) {
> +  return GL_TRUE;
> +   }
> +
> if (error_check_subtexture_dimensions(ctx, dimensions,
>   texImage, xoffset, yoffset, zoffset,
>   width, height, depth, callerName)) {
> @@ -2497,6 +2516,11 @@ copytexsubimage_error_check(struct gl_context *ctx, 

[Mesa-dev] [PATCH] glsl/types: rename is_dual_slot_double to is_dual_slot_64bit.

2016-06-08 Thread Dave Airlie
From: Dave Airlie 

In the future int64 support will have the same requirements.

Signed-off-by: Dave Airlie 
---
 src/compiler/glsl/ir_set_program_inouts.cpp| 4 ++--
 src/compiler/glsl/linker.cpp   | 4 ++--
 src/compiler/glsl_types.h  | 4 ++--
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 4 ++--
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +-
 5 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/compiler/glsl/ir_set_program_inouts.cpp 
b/src/compiler/glsl/ir_set_program_inouts.cpp
index 183b13b..2300da1 100644
--- a/src/compiler/glsl/ir_set_program_inouts.cpp
+++ b/src/compiler/glsl/ir_set_program_inouts.cpp
@@ -119,7 +119,7 @@ mark(struct gl_program *prog, ir_variable *var, int offset, 
int len,
 
  /* double inputs read is only for vertex inputs */
  if (stage == MESA_SHADER_VERTEX &&
- var->type->without_array()->is_dual_slot_double())
+ var->type->without_array()->is_dual_slot_64bit())
 prog->DoubleInputsRead |= bitfield;
 
  if (stage == MESA_SHADER_FRAGMENT) {
@@ -306,7 +306,7 @@ 
ir_set_program_inouts_visitor::try_mark_partial_variable(ir_variable *var,
/* double element width for double types that takes two slots */
if (this->shader_stage != MESA_SHADER_VERTEX ||
var->data.mode != ir_var_shader_in) {
-  if (type->without_array()->is_dual_slot_double())
+  if (type->without_array()->is_dual_slot_64bit())
 elem_width *= 2;
}
 
diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
index 9e65590..b82446d 100644
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -2863,7 +2863,7 @@ assign_attribute_or_color_locations(gl_shader_program 
*prog,
  * issue (3) of the GL_ARB_vertex_attrib_64bit behavior, this
  * is optional behavior, but it seems preferable.
  */
-if (var->type->without_array()->is_dual_slot_double())
+if (var->type->without_array()->is_dual_slot_64bit())
double_storage_locations |= (use_mask << attr);
 }
 
@@ -2940,7 +2940,7 @@ assign_attribute_or_color_locations(gl_shader_program 
*prog,
   to_assign[i].var->data.is_unmatched_generic_inout = 0;
   used_locations |= (use_mask << location);
 
-  if (to_assign[i].var->type->without_array()->is_dual_slot_double())
+  if (to_assign[i].var->type->without_array()->is_dual_slot_64bit())
  double_storage_locations |= (use_mask << location);
}
 
diff --git a/src/compiler/glsl_types.h b/src/compiler/glsl_types.h
index 2102132..b16ae67 100644
--- a/src/compiler/glsl_types.h
+++ b/src/compiler/glsl_types.h
@@ -497,9 +497,9 @@ struct glsl_type {
/**
 * Query whether a double takes two slots.
 */
-   bool is_dual_slot_double() const
+   bool is_dual_slot_64bit() const
{
-  return base_type == GLSL_TYPE_DOUBLE && vector_elements > 2;
+  return is_64bit() && vector_elements > 2;
}
 
/**
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 4b5dfe6..fbaf2ba 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -585,7 +585,7 @@ type_size_xvec4(const struct glsl_type *type, bool as_vec4)
   if (type->is_matrix()) {
  const glsl_type *col_type = type->column_type();
  unsigned col_slots =
-(as_vec4 && col_type->is_dual_slot_double()) ? 2 : 1;
+(as_vec4 && col_type->is_dual_slot_64bit()) ? 2 : 1;
  return type->matrix_columns * col_slots;
   } else {
  /* Regardless of size of vector, it gets a vec4. This is bad
@@ -593,7 +593,7 @@ type_size_xvec4(const struct glsl_type *type, bool as_vec4)
   * mess.  Hopefully a later pass over the code can pack scalars
   * down if appropriate.
   */
- return (as_vec4 && type->is_dual_slot_double()) ? 2 : 1;
+ return (as_vec4 && type->is_dual_slot_64bit()) ? 2 : 1;
   }
case GLSL_TYPE_ARRAY:
   assert(type->length > 0);
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index e559e46..62bc39d 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -2822,7 +2822,7 @@ glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, 
const struct glsl_type *
}
l->index++;
r->index++;
-   if (type->is_dual_slot_double()) {
+   if (type->is_dual_slot_64bit()) {
   l->index++;
   if (r->is_double_vertex_input == false)
 r->index++;
-- 
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Mesa (master): st/mesa: use buffer usage history to set dirty flags for revalidation

2016-06-08 Thread Fredrik Höglund
On Wednesday 08 June 2016, Ilia Mirkin wrote:
> Glancing at the code (I don't even have a piglit checkout here):
> 
> static void
> set_ubo_binding(struct gl_context *ctx, ...)
> ...
>/* If this is a real buffer object, mark it as having been used
> * at some point as a UBO.
> */
>if (size >= 0)
>   bufObj->UsageHistory |= USAGE_UNIFORM_BUFFER;
> 
> That seems bogus - what if the current size is 0 (unallocated), the
> buffer object gets bound to a UBO endpoint, and then someone goes in
> and does glBufferData()? Same for set_ssbo_binding.
> 
>   -ilia

The test is greater than or equal to zero, so the UsageHistory should
be set even when the buffer is unallocated.

But the piglit test doesn't bind the buffer as a uniform buffer before
it allocates it.  It allocates the buffer first with glNamedBufferData(),
and then binds it.  The UsageHistory is still set to the default value in
the glNamedBufferData() call, since the buffer has never been bound
at that point.  But the uniform buffer state should still be marked as
dirty in the glBindBufferRange() call.  I think this failure suggests
that that doesn't happen for some reason.

Fredrik

> 
> 
> On Wed, Jun 8, 2016 at 2:28 PM, Ilia Mirkin  wrote:
> > Hm, that's odd. I guess the buffer usage doesn't get set properly? I
> > won't be able to look at this until tonight at the earliest, feel free
> > to revert the change in the meanwhile.
> >
> >   -ilia
> >
> > On Wed, Jun 8, 2016 at 1:25 PM, Brian Paul  wrote:
> >> Ilia, this patch causes a regression in the piglit
> >> arb_uniform_buffer_object-rendering-dsa test with llvmpipe (at least).
> >>
> >> I haven't debugged it at all.
> >>
> >> -Brian
> >>
> >> On 06/07/2016 08:29 PM, Ilia Mirkin wrote:
> >>>
> >>> Module: Mesa
> >>> Branch: master
> >>> Commit: 6e6fd911da8a1d9cd62fe0a8a4cc0fb7bdccfe02
> >>> URL:
> >>> https://urldefense.proofpoint.com/v2/url?u=http-3A__cgit.freedesktop.org_mesa_mesa_commit_-3Fid-3D6e6fd911da8a1d9cd62fe0a8a4cc0fb7bdccfe02=CwIGaQ=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs=T0t4QG7chq2ZwJo6wilkFznRSFy-8uDKartPGbomVj8=oZCXDAQxyzodq06r0fdxhJ0TSS2VMnftkgDNyPiJGKY=zn9XJ75wNk6pxn5KNFcWfLu7sXurLGGcWfIvVtSMwEo=
> >>>
> >>> Author: Ilia Mirkin 
> >>> Date:   Sat Jun  4 13:26:46 2016 -0400
> >>>
> >>> st/mesa: use buffer usage history to set dirty flags for revalidation
> >>>
> >>> We were previously unconditionally doing this for arrays and ubo's, and
> >>> ignoring texture/storage/atomic buffers. Instead use the usage history
> >>> to determine which atoms need to be revalidated.
> >>>
> >>> Signed-off-by: Ilia Mirkin 
> >>> Reviewed-by: Nicolai Hähnle 
> >>> Cc: "12.0" 
> >>>
> >>> ---
> >>>
> >>>   src/mesa/state_tracker/st_cb_bufferobjects.c | 15 +--
> >>>   1 file changed, 13 insertions(+), 2 deletions(-)
> >>>
> >>> diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c
> >>> b/src/mesa/state_tracker/st_cb_bufferobjects.c
> >>> index 8bbc2f0..1a8aea3 100644
> >>> --- a/src/mesa/state_tracker/st_cb_bufferobjects.c
> >>> +++ b/src/mesa/state_tracker/st_cb_bufferobjects.c
> >>> @@ -332,8 +332,19 @@ st_bufferobj_data(struct gl_context *ctx,
> >>> }
> >>>  }
> >>>
> >>> -   /* BufferData may change an array or uniform buffer, need to update it
> >>> */
> >>> -   st->dirty.st |= ST_NEW_VERTEX_ARRAYS | ST_NEW_UNIFORM_BUFFER;
> >>> +   /* The current buffer may be bound, so we have to revalidate all atoms
> >>> that
> >>> +* might be using it.
> >>> +*/
> >>> +   /* TODO: Add arrays to usage history */
> >>> +   st->dirty.st |= ST_NEW_VERTEX_ARRAYS;
> >>> +   if (st_obj->Base.UsageHistory & USAGE_UNIFORM_BUFFER)
> >>> +  st->dirty.st |= ST_NEW_UNIFORM_BUFFER;
> >>> +   if (st_obj->Base.UsageHistory & USAGE_SHADER_STORAGE_BUFFER)
> >>> +  st->dirty.st |= ST_NEW_STORAGE_BUFFER;
> >>> +   if (st_obj->Base.UsageHistory & USAGE_TEXTURE_BUFFER)
> >>> +  st->dirty.st |= ST_NEW_SAMPLER_VIEWS | ST_NEW_IMAGE_UNITS;
> >>> +   if (st_obj->Base.UsageHistory & USAGE_ATOMIC_COUNTER_BUFFER)
> >>> +  st->dirty.st |= ST_NEW_ATOMIC_BUFFER;
> >>>
> >>>  return GL_TRUE;
> >>>   }
> >>>
> >>> ___
> >>> mesa-commit mailing list
> >>> mesa-com...@lists.freedesktop.org
> >>>
> >>> https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.freedesktop.org_mailman_listinfo_mesa-2Dcommit=CwIGaQ=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs=T0t4QG7chq2ZwJo6wilkFznRSFy-8uDKartPGbomVj8=oZCXDAQxyzodq06r0fdxhJ0TSS2VMnftkgDNyPiJGKY=TUTsMchaJhWjCx5k6tptwVWWVHJs-zDtCtJVpPhM1pM=
> >>>
> >>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org

Re: [Mesa-dev] [PATCH 7/7] st/glsl_to_tgsi: move to checking 64-bitness instead of double

2016-06-08 Thread Ilia Mirkin
Series is

Reviewed-by: Ilia Mirkin 

On Wed, Jun 8, 2016 at 5:15 PM, Dave Airlie  wrote:
> From: Dave Airlie 
>
> This uses the new types interfaces to check for 64-bit types,
> as futureproofing against int64 support.
>
> Signed-off-by: Dave Airlie 
> ---
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 30 
> ++
>  1 file changed, 14 insertions(+), 16 deletions(-)
>
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
> b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index 29630e1..ce981c7 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -629,7 +629,7 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, 
> unsigned op,
>  {
> glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
> int num_reladdr = 0, i, j;
> -   bool dst_is_double[2];
> +   bool dst_is_64bit[2];
>
> op = get_opcode(ir, op, dst, src0, src1);
>
> @@ -732,18 +732,16 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, 
> unsigned op,
>  * GLSL [0].w -> TGSI [1].zw
>  */
> for (j = 0; j < 2; j++) {
> -  dst_is_double[j] = false;
> -  if (inst->dst[j].type == GLSL_TYPE_DOUBLE)
> - dst_is_double[j] = true;
> -  else if (inst->dst[j].file == PROGRAM_OUTPUT && inst->dst[j].type == 
> GLSL_TYPE_ARRAY) {
> +  dst_is_64bit[j] = glsl_base_type_is_64bit(inst->dst[j].type);
> +  if (!dst_is_64bit[j] && inst->dst[j].file == PROGRAM_OUTPUT && 
> inst->dst[j].type == GLSL_TYPE_ARRAY) {
>   enum glsl_base_type type = find_array_type(this->output_arrays, 
> this->num_output_arrays, inst->dst[j].array_id);
> - if (type == GLSL_TYPE_DOUBLE)
> -dst_is_double[j] = true;
> + if (glsl_base_type_is_64bit(type))
> +dst_is_64bit[j] = true;
>}
> }
>
> -   if (dst_is_double[0] || dst_is_double[1] ||
> -   inst->src[0].type == GLSL_TYPE_DOUBLE) {
> +   if (dst_is_64bit[0] || dst_is_64bit[1] ||
> +   glsl_base_type_is_64bit(inst->src[0].type)) {
>glsl_to_tgsi_instruction *dinst = NULL;
>int initial_src_swz[4], initial_src_idx[4];
>int initial_dst_idx[2], initial_dst_writemask[2];
> @@ -795,7 +793,7 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, 
> unsigned op,
>
>   /* modify the destination if we are splitting */
>   for (j = 0; j < 2; j++) {
> -if (dst_is_double[j]) {
> +if (dst_is_64bit[j]) {
> dinst->dst[j].writemask = (i & 1) ? WRITEMASK_ZW : 
> WRITEMASK_XY;
> dinst->dst[j].index = initial_dst_idx[j];
> if (i > 1) {
> @@ -816,7 +814,7 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, 
> unsigned op,
>   for (j = 0; j < 4; j++) {
>  int swz = GET_SWZ(initial_src_swz[j], i);
>
> -if (dinst->src[j].type == GLSL_TYPE_DOUBLE) {
> +if (glsl_base_type_is_64bit(dinst->src[j].type)) {
> dinst->src[j].index = initial_src_idx[j];
> if (swz > 1 && dinst->src[j].file != PROGRAM_IMMEDIATE) {
>dinst->src[j].double_reg2 = true;
> @@ -833,7 +831,7 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, 
> unsigned op,
>- F2D is a float src0, DLDEXP is integer src1 */
> if (op == TGSI_OPCODE_F2D ||
> op == TGSI_OPCODE_DLDEXP ||
> -   (op == TGSI_OPCODE_UCMP && dst_is_double[0])) {
> +   (op == TGSI_OPCODE_UCMP && dst_is_64bit[0])) {
>dinst->src[j].swizzle = MAKE_SWIZZLE4(swz, swz, swz, swz);
> }
>  }
> @@ -2093,7 +2091,7 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* 
> ir, st_src_reg *op)
>}
>
>cbuf.swizzle = swizzle_for_size(ir->type->vector_elements);
> -  if (cbuf.type == GLSL_TYPE_DOUBLE)
> +  if (glsl_base_type_is_64bit(cbuf.type))
>   cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 8,
> const_offset % 16 / 8,
> const_offset % 16 / 8,
> @@ -2850,7 +2848,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
>assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
>
>if (ir->lhs->type->is_array() || 
> ir->lhs->type->without_array()->is_matrix()) {
> - if (ir->lhs->type->without_array()->is_double()) {
> + if (ir->lhs->type->without_array()->is_64bit()) {
>  switch (ir->lhs->type->without_array()->vector_elements) {
>  case 1:
> l.writemask = WRITEMASK_X;
> @@ -2869,7 +2867,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
>  l.writemask = WRITEMASK_XYZW;
>}
> } else if (ir->lhs->type->is_scalar() &&
> -  !ir->lhs->type->is_double() &&
> +  

[Mesa-dev] [PATCH 7/7] st/glsl_to_tgsi: move to checking 64-bitness instead of double

2016-06-08 Thread Dave Airlie
From: Dave Airlie 

This uses the new types interfaces to check for 64-bit types,
as futureproofing against int64 support.

Signed-off-by: Dave Airlie 
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 30 ++
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 29630e1..ce981c7 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -629,7 +629,7 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned 
op,
 {
glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
int num_reladdr = 0, i, j;
-   bool dst_is_double[2];
+   bool dst_is_64bit[2];
 
op = get_opcode(ir, op, dst, src0, src1);
 
@@ -732,18 +732,16 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, 
unsigned op,
 * GLSL [0].w -> TGSI [1].zw
 */
for (j = 0; j < 2; j++) {
-  dst_is_double[j] = false;
-  if (inst->dst[j].type == GLSL_TYPE_DOUBLE)
- dst_is_double[j] = true;
-  else if (inst->dst[j].file == PROGRAM_OUTPUT && inst->dst[j].type == 
GLSL_TYPE_ARRAY) {
+  dst_is_64bit[j] = glsl_base_type_is_64bit(inst->dst[j].type);
+  if (!dst_is_64bit[j] && inst->dst[j].file == PROGRAM_OUTPUT && 
inst->dst[j].type == GLSL_TYPE_ARRAY) {
  enum glsl_base_type type = find_array_type(this->output_arrays, 
this->num_output_arrays, inst->dst[j].array_id);
- if (type == GLSL_TYPE_DOUBLE)
-dst_is_double[j] = true;
+ if (glsl_base_type_is_64bit(type))
+dst_is_64bit[j] = true;
   }
}
 
-   if (dst_is_double[0] || dst_is_double[1] ||
-   inst->src[0].type == GLSL_TYPE_DOUBLE) {
+   if (dst_is_64bit[0] || dst_is_64bit[1] ||
+   glsl_base_type_is_64bit(inst->src[0].type)) {
   glsl_to_tgsi_instruction *dinst = NULL;
   int initial_src_swz[4], initial_src_idx[4];
   int initial_dst_idx[2], initial_dst_writemask[2];
@@ -795,7 +793,7 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned 
op,
 
  /* modify the destination if we are splitting */
  for (j = 0; j < 2; j++) {
-if (dst_is_double[j]) {
+if (dst_is_64bit[j]) {
dinst->dst[j].writemask = (i & 1) ? WRITEMASK_ZW : WRITEMASK_XY;
dinst->dst[j].index = initial_dst_idx[j];
if (i > 1) {
@@ -816,7 +814,7 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned 
op,
  for (j = 0; j < 4; j++) {
 int swz = GET_SWZ(initial_src_swz[j], i);
 
-if (dinst->src[j].type == GLSL_TYPE_DOUBLE) {
+if (glsl_base_type_is_64bit(dinst->src[j].type)) {
dinst->src[j].index = initial_src_idx[j];
if (swz > 1 && dinst->src[j].file != PROGRAM_IMMEDIATE) {
   dinst->src[j].double_reg2 = true;
@@ -833,7 +831,7 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned 
op,
   - F2D is a float src0, DLDEXP is integer src1 */
if (op == TGSI_OPCODE_F2D ||
op == TGSI_OPCODE_DLDEXP ||
-   (op == TGSI_OPCODE_UCMP && dst_is_double[0])) {
+   (op == TGSI_OPCODE_UCMP && dst_is_64bit[0])) {
   dinst->src[j].swizzle = MAKE_SWIZZLE4(swz, swz, swz, swz);
}
 }
@@ -2093,7 +2091,7 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, 
st_src_reg *op)
   }
 
   cbuf.swizzle = swizzle_for_size(ir->type->vector_elements);
-  if (cbuf.type == GLSL_TYPE_DOUBLE)
+  if (glsl_base_type_is_64bit(cbuf.type))
  cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 8,
const_offset % 16 / 8,
const_offset % 16 / 8,
@@ -2850,7 +2848,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
   assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
 
   if (ir->lhs->type->is_array() || 
ir->lhs->type->without_array()->is_matrix()) {
- if (ir->lhs->type->without_array()->is_double()) {
+ if (ir->lhs->type->without_array()->is_64bit()) {
 switch (ir->lhs->type->without_array()->vector_elements) {
 case 1:
l.writemask = WRITEMASK_X;
@@ -2869,7 +2867,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
 l.writemask = WRITEMASK_XYZW;
   }
} else if (ir->lhs->type->is_scalar() &&
-  !ir->lhs->type->is_double() &&
+  !ir->lhs->type->is_64bit() &&
   ir->lhs->variable_referenced()->data.mode == ir_var_shader_out) {
   /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
* FINISHME: W component of fragment shader output zero, work correctly.
@@ -4973,7 +4971,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
  delete inst;

[Mesa-dev] [PATCH 2/7] compiler: use 64bit check for sizing instead of double check.

2016-06-08 Thread Dave Airlie
From: Dave Airlie 

This just moves code to the new check in advance of int64 support.

Signed-off-by: Dave Airlie 
---
 src/compiler/glsl_types.cpp | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/compiler/glsl_types.cpp b/src/compiler/glsl_types.cpp
index 11f1e85..884f311 100644
--- a/src/compiler/glsl_types.cpp
+++ b/src/compiler/glsl_types.cpp
@@ -1434,7 +1434,7 @@ glsl_type::can_implicitly_convert_to(const glsl_type 
*desired,
 unsigned
 glsl_type::std140_base_alignment(bool row_major) const
 {
-   unsigned N = is_double() ? 8 : 4;
+   unsigned N = is_64bit() ? 8 : 4;
 
/* (1) If the member is a scalar consuming <N> basic machine units, the
 * base alignment is <N>.
@@ -1552,7 +1552,7 @@ glsl_type::std140_base_alignment(bool row_major) const
 unsigned
 glsl_type::std140_size(bool row_major) const
 {
-   unsigned N = is_double() ? 8 : 4;
+   unsigned N = is_64bit() ? 8 : 4;
 
/* (1) If the member is a scalar consuming <N> basic machine units, the
 * base alignment is <N>.
@@ -1689,7 +1689,7 @@ unsigned
 glsl_type::std430_base_alignment(bool row_major) const
 {
 
-   unsigned N = is_double() ? 8 : 4;
+   unsigned N = is_64bit() ? 8 : 4;
 
/* (1) If the member is a scalar consuming <N> basic machine units, the
 * base alignment is <N>.
@@ -1798,7 +1798,7 @@ glsl_type::std430_base_alignment(bool row_major) const
 unsigned
 glsl_type::std430_array_stride(bool row_major) const
 {
-   unsigned N = is_double() ? 8 : 4;
+   unsigned N = is_64bit() ? 8 : 4;
 
/* Notice that the array stride of a vec3 is not 3 * N but 4 * N.
 * See OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout"
@@ -1816,7 +1816,7 @@ glsl_type::std430_array_stride(bool row_major) const
 unsigned
 glsl_type::std430_size(bool row_major) const
 {
-   unsigned N = is_double() ? 8 : 4;
+   unsigned N = is_64bit() ? 8 : 4;
 
/* OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout":
 *
-- 
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/7] mesa: use new 64-bit checks instead of explicit double checks.

2016-06-08 Thread Dave Airlie
From: Dave Airlie 

This just moves to the new interfaces in advance of int64.

Signed-off-by: Dave Airlie 
---
 src/mesa/main/uniform_query.cpp | 8 
 src/mesa/program/ir_to_mesa.cpp | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp
index 997b0cb..eea611b 100644
--- a/src/mesa/main/uniform_query.cpp
+++ b/src/mesa/main/uniform_query.cpp
@@ -322,8 +322,8 @@ _mesa_get_uniform(struct gl_context *ctx, GLuint program, 
GLint location,
{
   unsigned elements = (uni->type->is_sampler())
 ? 1 : uni->type->components();
-  const int dmul = uni->type->base_type == GLSL_TYPE_DOUBLE ? 2 : 1;
-  const int rmul = returnType == GLSL_TYPE_DOUBLE ? 2 : 1;
+  const int dmul = uni->type->is_64bit() ? 2 : 1;
+  const int rmul = glsl_base_type_is_64bit(returnType) ? 2 : 1;
 
   /* Calculate the source base address *BEFORE* modifying elements to
* account for the size of the user's buffer.
@@ -548,7 +548,7 @@ _mesa_propagate_uniforms_to_driver_storage(struct 
gl_uniform_storage *uni,
 */
const unsigned components = MAX2(1, uni->type->vector_elements);
const unsigned vectors = MAX2(1, uni->type->matrix_columns);
-   const int dmul = uni->type->base_type == GLSL_TYPE_DOUBLE ? 2 : 1;
+   const int dmul = uni->type->is_64bit() ? 2 : 1;
 
/* Store the data in the driver's requested type in the driver's storage
 * areas.
@@ -668,7 +668,7 @@ _mesa_uniform(struct gl_context *ctx, struct 
gl_shader_program *shProg,
   unsigned src_components)
 {
unsigned offset;
-   int size_mul = basicType == GLSL_TYPE_DOUBLE ? 2 : 1;
+   int size_mul = glsl_base_type_is_64bit(basicType) ? 2 : 1;
 
struct gl_uniform_storage *const uni =
   validate_uniform_parameters(ctx, shProg, location, count,
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index b10648b..3a5f058 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2407,7 +2407,7 @@ add_uniform_to_shader::visit_field(const glsl_type *type, 
const char *name,
 
if (type->is_vector() || type->is_scalar()) {
   size = type->vector_elements;
-  if (type->is_double())
+  if (type->is_64bit())
  size *= 2;
} else {
   size = type_size(type) * 4;
-- 
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/7] compiler/types: add 64-bitness queries.

2016-06-08 Thread Dave Airlie
From: Dave Airlie 

This adds an inline helper and a type query for whether a type is 64-bit.

For now this is equivalent to double, but int64 will change
this.

Signed-off-by: Dave Airlie 
---
 src/compiler/glsl_types.h | 13 +
 1 file changed, 13 insertions(+)

diff --git a/src/compiler/glsl_types.h b/src/compiler/glsl_types.h
index 4e3afe7..2102132 100644
--- a/src/compiler/glsl_types.h
+++ b/src/compiler/glsl_types.h
@@ -64,6 +64,11 @@ enum glsl_base_type {
GLSL_TYPE_ERROR
 };
 
+static inline bool glsl_base_type_is_64bit(enum glsl_base_type type)
+{
+   return type == GLSL_TYPE_DOUBLE;
+}
+
 enum glsl_sampler_dim {
GLSL_SAMPLER_DIM_1D = 0,
GLSL_SAMPLER_DIM_2D,
@@ -498,6 +503,14 @@ struct glsl_type {
}
 
/**
+* Query whether or not a type is 64-bit
+*/
+   bool is_64bit() const
+   {
+  return glsl_base_type_is_64bit(base_type);
+   }
+
+   /**
 * Query whether or not a type is a non-array boolean type
 */
bool is_boolean() const
-- 
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/7] glsl: use new interfaces for 64-bit checks.

2016-06-08 Thread Dave Airlie
From: Dave Airlie 

This is just prep work for int64 support, changing
places where 64-bit matters, not doubles.

Signed-off-by: Dave Airlie 
---
 src/compiler/glsl/ast_to_hir.cpp|  4 ++--
 src/compiler/glsl/link_uniform_initializers.cpp |  2 +-
 src/compiler/glsl/link_varyings.cpp |  4 ++--
 src/compiler/glsl/lower_buffer_access.cpp   | 12 ++--
 src/compiler/glsl/lower_packed_varyings.cpp |  6 +++---
 5 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp
index fbd3256..9fd29c7 100644
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -3399,7 +3399,7 @@ apply_layout_qualifier_to_variable(const struct 
ast_type_qualifier *qual,
 (qual_component + components - 1) > 3) {
_mesa_glsl_error(loc, state, "component overflow (%u > 3)",
 (qual_component + components - 1));
-} else if (qual_component == 1 && type->is_double()) {
+} else if (qual_component == 1 && type->is_64bit()) {
/* We don't bother checking for 3 as it should be caught by the
 * overflow check above.
 */
@@ -6849,7 +6849,7 @@ ast_process_struct_or_iface_block_members(exec_list 
*instructions,
 }
  } else {
 if (layout && layout->flags.q.explicit_xfb_offset) {
-   unsigned align = field_type->is_double() ? 8 : 4;
+   unsigned align = field_type->is_64bit() ? 8 : 4;
fields[i].offset = glsl_align(block_xfb_offset, align);
block_xfb_offset +=
   MAX2(xfb_stride, (int) (4 * field_type->component_slots()));
diff --git a/src/compiler/glsl/link_uniform_initializers.cpp 
b/src/compiler/glsl/link_uniform_initializers.cpp
index bf6d394..acf8222 100644
--- a/src/compiler/glsl/link_uniform_initializers.cpp
+++ b/src/compiler/glsl/link_uniform_initializers.cpp
@@ -222,7 +222,7 @@ set_uniform_initializer(void *mem_ctx, gl_shader_program 
*prog,
 val->array_elements[0]->type->base_type;
   const unsigned int elements = val->array_elements[0]->type->components();
   unsigned int idx = 0;
-  unsigned dmul = (base_type == GLSL_TYPE_DOUBLE) ? 2 : 1;
+  unsigned dmul = glsl_base_type_is_64bit(base_type) ? 2 : 1;
 
   assert(val->type->length >= storage->array_elements);
   for (unsigned int i = 0; i < storage->array_elements; i++) {
diff --git a/src/compiler/glsl/link_varyings.cpp 
b/src/compiler/glsl/link_varyings.cpp
index 5a5adc0..99fb3fc 100644
--- a/src/compiler/glsl/link_varyings.cpp
+++ b/src/compiler/glsl/link_varyings.cpp
@@ -403,7 +403,7 @@ cross_validate_outputs_to_inputs(struct gl_shader_program 
*prog,
  */
 last_comp = 4;
  } else {
-unsigned dmul = var->type->is_double() ? 2 : 1;
+unsigned dmul = var->type->is_64bit() ? 2 : 1;
 last_comp = var->data.location_frac +
var->type->without_array()->vector_elements * dmul;
  }
@@ -708,7 +708,7 @@ tfeedback_decl::assign_location(struct gl_context *ctx,
   + this->matched_candidate->toplevel_var->data.location_frac
   + this->matched_candidate->offset;
const unsigned dmul =
-  this->matched_candidate->type->without_array()->is_double() ? 2 : 1;
+  this->matched_candidate->type->without_array()->is_64bit() ? 2 : 1;
 
if (this->matched_candidate->type->is_array()) {
   /* Array variable */
diff --git a/src/compiler/glsl/lower_buffer_access.cpp 
b/src/compiler/glsl/lower_buffer_access.cpp
index bdfabab..69dc82b 100644
--- a/src/compiler/glsl/lower_buffer_access.cpp
+++ b/src/compiler/glsl/lower_buffer_access.cpp
@@ -114,7 +114,7 @@ lower_buffer_access::emit_access(void *mem_ctx,
 /* For a row-major matrix, the next column starts at the next
  * element.
  */
-int size_mul = deref->type->is_double() ? 8 : 4;
+int size_mul = deref->type->is_64bit() ? 8 : 4;
 emit_access(mem_ctx, is_write, col_deref, base_offset,
 deref_offset + i * size_mul,
 row_major, deref->type->matrix_columns, packing,
@@ -125,7 +125,7 @@ lower_buffer_access::emit_access(void *mem_ctx,
 /* std430 doesn't round up vec2 size to a vec4 size */
 if (packing == GLSL_INTERFACE_PACKING_STD430 &&
 deref->type->vector_elements == 2 &&
-!deref->type->is_double()) {
+!deref->type->is_64bit()) {
size_mul = 8;
 } else {
/* std140 always rounds the stride of arrays (and matrices) to a
@@ -137,7 +137,7 @@ lower_buffer_access::emit_access(void *mem_ctx,
 * machine units, the base alignment is 4N. For vec4, base
 * alignment 

[Mesa-dev] [PATCH 6/7] st/glsl_to_tgsi: use enum glsl_base_type instead of unsigned

2016-06-08 Thread Dave Airlie
From: Dave Airlie 

This is just some better type safety that I noticed while working
on 64-bit integer support.

Signed-off-by: Dave Airlie 
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 24 
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index e1130c2..29630e1 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -91,7 +91,7 @@ public:
   this->is_double_vertex_input = false;
}
 
-   st_src_reg(gl_register_file file, int index, int type)
+   st_src_reg(gl_register_file file, int index, enum glsl_base_type type)
{
   this->type = type;
   this->file = file;
@@ -107,7 +107,7 @@ public:
   this->is_double_vertex_input = false;
}
 
-   st_src_reg(gl_register_file file, int index, int type, int index2D)
+   st_src_reg(gl_register_file file, int index, enum glsl_base_type type, int 
index2D)
{
   this->type = type;
   this->file = file;
@@ -146,7 +146,7 @@ public:
int index2D;
GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
int negate; /**< NEGATE_XYZW mask from mesa */
-   int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
+   enum glsl_base_type type; /** GLSL_TYPE_* from GLSL IR (enum 
glsl_base_type) */
/** Register index should be offset by the integer in this reg. */
st_src_reg *reladdr;
st_src_reg *reladdr2;
@@ -162,7 +162,7 @@ public:
 
 class st_dst_reg {
 public:
-   st_dst_reg(gl_register_file file, int writemask, int type, int index)
+   st_dst_reg(gl_register_file file, int writemask, enum glsl_base_type type, 
int index)
{
   this->file = file;
   this->index = index;
@@ -175,7 +175,7 @@ public:
   this->array_id = 0;
}
 
-   st_dst_reg(gl_register_file file, int writemask, int type)
+   st_dst_reg(gl_register_file file, int writemask, enum glsl_base_type type)
{
   this->file = file;
   this->index = 0;
@@ -207,7 +207,7 @@ public:
int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
int index2D;
int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
-   int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
+   enum glsl_base_type type; /** GLSL_TYPE_* from GLSL IR (enum 
glsl_base_type) */
/** Register index should be offset by the integer in this reg. */
st_src_reg *reladdr;
st_src_reg *reladdr2;
@@ -343,10 +343,10 @@ struct array_decl {
unsigned mesa_index;
unsigned array_id;
unsigned array_size;
-   unsigned array_type;
+   enum glsl_base_type array_type;
 };
 
-static unsigned
+static enum glsl_base_type
 find_array_type(struct array_decl *arrays, unsigned count, unsigned array_id)
 {
unsigned i;
@@ -420,7 +420,7 @@ public:
st_src_reg st_src_reg_for_double(double val);
st_src_reg st_src_reg_for_float(float val);
st_src_reg st_src_reg_for_int(int val);
-   st_src_reg st_src_reg_for_type(int type, int val);
+   st_src_reg st_src_reg_for_type(enum glsl_base_type type, int val);
 
/**
 * \name Visit methods
@@ -736,7 +736,7 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned 
op,
   if (inst->dst[j].type == GLSL_TYPE_DOUBLE)
  dst_is_double[j] = true;
   else if (inst->dst[j].file == PROGRAM_OUTPUT && inst->dst[j].type == 
GLSL_TYPE_ARRAY) {
- unsigned type = find_array_type(this->output_arrays, 
this->num_output_arrays, inst->dst[j].array_id);
+ enum glsl_base_type type = find_array_type(this->output_arrays, 
this->num_output_arrays, inst->dst[j].array_id);
  if (type == GLSL_TYPE_DOUBLE)
 dst_is_double[j] = true;
   }
@@ -866,7 +866,7 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, 
unsigned op,
  st_dst_reg dst,
  st_src_reg src0, st_src_reg src1)
 {
-   int type = GLSL_TYPE_FLOAT;
+   enum glsl_base_type type = GLSL_TYPE_FLOAT;
 
if (op == TGSI_OPCODE_MOV)
return op;
@@ -1155,7 +1155,7 @@ glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
 }
 
 st_src_reg
-glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val)
+glsl_to_tgsi_visitor::st_src_reg_for_type(enum glsl_base_type type, int val)
 {
if (native_integers)
   return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) :
-- 
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] prep work for 64-bit integer support

2016-06-08 Thread Dave Airlie
While writing ARB_gpu_shader_int64 I realised I needed to change
a lot of existing checks for doubles to 64bit, so I decided to
do that as much in advance as possible.

This adds a macro + interface to check if a type is 64-bit,
which currently is only true for doubles, it then uses the new
API in places where it makes sense.

There is also a patch to st/glsl_to_tgsi to use glsl_base_type
enum for better type safety.

Dave.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/7] glsl/link_varyings: switch to 64bit check instead of double.

2016-06-08 Thread Dave Airlie
From: Dave Airlie 

This is prep work for int64 support.

Signed-off-by: Dave Airlie 
---
 src/compiler/glsl/link_varyings.cpp | 2 +-
 src/compiler/glsl/link_varyings.h   | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/compiler/glsl/link_varyings.cpp 
b/src/compiler/glsl/link_varyings.cpp
index 99fb3fc..67534a6 100644
--- a/src/compiler/glsl/link_varyings.cpp
+++ b/src/compiler/glsl/link_varyings.cpp
@@ -886,7 +886,7 @@ tfeedback_decl::store(struct gl_context *ctx, struct 
gl_shader_program *prog,
}
 
if (explicit_stride && explicit_stride[buffer]) {
-  if (this->is_double() && info->Buffers[buffer].Stride % 2) {
+  if (this->is_64bit() && info->Buffers[buffer].Stride % 2) {
  linker_error(prog, "invalid qualifier xfb_stride=%d must be a "
   "multiple of 8 as its applied to a type that is or "
   "contains a double.",
diff --git a/src/compiler/glsl/link_varyings.h 
b/src/compiler/glsl/link_varyings.h
index 0ad4f74..2126a5c 100644
--- a/src/compiler/glsl/link_varyings.h
+++ b/src/compiler/glsl/link_varyings.h
@@ -151,7 +151,7 @@ public:
  return this->size;
   else
  return this->vector_elements * this->matrix_columns * this->size *
-(this->is_double() ? 2 : 1);
+(this->is_64bit() ? 2 : 1);
}
 
unsigned get_location() const {
@@ -160,7 +160,7 @@ public:
 
 private:
 
-   bool is_double() const
+   bool is_64bit() const
{
   switch (this->type) {
   case GL_DOUBLE:
-- 
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] mesa: Make TexSubImage check negative dimensions sooner.

2016-06-08 Thread Kenneth Graunke
Two dEQP tests expect INVALID_VALUE errors for negative width/height
parameters, but get INVALID_OPERATION because they haven't actually
created a destination image.  This is arguably not a bug in Mesa, as
there's no specified ordering of error conditions.

However, it's also really easy to make the tests pass, and there's
no real harm in doing these checks earlier.

Fixes:
dEQP-GLES3.functional.negative_api.texture.texsubimage3d_neg_width_height
dEQP-GLES31.functional.debug.negative_coverage.get_error.texture.texsubimage3d_neg_width_height

Signed-off-by: Kenneth Graunke 
---
 src/mesa/main/teximage.c | 68 ++--
 1 file changed, 49 insertions(+), 19 deletions(-)

diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 58b7f27..d4f8278 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -1102,6 +1102,32 @@ _mesa_legal_texture_dimensions(struct gl_context *ctx, 
GLenum target,
}
 }
 
+static bool
+error_check_subtexture_negative_dimensions(struct gl_context *ctx,
+   GLuint dims,
+   GLsizei subWidth,
+   GLsizei subHeight,
+   GLsizei subDepth,
+   const char *func)
+{
+   /* Check size */
+   if (subWidth < 0) {
+  _mesa_error(ctx, GL_INVALID_VALUE, "%s(width=%d)", func, subWidth);
+  return true;
+   }
+
+   if (dims > 1 && subHeight < 0) {
+  _mesa_error(ctx, GL_INVALID_VALUE, "%s(height=%d)", func, subHeight);
+  return true;
+   }
+
+   if (dims > 2 && subDepth < 0) {
+  _mesa_error(ctx, GL_INVALID_VALUE, "%s(depth=%d)", func, subDepth);
+  return true;
+   }
+
+   return false;
+}
 
 /**
  * Do error checking of xoffset, yoffset, zoffset, width, height and depth
@@ -1119,25 +1145,6 @@ error_check_subtexture_dimensions(struct gl_context 
*ctx, GLuint dims,
const GLenum target = destImage->TexObject->Target;
GLuint bw, bh, bd;
 
-   /* Check size */
-   if (subWidth < 0) {
-  _mesa_error(ctx, GL_INVALID_VALUE,
-  "%s(width=%d)", func, subWidth);
-  return GL_TRUE;
-   }
-
-   if (dims > 1 && subHeight < 0) {
-  _mesa_error(ctx, GL_INVALID_VALUE,
-  "%s(height=%d)", func, subHeight);
-  return GL_TRUE;
-   }
-
-   if (dims > 2 && subDepth < 0) {
-  _mesa_error(ctx, GL_INVALID_VALUE,
-  "%s(depth=%d)", func, subDepth);
-  return GL_TRUE;
-   }
-
/* check xoffset and width */
if (xoffset < - (GLint) destImage->Border) {
   _mesa_error(ctx, GL_INVALID_VALUE, "%s(xoffset)", func);
@@ -2104,6 +2111,12 @@ texsubimage_error_check(struct gl_context *ctx, GLuint 
dimensions,
   return GL_TRUE;
}
 
+   if (error_check_subtexture_negative_dimensions(ctx, dimensions,
+  width, height, depth,
+  callerName)) {
+  return GL_TRUE;
+   }
+
texImage = _mesa_select_tex_image(texObj, target, level);
if (!texImage) {
   /* non-existant texture level */
@@ -2140,6 +2153,12 @@ texsubimage_error_check(struct gl_context *ctx, GLuint 
dimensions,
   return GL_TRUE;
}
 
+   if (error_check_subtexture_negative_dimensions(ctx, dimensions,
+  width, height, depth,
+  callerName)) {
+  return GL_TRUE;
+   }
+
if (error_check_subtexture_dimensions(ctx, dimensions,
  texImage, xoffset, yoffset, zoffset,
  width, height, depth, callerName)) {
@@ -2497,6 +2516,11 @@ copytexsubimage_error_check(struct gl_context *ctx, 
GLuint dimensions,
   return GL_TRUE;
}
 
+   if (error_check_subtexture_negative_dimensions(ctx, dimensions,
+  width, height, 1, caller)) {
+  return GL_TRUE;
+   }
+
if (error_check_subtexture_dimensions(ctx, dimensions, texImage,
  xoffset, yoffset, zoffset,
  width, height, 1, caller)) {
@@ -4387,6 +4411,12 @@ compressed_subtexture_error_check(struct gl_context 
*ctx, GLint dims,
   return GL_TRUE;
}
 
+   if (error_check_subtexture_negative_dimensions(ctx, dims,
+  width, height, depth,
+  callerName)) {
+  return GL_TRUE;
+   }
+
if (error_check_subtexture_dimensions(ctx, dims,
  texImage, xoffset, yoffset, zoffset,
  width, height, depth,
-- 
2.8.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org

[Mesa-dev] [PATCH] nv50: add missing draw_calls_indexed driver stat

2016-06-08 Thread Samuel Pitoiset
Spotted when glancing at the VBO push code.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nv50/nv50_vbo.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c 
b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
index a11cdf8..d608e7b 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -702,6 +702,7 @@ nv50_draw_elements(struct nv50_context *nv50, bool shorten,
  prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
   }
}
+   NOUVEAU_DRV_STAT(&nv50->screen->base, draw_calls_indexed, 1);
 }
 
 static void
-- 
2.8.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Mesa (master): st/mesa: use buffer usage history to set dirty flags for revalidation

2016-06-08 Thread Ilia Mirkin
Glancing at the code (I don't even have a piglit checkout here):

static void
set_ubo_binding(struct gl_context *ctx, ...)
...
   /* If this is a real buffer object, mark it has having been used
* at some point as a UBO.
*/
   if (size >= 0)
  bufObj->UsageHistory |= USAGE_UNIFORM_BUFFER;

That seems bogus - what if the current size is 0 (unallocated), the
buffer object gets bound to a UBO endpoint, and then someone goes in
and does glBufferData()? Same for set_ssbo_binding.

  -ilia


On Wed, Jun 8, 2016 at 2:28 PM, Ilia Mirkin  wrote:
> Hm, that's odd. I guess the buffer usage doesn't get set properly? I
> won't be able to look at this until tonight at the earliest, feel free
> to revert the change in the meanwhile.
>
>   -ilia
>
> On Wed, Jun 8, 2016 at 1:25 PM, Brian Paul  wrote:
>> Ilia, this patch causes a regression in the piglit
>> arb_uniform_buffer_object-rendering-dsa test with llvmpipe (at least).
>>
>> I haven't debugged it at all.
>>
>> -Brian
>>
>> On 06/07/2016 08:29 PM, Ilia Mirkin wrote:
>>>
>>> Module: Mesa
>>> Branch: master
>>> Commit: 6e6fd911da8a1d9cd62fe0a8a4cc0fb7bdccfe02
>>> URL:
>>> https://urldefense.proofpoint.com/v2/url?u=http-3A__cgit.freedesktop.org_mesa_mesa_commit_-3Fid-3D6e6fd911da8a1d9cd62fe0a8a4cc0fb7bdccfe02=CwIGaQ=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs=T0t4QG7chq2ZwJo6wilkFznRSFy-8uDKartPGbomVj8=oZCXDAQxyzodq06r0fdxhJ0TSS2VMnftkgDNyPiJGKY=zn9XJ75wNk6pxn5KNFcWfLu7sXurLGGcWfIvVtSMwEo=
>>>
>>> Author: Ilia Mirkin 
>>> Date:   Sat Jun  4 13:26:46 2016 -0400
>>>
>>> st/mesa: use buffer usage history to set dirty flags for revalidation
>>>
>>> We were previously unconditionally doing this for arrays and ubo's, and
>>> ignoring texture/storage/atomic buffers. Instead use the usage history
>>> to determine which atoms need to be revalidated.
>>>
>>> Signed-off-by: Ilia Mirkin 
>>> Reviewed-by: Nicolai Hähnle 
>>> Cc: "12.0" 
>>>
>>> ---
>>>
>>>   src/mesa/state_tracker/st_cb_bufferobjects.c | 15 +--
>>>   1 file changed, 13 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c
>>> b/src/mesa/state_tracker/st_cb_bufferobjects.c
>>> index 8bbc2f0..1a8aea3 100644
>>> --- a/src/mesa/state_tracker/st_cb_bufferobjects.c
>>> +++ b/src/mesa/state_tracker/st_cb_bufferobjects.c
>>> @@ -332,8 +332,19 @@ st_bufferobj_data(struct gl_context *ctx,
>>> }
>>>  }
>>>
>>> -   /* BufferData may change an array or uniform buffer, need to update it
>>> */
>>> -   st->dirty.st |= ST_NEW_VERTEX_ARRAYS | ST_NEW_UNIFORM_BUFFER;
>>> +   /* The current buffer may be bound, so we have to revalidate all atoms
>>> that
>>> +* might be using it.
>>> +*/
>>> +   /* TODO: Add arrays to usage history */
>>> +   st->dirty.st |= ST_NEW_VERTEX_ARRAYS;
>>> +   if (st_obj->Base.UsageHistory & USAGE_UNIFORM_BUFFER)
>>> +  st->dirty.st |= ST_NEW_UNIFORM_BUFFER;
>>> +   if (st_obj->Base.UsageHistory & USAGE_SHADER_STORAGE_BUFFER)
>>> +  st->dirty.st |= ST_NEW_STORAGE_BUFFER;
>>> +   if (st_obj->Base.UsageHistory & USAGE_TEXTURE_BUFFER)
>>> +  st->dirty.st |= ST_NEW_SAMPLER_VIEWS | ST_NEW_IMAGE_UNITS;
>>> +   if (st_obj->Base.UsageHistory & USAGE_ATOMIC_COUNTER_BUFFER)
>>> +  st->dirty.st |= ST_NEW_ATOMIC_BUFFER;
>>>
>>>  return GL_TRUE;
>>>   }
>>>
>>> ___
>>> mesa-commit mailing list
>>> mesa-com...@lists.freedesktop.org
>>>
>>> https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.freedesktop.org_mailman_listinfo_mesa-2Dcommit=CwIGaQ=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs=T0t4QG7chq2ZwJo6wilkFznRSFy-8uDKartPGbomVj8=oZCXDAQxyzodq06r0fdxhJ0TSS2VMnftkgDNyPiJGKY=TUTsMchaJhWjCx5k6tptwVWWVHJs-zDtCtJVpPhM1pM=
>>>
>>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: expose ARB_compute_shader with compat profile

2016-06-08 Thread Samuel Pitoiset



On 06/08/2016 01:45 AM, Ian Romanick wrote:

On 06/07/2016 04:28 PM, Nanley Chery wrote:

On Wed, May 25, 2016 at 12:52:38PM -0700, Matt Turner wrote:

On Wed, May 25, 2016 at 12:17 PM, Samuel Pitoiset
 wrote:

This fixes use of compute shaders with some NVIDIA GL 4.3 samples which
require a compat profile.


I'm confused.

The samples you're talking about... use 4.3 compatibility? We already
don't support compatibility profile, so you presumably have to hack
that on.

The ARB_compute_shader extension spec says it requires 4.2, and we
don't expose 4.2 with the compatibility profile. I think you should
have some justification for why it's safe to expose in compatibility
profile, since that's a pretty big change. Or, you don't want to
expose it in compatibility but you need this patch to enable it with
the 4.2-compat hack enabled...?

Strangely, ARB_compute_shader says "GLL, GLC" in extensions_table.h,
but it's only core-enabled on my glxinfo.


Because extensions_table.h has "GLL" in the ARB_compute_shader entry,
the extension is already exposed in all compatibility contexts. This
patch fixes the bug of advertising the extension externally, but
internally failing to do the logic required by the extension.


Thanks for clearing that up, Nanley.  That makes sense.  I believe the
commit message is misleading, and that led to some confusion.  I think a
better commit message is:

mesa: Let compute shaders work in compatibility profiles

The extension is already advertised in compatibility profile, but
the _mesa_has_compute_shaders only returns true in core profile.
If we advertise it, we should allow it to work.

Now, whether or not we should advertise the extension in compatibility
profiles is an orthogonal question.  We discussed this when compute
shaders first landed, and we decided to enable it.  I don't know of a
strong reason to change that, but I'm not married to it.

Either way, with the above change to the commit message, this patch is

Reviewed-by: Ian Romanick 


Thanks for the clarification, and your commit message is much better than 
mine. :)


I will push the patch.




- Nanley


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Mesa (master): st/mesa: use buffer usage history to set dirty flags for revalidation

2016-06-08 Thread Ilia Mirkin
Hm, that's odd. I guess the buffer usage doesn't get set properly? I
won't be able to look at this until tonight at the earliest, feel free
to revert the change in the meanwhile.

  -ilia

On Wed, Jun 8, 2016 at 1:25 PM, Brian Paul  wrote:
> Ilia, this patch causes a regression in the piglit
> arb_uniform_buffer_object-rendering-dsa test with llvmpipe (at least).
>
> I haven't debugged it at all.
>
> -Brian
>
> On 06/07/2016 08:29 PM, Ilia Mirkin wrote:
>>
>> Module: Mesa
>> Branch: master
>> Commit: 6e6fd911da8a1d9cd62fe0a8a4cc0fb7bdccfe02
>> URL:
>> https://urldefense.proofpoint.com/v2/url?u=http-3A__cgit.freedesktop.org_mesa_mesa_commit_-3Fid-3D6e6fd911da8a1d9cd62fe0a8a4cc0fb7bdccfe02=CwIGaQ=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs=T0t4QG7chq2ZwJo6wilkFznRSFy-8uDKartPGbomVj8=oZCXDAQxyzodq06r0fdxhJ0TSS2VMnftkgDNyPiJGKY=zn9XJ75wNk6pxn5KNFcWfLu7sXurLGGcWfIvVtSMwEo=
>>
>> Author: Ilia Mirkin 
>> Date:   Sat Jun  4 13:26:46 2016 -0400
>>
>> st/mesa: use buffer usage history to set dirty flags for revalidation
>>
>> We were previously unconditionally doing this for arrays and ubo's, and
>> ignoring texture/storage/atomic buffers. Instead use the usage history
>> to determine which atoms need to be revalidated.
>>
>> Signed-off-by: Ilia Mirkin 
>> Reviewed-by: Nicolai Hähnle 
>> Cc: "12.0" 
>>
>> ---
>>
>>   src/mesa/state_tracker/st_cb_bufferobjects.c | 15 +--
>>   1 file changed, 13 insertions(+), 2 deletions(-)
>>
>> diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c
>> b/src/mesa/state_tracker/st_cb_bufferobjects.c
>> index 8bbc2f0..1a8aea3 100644
>> --- a/src/mesa/state_tracker/st_cb_bufferobjects.c
>> +++ b/src/mesa/state_tracker/st_cb_bufferobjects.c
>> @@ -332,8 +332,19 @@ st_bufferobj_data(struct gl_context *ctx,
>> }
>>  }
>>
>> -   /* BufferData may change an array or uniform buffer, need to update it
>> */
>> -   st->dirty.st |= ST_NEW_VERTEX_ARRAYS | ST_NEW_UNIFORM_BUFFER;
>> +   /* The current buffer may be bound, so we have to revalidate all atoms
>> that
>> +* might be using it.
>> +*/
>> +   /* TODO: Add arrays to usage history */
>> +   st->dirty.st |= ST_NEW_VERTEX_ARRAYS;
>> +   if (st_obj->Base.UsageHistory & USAGE_UNIFORM_BUFFER)
>> +  st->dirty.st |= ST_NEW_UNIFORM_BUFFER;
>> +   if (st_obj->Base.UsageHistory & USAGE_SHADER_STORAGE_BUFFER)
>> +  st->dirty.st |= ST_NEW_STORAGE_BUFFER;
>> +   if (st_obj->Base.UsageHistory & USAGE_TEXTURE_BUFFER)
>> +  st->dirty.st |= ST_NEW_SAMPLER_VIEWS | ST_NEW_IMAGE_UNITS;
>> +   if (st_obj->Base.UsageHistory & USAGE_ATOMIC_COUNTER_BUFFER)
>> +  st->dirty.st |= ST_NEW_ATOMIC_BUFFER;
>>
>>  return GL_TRUE;
>>   }
>>
>> ___
>> mesa-commit mailing list
>> mesa-com...@lists.freedesktop.org
>>
>> https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.freedesktop.org_mailman_listinfo_mesa-2Dcommit=CwIGaQ=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs=T0t4QG7chq2ZwJo6wilkFznRSFy-8uDKartPGbomVj8=oZCXDAQxyzodq06r0fdxhJ0TSS2VMnftkgDNyPiJGKY=TUTsMchaJhWjCx5k6tptwVWWVHJs-zDtCtJVpPhM1pM=
>>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965/gen9: Don't change halign and valign to fit in fast copy blit

2016-06-08 Thread Anuj Phogat
An update in graphics specs has deleted the halign and valign fields
from XY_FAST_COPY_BLT command. See mesa commit 97f0f91.

Cc: Ben Widawsky 
Signed-off-by: Anuj Phogat 
---
 src/mesa/drivers/dri/i965/brw_tex_layout.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c 
b/src/mesa/drivers/dri/i965/brw_tex_layout.c
index a294829..4618bc0 100644
--- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
+++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
@@ -763,10 +763,8 @@ intel_miptree_set_alignment(struct brw_context *brw,
   mt->halign = 8;
   mt->valign = brw->gen >= 7 ? 8 : 4;
} else if (brw->gen >= 9 && mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE) {
-  /* XY_FAST_COPY_BLT doesn't support horizontal alignment < 32 or
-   * vertical alignment < 64. */
-  mt->halign = MAX2(tr_mode_horizontal_texture_alignment(mt), 32);
-  mt->valign = MAX2(tr_mode_vertical_texture_alignment(mt), 64);
+  mt->halign = tr_mode_horizontal_texture_alignment(mt);
+  mt->valign = tr_mode_vertical_texture_alignment(mt);
} else {
   mt->halign =
  intel_horizontal_texture_alignment_unit(brw, mt, layout_flags);
-- 
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: Add a helper function for shared code in get_tex_rgba_{un}compressed

2016-06-08 Thread Anuj Phogat
On Mon, Dec 28, 2015 at 10:46 AM, Anuj Phogat  wrote:
> Signed-off-by: Anuj Phogat 
> ---
>  src/mesa/main/texgetimage.c | 83 
> +
>  1 file changed, 38 insertions(+), 45 deletions(-)
>
> diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c
> index b273aaa..4399803 100644
> --- a/src/mesa/main/texgetimage.c
> +++ b/src/mesa/main/texgetimage.c
> @@ -265,6 +265,40 @@ get_tex_ycbcr(struct gl_context *ctx, GLuint dimensions,
> }
>  }
>
> +/* Depending on the base format involved we may need to apply a rebase
> + * transform (for example: if we download to a Luminance format we want
> + * G=0 and B=0).
> + */
> +
> +static bool
> +teximage_needs_rebase(mesa_format texFormat, GLenum baseFormat,
> +  bool is_compressed, uint8_t *rebaseSwizzle)
> +{
> +   bool needsRebase = false;
> +
> +   if (baseFormat == GL_LUMINANCE ||
> +   baseFormat == GL_INTENSITY) {
> +  needsRebase = true;
> +  rebaseSwizzle[0] = MESA_FORMAT_SWIZZLE_X;
> +  rebaseSwizzle[1] = MESA_FORMAT_SWIZZLE_ZERO;
> +  rebaseSwizzle[2] = MESA_FORMAT_SWIZZLE_ZERO;
> +  rebaseSwizzle[3] = MESA_FORMAT_SWIZZLE_ONE;
> +   } else if (baseFormat == GL_LUMINANCE_ALPHA) {
> +  needsRebase = true;
> +  rebaseSwizzle[0] = MESA_FORMAT_SWIZZLE_X;
> +  rebaseSwizzle[1] = MESA_FORMAT_SWIZZLE_ZERO;
> +  rebaseSwizzle[2] = MESA_FORMAT_SWIZZLE_ZERO;
> +  rebaseSwizzle[3] = MESA_FORMAT_SWIZZLE_W;
> +   } else if (!is_compressed && (baseFormat !=
> +   _mesa_get_format_base_format(texFormat))) {
> +  needsRebase =
> + _mesa_compute_rgba2base2rgba_component_mapping(baseFormat,
> +rebaseSwizzle);
> +   }
> +
> +   return needsRebase;
> +}
> +
>
>  /**
>   * Get a color texture image with decompression.
> @@ -319,26 +353,8 @@ get_tex_rgba_compressed(struct gl_context *ctx, GLuint 
> dimensions,
>}
> }
>
> -   /* Depending on the base format involved we may need to apply a rebase
> -* transform (for example: if we download to a Luminance format we want
> -* G=0 and B=0).
> -*/
> -   if (baseFormat == GL_LUMINANCE ||
> -   baseFormat == GL_INTENSITY) {
> -  needsRebase = true;
> -  rebaseSwizzle[0] = MESA_FORMAT_SWIZZLE_X;
> -  rebaseSwizzle[1] = MESA_FORMAT_SWIZZLE_ZERO;
> -  rebaseSwizzle[2] = MESA_FORMAT_SWIZZLE_ZERO;
> -  rebaseSwizzle[3] = MESA_FORMAT_SWIZZLE_ONE;
> -   } else if (baseFormat == GL_LUMINANCE_ALPHA) {
> -  needsRebase = true;
> -  rebaseSwizzle[0] = MESA_FORMAT_SWIZZLE_X;
> -  rebaseSwizzle[1] = MESA_FORMAT_SWIZZLE_ZERO;
> -  rebaseSwizzle[2] = MESA_FORMAT_SWIZZLE_ZERO;
> -  rebaseSwizzle[3] = MESA_FORMAT_SWIZZLE_W;
> -   } else {
> -  needsRebase = false;
> -   }
> +   needsRebase = teximage_needs_rebase(texFormat, baseFormat, true,
> +   rebaseSwizzle);
>
> srcStride = 4 * width * sizeof(GLfloat);
> dstStride = _mesa_image_row_stride(&ctx->Pack, width, format, type);
> @@ -423,31 +439,8 @@ get_tex_rgba_uncompressed(struct gl_context *ctx, GLuint 
> dimensions,
> bool needsRebase;
> void *rgba = NULL;
>
> -   /* Depending on the base format involved we may need to apply a rebase
> -* transform (for example: if we download to a Luminance format we want
> -* G=0 and B=0).
> -*/
> -   if (texImage->_BaseFormat == GL_LUMINANCE ||
> -   texImage->_BaseFormat == GL_INTENSITY) {
> -  needsRebase = true;
> -  rebaseSwizzle[0] = MESA_FORMAT_SWIZZLE_X;
> -  rebaseSwizzle[1] = MESA_FORMAT_SWIZZLE_ZERO;
> -  rebaseSwizzle[2] = MESA_FORMAT_SWIZZLE_ZERO;
> -  rebaseSwizzle[3] = MESA_FORMAT_SWIZZLE_ONE;
> -   } else if (texImage->_BaseFormat == GL_LUMINANCE_ALPHA) {
> -  needsRebase = true;
> -  rebaseSwizzle[0] = MESA_FORMAT_SWIZZLE_X;
> -  rebaseSwizzle[1] = MESA_FORMAT_SWIZZLE_ZERO;
> -  rebaseSwizzle[2] = MESA_FORMAT_SWIZZLE_ZERO;
> -  rebaseSwizzle[3] = MESA_FORMAT_SWIZZLE_W;
> -} else if (texImage->_BaseFormat !=
> -   _mesa_get_format_base_format(texFormat)) {
> -  needsRebase =
> - 
> _mesa_compute_rgba2base2rgba_component_mapping(texImage->_BaseFormat,
> -rebaseSwizzle);
> -} else {
> -  needsRebase = false;
> -}
> +   needsRebase = teximage_needs_rebase(texFormat, texImage->_BaseFormat, 
> false,
> +   rebaseSwizzle);
>
> /* Describe the dst format */
> dst_is_integer = _mesa_is_enum_format_integer(format);
> --
> 2.5.0
>

Anyone want to review this old patch? It still applies cleanly on master.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: expose ARB_compute_shader with compat profile

2016-06-08 Thread Nanley Chery
On Tue, Jun 07, 2016 at 04:45:39PM -0700, Ian Romanick wrote:
> On 06/07/2016 04:28 PM, Nanley Chery wrote:
> > On Wed, May 25, 2016 at 12:52:38PM -0700, Matt Turner wrote:
> >> On Wed, May 25, 2016 at 12:17 PM, Samuel Pitoiset
> >>  wrote:
> >>> This fixes use of compute shaders with some NVIDIA GL 4.3 samples which
> >>> require a compat profile.
> >>
> >> I'm confused.
> >>
> >> The samples you're talking about... use 4.3 compatibility? We already
> >> don't support compatibility profile, so you presumably have to hack
> >> that on.
> >>
> >> The ARB_compute_shader extension spec says it requires 4.2, and we
> >> don't expose 4.2 with the compatibility profile. I think you should
> >> have some justification for why it's safe to expose in compatibility
> >> profile, since that's a pretty big change. Or, you don't want to
> >> expose it in compatibility but you need this patch to enable it with
> >> the 4.2-compat hack enabled...?
> >>
> >> Strangely, ARB_compute_shader says "GLL, GLC" in extensions_table.h,
> >> but it's only core-enabled on my glxinfo.
> > 
> > Because extensions_table.h has "GLL" in the ARB_compute_shader entry,
> > the extension is already exposed in all compatibility contexts. This
> > patch fixes the bug of advertising the extension externally, but
> > internally failing to do the logic required by the extension.
> 
> Thanks for clearing that up, Nanley.  That makes sense.  I believe the
> commit message is misleading, and that led to some confusion.  I think a
> better commit message is:
> 
> mesa: Let compute shaders work in compatibility profiles
> 
> The extension is already advertised in compatibility profile, but
> the _mesa_has_compute_shaders only returns true in core profile.
> If we advertise it, we should allow it to work.
> 

No problem. Thanks for coming up with a solution to improve the patch's
clarity.

> Now, whether or not we should advertise the extension in compatibility
> profiles is an orthogonal question.  We discussed this when compute
> shaders first landed, and we decided to enable it.  I don't know of a
> strong reason to change that, but I'm not married to it.
> 
> Either way, with the above change to the commit message, this patch is
> 

Agreed. With the clearer commit message, this patch is also
Reviewed-by: Nanley Chery 

> Reviewed-by: Ian Romanick 
> 
> > - Nanley
> > 
> >> ___
> >> mesa-dev mailing list
> >> mesa-dev@lists.freedesktop.org
> >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> > 
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/5] i965: Use a uniform for gl_PatchVerticesIn in the TES.

2016-06-08 Thread Alejandro Piñeiro
On 08/06/16 19:45, Alejandro Piñeiro wrote:
> On 02/06/16 23:09, Kenneth Graunke wrote:
>> Fixes GL44-CTS.tessellation_shader.single.max_patch_vertices,
>> which uses gl_PatchVerticesIn in the TES, but doesn't link against
>> a TCS (which would allow the linker to lower it to a constant).
> Not sure if using different CTS releases but in my case I tested that
> this patch (plus the previous one) fixes the following CTS test:
> GL44-CTS.tessellation_shader.max_patch_vertices (without the single).

BTW, this also fixes
GL44-CTS.tessellation_shader.tessellation_control_to_tessellation_evaluation.gl_PatchVerticesIn

>> Signed-off-by: Kenneth Graunke 
>> Cc: mesa-sta...@lists.freedesktop.org
>> ---
>>  src/mesa/drivers/dri/i965/brw_context.c | 1 +
>>  1 file changed, 1 insertion(+)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
>> b/src/mesa/drivers/dri/i965/brw_context.c
>> index 97dc226..aad2afb 100644
>> --- a/src/mesa/drivers/dri/i965/brw_context.c
>> +++ b/src/mesa/drivers/dri/i965/brw_context.c
>> @@ -599,6 +599,7 @@ brw_initialize_context_constants(struct brw_context *brw)
>>ctx->Const.MaxClipPlanes = 8;
>>  
>> ctx->Const.LowerTessLevel = true;
>> +   ctx->Const.LowerTESPatchVerticesIn = true;
>> ctx->Const.PrimitiveRestartForPatches = true;
>>  
>> ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
> Nitpicks apart:
> Reviewed-by: Alejandro Piñeiro 
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/5] glsl: Optionally lower TES gl_PatchVerticesIn to a uniform.

2016-06-08 Thread Alejandro Piñeiro
On 02/06/16 23:09, Kenneth Graunke wrote:
> i965 has no special hardware for this, so we need to pass this value in
> as a uniform (unless the TES is linked against a TCS, in which case the
> linker can just replace this with a constant).
>
> Signed-off-by: Kenneth Graunke 
> Cc: mesa-sta...@lists.freedesktop.org
> ---
>  src/compiler/glsl/linker.cpp  | 20 +---
>  src/mesa/main/mtypes.h|  1 +
>  src/mesa/program/prog_statevars.c |  7 +++
>  src/mesa/program/prog_statevars.h |  1 +
>  4 files changed, 26 insertions(+), 3 deletions(-)
>
> diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
> index 9e65590..7c7dbfe 100644
> --- a/src/compiler/glsl/linker.cpp
> +++ b/src/compiler/glsl/linker.cpp
> @@ -72,6 +72,7 @@
>  #include "ir.h"
>  #include "program.h"
>  #include "program/hash_table.h"
> +#include "program/prog_instruction.h"
>  #include "linker.h"
>  #include "link_varyings.h"
>  #include "ir_optimization.h"
> @@ -2485,7 +2486,7 @@ resize_tes_inputs(struct gl_context *ctx,
>ir->accept(_resize_visitor);
> }
>  
> -   if (tcs) {
> +   if (tcs || ctx->Const.LowerTESPatchVerticesIn) {
>/* Convert the gl_PatchVerticesIn system value into a constant, since
> * the value is known at this point.

This comment is now obsolete, as the value can be converted to either a
constant or a uniform. Perhaps it would be better to just remove the comment.

> */
> @@ -2494,9 +2495,22 @@ resize_tes_inputs(struct gl_context *ctx,
>   if (var && var->data.mode == ir_var_system_value &&
>   var->data.location == SYSTEM_VALUE_VERTICES_IN) {
>  void *mem_ctx = ralloc_parent(var);
> -var->data.mode = ir_var_auto;
>  var->data.location = 0;
> -var->constant_value = new(mem_ctx) ir_constant(num_vertices);
> +var->data.explicit_location = false;

Nitpick: With this change you are also slightly changing the option that
was present before (use a constant), in addition to adding the option to
lower to a uniform. Not sure if it is worth mentioning though;
feel free to ignore.

> +if (tcs) {
> +   var->data.mode = ir_var_auto;
> +   var->constant_value = new(mem_ctx) ir_constant(num_vertices);
> +} else {
> +   var->data.mode = ir_var_uniform;
> +   var->data.how_declared = ir_var_hidden;
> +   var->allocate_state_slots(1);
> +   ir_state_slot *slot0 = &var->get_state_slots()[0];
> +   slot0->swizzle = SWIZZLE_;
> +   slot0->tokens[0] = STATE_INTERNAL;
> +   slot0->tokens[1] = STATE_TES_PATCH_VERTICES_IN;
> +   for (int i = 2; i < STATE_LENGTH; i++)
> +  slot0->tokens[i] = 0;
> +}
>   }
>}
> }
> diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
> index d0f3760..8970274 100644
> --- a/src/mesa/main/mtypes.h
> +++ b/src/mesa/main/mtypes.h
> @@ -3762,6 +3762,7 @@ struct gl_constants
> GLuint MaxTessPatchComponents;
> GLuint MaxTessControlTotalOutputComponents;
> bool LowerTessLevel; /**< Lower gl_TessLevel* from float[n] to vecn? */
> +   bool LowerTESPatchVerticesIn; /**< Lower gl_PatchVerticesIn to a uniform 
> */
> bool PrimitiveRestartForPatches;
> bool LowerCsDerivedVariables;/**< Lower gl_GlobalInvocationID and
>   *   gl_LocalInvocationIndex based on
> diff --git a/src/mesa/program/prog_statevars.c 
> b/src/mesa/program/prog_statevars.c
> index 27ff333..23e3015 100644
> --- a/src/mesa/program/prog_statevars.c
> +++ b/src/mesa/program/prog_statevars.c
> @@ -598,6 +598,13 @@ _mesa_fetch_state(struct gl_context *ctx, const 
> gl_state_index state[],
>   }
>   return;
>  
> +  case STATE_TES_PATCH_VERTICES_IN:
> + if (ctx->TessCtrlProgram._Current)
> +val[0].i = ctx->TessCtrlProgram._Current->VerticesOut;
> + else
> +val[0].i = ctx->TessCtrlProgram.patch_vertices;
> + return;
> +
>/* XXX: make sure new tokens added here are also handled in the 
> * _mesa_program_state_flags() switch, below.
> */
> diff --git a/src/mesa/program/prog_statevars.h 
> b/src/mesa/program/prog_statevars.h
> index 6333e63..4b27527 100644
> --- a/src/mesa/program/prog_statevars.h
> +++ b/src/mesa/program/prog_statevars.h
> @@ -128,6 +128,7 @@ typedef enum gl_state_index_ {
> STATE_PT_BIAS,   /**< Pixel transfer RGBA bias */
> STATE_FB_SIZE,   /**< (width-1, height-1, 0, 0) */
> STATE_FB_WPOS_Y_TRANSFORM,   /**< (1, 0, -1, height) if a FBO is bound, 
> (-1, height, 1, 0) otherwise */
> +   STATE_TES_PATCH_VERTICES_IN, /**< gl_PatchVerticesIn for TES (integer) */
> STATE_INTERNAL_DRIVER /* first available state index for drivers 
> (must be last) */
>  } gl_state_index;
>  

In any case:

Re: [Mesa-dev] [PATCH 3/5] i965: Use a uniform for gl_PatchVerticesIn in the TES.

2016-06-08 Thread Alejandro Piñeiro
On 02/06/16 23:09, Kenneth Graunke wrote:
> Fixes GL44-CTS.tessellation_shader.single.max_patch_vertices,
> which uses gl_PatchVerticesIn in the TES, but doesn't link against
> a TCS (which would allow the linker to lower it to a constant).

Not sure if we are using different CTS releases, but in my case I verified
that this patch (plus the previous one) fixes the following CTS test:
GL44-CTS.tessellation_shader.max_patch_vertices (without the "single").

>
> Signed-off-by: Kenneth Graunke 
> Cc: mesa-sta...@lists.freedesktop.org
> ---
>  src/mesa/drivers/dri/i965/brw_context.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
> b/src/mesa/drivers/dri/i965/brw_context.c
> index 97dc226..aad2afb 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.c
> +++ b/src/mesa/drivers/dri/i965/brw_context.c
> @@ -599,6 +599,7 @@ brw_initialize_context_constants(struct brw_context *brw)
>ctx->Const.MaxClipPlanes = 8;
>  
> ctx->Const.LowerTessLevel = true;
> +   ctx->Const.LowerTESPatchVerticesIn = true;
> ctx->Const.PrimitiveRestartForPatches = true;
>  
> ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;

Nitpicks apart:
Reviewed-by: Alejandro Piñeiro 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/5] glsl: Optionally lower TCS gl_PatchVerticesIn to a uniform.

2016-06-08 Thread Alejandro Piñeiro
On 02/06/16 23:09, Kenneth Graunke wrote:
> i965 has no special hardware for this, so the best way to implement
> this is to pass it in via a uniform.

Is this comment accurate? On the next patch you implement the i965 bits,
and you only do the lowering on gen8+, mentioning that you would like to
do the same on gen7. So for gen7 it is implemented, but a uniform is
not used. Or am I missing something?

> Signed-off-by: Kenneth Graunke 
> Cc: mesa-sta...@lists.freedesktop.org
> ---
>  src/compiler/glsl/builtin_variables.cpp | 13 -
>  src/mesa/main/mtypes.h  |  1 +
>  src/mesa/program/prog_statevars.c   |  4 
>  src/mesa/program/prog_statevars.h   |  1 +
>  4 files changed, 18 insertions(+), 1 deletion(-)
>
> diff --git a/src/compiler/glsl/builtin_variables.cpp 
> b/src/compiler/glsl/builtin_variables.cpp
> index 05b3b0b..35a11bb 100644
> --- a/src/compiler/glsl/builtin_variables.cpp
> +++ b/src/compiler/glsl/builtin_variables.cpp
> @@ -37,6 +37,11 @@ static const struct gl_builtin_uniform_element 
> gl_NumSamples_elements[] = {
> {NULL, {STATE_NUM_SAMPLES, 0, 0}, SWIZZLE_}
>  };
>  
> +/* only for TCS */
> +static const struct gl_builtin_uniform_element gl_PatchVerticesIn_elements[] 
> = {
> +   {NULL, {STATE_INTERNAL, STATE_TCS_PATCH_VERTICES_IN}, SWIZZLE_}
> +};
> +
>  static const struct gl_builtin_uniform_element gl_DepthRange_elements[] = {
> {"near", {STATE_DEPTH_RANGE, 0, 0}, SWIZZLE_},
> {"far", {STATE_DEPTH_RANGE, 0, 0}, SWIZZLE_},
> @@ -234,6 +239,7 @@ static const struct gl_builtin_uniform_element 
> gl_NormalMatrix_elements[] = {
>  #define STATEVAR(name) {#name, name ## _elements, ARRAY_SIZE(name ## 
> _elements)}
>  
>  static const struct gl_builtin_uniform_desc _mesa_builtin_uniform_desc[] = {
> +   STATEVAR(gl_PatchVerticesIn),
> STATEVAR(gl_NumSamples),
> STATEVAR(gl_DepthRange),
> STATEVAR(gl_ClipPlane),
> @@ -1029,9 +1035,14 @@ void
>  builtin_variable_generator::generate_tcs_special_vars()
>  {
> add_system_value(SYSTEM_VALUE_PRIMITIVE_ID, int_t, "gl_PrimitiveID");
> -   add_system_value(SYSTEM_VALUE_VERTICES_IN, int_t, "gl_PatchVerticesIn");
> add_system_value(SYSTEM_VALUE_INVOCATION_ID, int_t, "gl_InvocationID");
>  
> +   if (state->ctx->Const.LowerTCSPatchVerticesIn) {
> +  add_uniform(int_t, "gl_PatchVerticesIn");
> +   } else {
> +  add_system_value(SYSTEM_VALUE_VERTICES_IN, int_t, 
> "gl_PatchVerticesIn");
> +   }
> +
> add_output(VARYING_SLOT_TESS_LEVEL_OUTER, array(float_t, 4),
>"gl_TessLevelOuter")->data.patch = 1;
> add_output(VARYING_SLOT_TESS_LEVEL_INNER, array(float_t, 2),
> diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
> index 8970274..f1d70fb 100644
> --- a/src/mesa/main/mtypes.h
> +++ b/src/mesa/main/mtypes.h
> @@ -3762,6 +3762,7 @@ struct gl_constants
> GLuint MaxTessPatchComponents;
> GLuint MaxTessControlTotalOutputComponents;
> bool LowerTessLevel; /**< Lower gl_TessLevel* from float[n] to vecn? */
> +   bool LowerTCSPatchVerticesIn; /**< Lower gl_PatchVerticesIn to a uniform 
> */
> bool LowerTESPatchVerticesIn; /**< Lower gl_PatchVerticesIn to a uniform 
> */
> bool PrimitiveRestartForPatches;
> bool LowerCsDerivedVariables;/**< Lower gl_GlobalInvocationID and
> diff --git a/src/mesa/program/prog_statevars.c 
> b/src/mesa/program/prog_statevars.c
> index 23e3015..8dddc0b 100644
> --- a/src/mesa/program/prog_statevars.c
> +++ b/src/mesa/program/prog_statevars.c
> @@ -598,6 +598,10 @@ _mesa_fetch_state(struct gl_context *ctx, const 
> gl_state_index state[],
>   }
>   return;
>  
> +  case STATE_TCS_PATCH_VERTICES_IN:
> + val[0].i = ctx->TessCtrlProgram.patch_vertices;
> + return;
> +
>case STATE_TES_PATCH_VERTICES_IN:
>   if (ctx->TessCtrlProgram._Current)
>  val[0].i = ctx->TessCtrlProgram._Current->VerticesOut;
> diff --git a/src/mesa/program/prog_statevars.h 
> b/src/mesa/program/prog_statevars.h
> index 4b27527..e716d90 100644
> --- a/src/mesa/program/prog_statevars.h
> +++ b/src/mesa/program/prog_statevars.h
> @@ -128,6 +128,7 @@ typedef enum gl_state_index_ {
> STATE_PT_BIAS,   /**< Pixel transfer RGBA bias */
> STATE_FB_SIZE,   /**< (width-1, height-1, 0, 0) */
> STATE_FB_WPOS_Y_TRANSFORM,   /**< (1, 0, -1, height) if a FBO is bound, 
> (-1, height, 1, 0) otherwise */
> +   STATE_TCS_PATCH_VERTICES_IN, /**< gl_PatchVerticesIn for TCS (integer) */
> STATE_TES_PATCH_VERTICES_IN, /**< gl_PatchVerticesIn for TES (integer) */
> STATE_INTERNAL_DRIVER /* first available state index for drivers 
> (must be last) */
>  } gl_state_index;

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/12] RadeonSI: DCC mipmapping support & improvements

2016-06-08 Thread Marek Olšák
On Wed, Jun 8, 2016 at 2:34 PM, ⚛ <0xe2.0x9a.0...@gmail.com> wrote:
> Hello
>
> Mesa 12.1.0-devel (git-8c3ecde) has rendering issues when running
> "glxgears -samples N" with N >= 1 on R9 390. It was running ok
> yesterday/previously.

I reverted the problematic commit.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Mesa (master): st/mesa: use buffer usage history to set dirty flags for revalidation

2016-06-08 Thread Brian Paul
Ilia, this patch causes a regression in the piglit 
arb_uniform_buffer_object-rendering-dsa test with llvmpipe (at least).


I haven't debugged it at all.

-Brian

On 06/07/2016 08:29 PM, Ilia Mirkin wrote:

Module: Mesa
Branch: master
Commit: 6e6fd911da8a1d9cd62fe0a8a4cc0fb7bdccfe02
URL:
https://urldefense.proofpoint.com/v2/url?u=http-3A__cgit.freedesktop.org_mesa_mesa_commit_-3Fid-3D6e6fd911da8a1d9cd62fe0a8a4cc0fb7bdccfe02=CwIGaQ=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs=T0t4QG7chq2ZwJo6wilkFznRSFy-8uDKartPGbomVj8=oZCXDAQxyzodq06r0fdxhJ0TSS2VMnftkgDNyPiJGKY=zn9XJ75wNk6pxn5KNFcWfLu7sXurLGGcWfIvVtSMwEo=

Author: Ilia Mirkin 
Date:   Sat Jun  4 13:26:46 2016 -0400

st/mesa: use buffer usage history to set dirty flags for revalidation

We were previously unconditionally doing this for arrays and ubo's, and
ignoring texture/storage/atomic buffers. Instead use the usage history
to determine which atoms need to be revalidated.

Signed-off-by: Ilia Mirkin 
Reviewed-by: Nicolai Hähnle 
Cc: "12.0" 

---

  src/mesa/state_tracker/st_cb_bufferobjects.c | 15 +--
  1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c 
b/src/mesa/state_tracker/st_cb_bufferobjects.c
index 8bbc2f0..1a8aea3 100644
--- a/src/mesa/state_tracker/st_cb_bufferobjects.c
+++ b/src/mesa/state_tracker/st_cb_bufferobjects.c
@@ -332,8 +332,19 @@ st_bufferobj_data(struct gl_context *ctx,
}
 }

-   /* BufferData may change an array or uniform buffer, need to update it */
-   st->dirty.st |= ST_NEW_VERTEX_ARRAYS | ST_NEW_UNIFORM_BUFFER;
+   /* The current buffer may be bound, so we have to revalidate all atoms that
+* might be using it.
+*/
+   /* TODO: Add arrays to usage history */
+   st->dirty.st |= ST_NEW_VERTEX_ARRAYS;
+   if (st_obj->Base.UsageHistory & USAGE_UNIFORM_BUFFER)
+  st->dirty.st |= ST_NEW_UNIFORM_BUFFER;
+   if (st_obj->Base.UsageHistory & USAGE_SHADER_STORAGE_BUFFER)
+  st->dirty.st |= ST_NEW_STORAGE_BUFFER;
+   if (st_obj->Base.UsageHistory & USAGE_TEXTURE_BUFFER)
+  st->dirty.st |= ST_NEW_SAMPLER_VIEWS | ST_NEW_IMAGE_UNITS;
+   if (st_obj->Base.UsageHistory & USAGE_ATOMIC_COUNTER_BUFFER)
+  st->dirty.st |= ST_NEW_ATOMIC_BUFFER;

 return GL_TRUE;
  }

___
mesa-commit mailing list
mesa-com...@lists.freedesktop.org
https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.freedesktop.org_mailman_listinfo_mesa-2Dcommit=CwIGaQ=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs=T0t4QG7chq2ZwJo6wilkFznRSFy-8uDKartPGbomVj8=oZCXDAQxyzodq06r0fdxhJ0TSS2VMnftkgDNyPiJGKY=TUTsMchaJhWjCx5k6tptwVWWVHJs-zDtCtJVpPhM1pM=



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH] st/mesa: use first image's dimensions when finalizing texture

2016-06-08 Thread Nicolai Hähnle

For the piglit test:

Reviewed-by: Nicolai Hähnle 

On 06.06.2016 18:52, Brian Paul wrote:

On 06/06/2016 10:05 AM, Ilia Mirkin wrote:

On Mon, Jun 6, 2016 at 11:37 AM, Brian Paul  wrote:

On 06/05/2016 12:24 AM, Ilia Mirkin wrote:


In the case where we can't guess the base level size, just use the
first
image's dims. The width0/height0/depth0 on stObj may not have been set
at this point. Observed in a trace that set up levels 2..9 of a 2d
texture,
and set the base level to 2, with height 1. This made the guess logic
always bail.

Signed-off-by: Ilia Mirkin 
Cc: "12.0" 
---
   src/mesa/state_tracker/st_cb_texture.c | 6 +++---
   1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_texture.c
b/src/mesa/state_tracker/st_cb_texture.c
index d38f24c..1dd1ef6 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -2463,9 +2463,9 @@ st_finalize_texture(struct gl_context *ctx,
firstImage->base.Depth2,
firstImage->base.Level,
&width, &height, &depth)) {
- width = stObj->width0;
- height = stObj->height0;
- depth = stObj->depth0;
+ width = stObj->width0 = firstImage->base.Width2;
+ height = stObj->height0 = firstImage->base.Height2;
+ depth = stObj->depth0 = firstImage->base.Depth2;
 } else {
/* The width/height/depth may have been previously reset in
 * guess_and_alloc_texture. */



Does this fix a crash or glitch or something else?

The state tracker's texture code is pretty delicate so I'd like to fully
understand the change.


Yes, this fixes a trace that was supplied by someone on IRC. The
situation was that the texture only ever had levels 2..9 (or 10?) set,
and its base level was set to 2. And each level was Nx1 (but
GL_TEXTURE_2D). So all the guessing logic always bailed, which means
that the stObj->width/height/depth were set to 0, which caused asserts
on the next line. (And I assume it wouldn't have rendered correctly
either, but it can be hard to tell in a large scene.)



Maybe we should have a piglit test for this?


I can try to whip something up, not sure when I'll get to it though.
Definitely not today, and probably not for a few at least.


I whipped up a piglit test.  No assertion with your patch, but still
incorrect rendering.  The textured quad is drawn black instead of gray.
  Works w/ NVIDIA driver.

I'm attaching the patch if you want to use it to investigate further.  I
have to get back to other things.

-Brian




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nvc0: do not follow the push path for indexed draws

2016-06-08 Thread Samuel Pitoiset
This fixes a lot of flickering in The Talos Principle on Fermi+
(tested on both GF119 and GK106). My guess is that some indices
and/or some vertex attributes are incorrectly set when emitting
data directly into the command stream rather than uploading a
user VBO.

The NV50 driver also doesn't use the push path for indexed draws
which makes me confident enough in this fix. Furthermore, this
doesn't introduce any regressions with piglit.

As a side-effect, this also improves performance with The Talos
Principle (+~20% fps) and probably with other games which use indexed
draws. This is because uploading a user VBO is more efficient than
pushing data directly into the pushbuf.

There is still an issue in this game (sometimes walls become green),
but this seems to be totally unrelated to the first issue (disabling
the push path doesn't help here).

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90513
Signed-off-by: Samuel Pitoiset 
CC: "11.1 11.2" 
---
 src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
index 888c094..9e7395d 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
@@ -948,7 +948,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct 
pipe_draw_info *info)
 * if index count is larger and we expect repeated vertices, suggest upload.
 */
nvc0->vbo_push_hint =
-  !info->indirect && info->indexed &&
+  !info->indirect && !info->indexed &&
   (nvc0->vb_elt_limit >= (info->count * 2));
 
/* Check whether we want to switch vertex-submission mode. */
-- 
2.8.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] radeonsi: improve the computation and comment of scratch_waves

2016-06-08 Thread Nicolai Hähnle

For the series:

Reviewed-by: Nicolai Hähnle 

On 08.06.2016 17:36, Marek Olšák wrote:

From: Marek Olšák 

2% isn't much. If you think the number should be decreased, please speak up.
---
  src/gallium/drivers/radeonsi/si_pipe.c | 22 ++
  1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 5d7d2f3..0c601da 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -236,11 +236,25 @@ static struct pipe_context *si_create_context(struct 
pipe_screen *screen,
 R600_COHERENCY_SHADER);
}

-   /* XXX: This is the maximum value allowed.  I'm not sure how to compute
-* this for non-cs shaders.  Using the wrong value here can result in
-* GPU lockups, but the maximum value seems to always work.
+   uint64_t max_threads_per_block;
+   screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
+ PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK,
+ &max_threads_per_block);
+
+   /* The maximum number of scratch waves. Scratch space isn't divided
+* evenly between CUs. The number is only a function of the number of 
CUs.
+* We can decrease the constant to decrease the scratch buffer size.
+*
+* sctx->scratch_waves must be >= the maximum posible size of
+* 1 threadgroup, so that the hw doesn't hang from being unable
+* to start any.
+*
+* The recommended value is 4 per CU at most. Higher numbers don't
+* bring much benefit, but they still occupy chip resources (think
+* async compute). I've seen ~2% performance difference between 4 and 
32.
 */
-   sctx->scratch_waves = 32 * sscreen->b.info.num_good_compute_units;
+   sctx->scratch_waves = MAX2(32 * sscreen->b.info.num_good_compute_units,
+  max_threads_per_block / 64);

/* Initialize LLVM TargetMachine */
r600_target = radeon_llvm_get_r600_target(triple);


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 96408] [PERF] SSO: dirty all stages when only one is updated. Trigger extra validations.

2016-06-08 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=96408

Ilia Mirkin  changed:

   What|Removed |Added

 CC|imir...@alum.mit.edu|

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] automake: get in-tree `make distclean' working again.

2016-06-08 Thread Andy Furniss

Emil Velikov wrote:

From: Emil Velikov 

With an earlier commit we handled `make distclean' for out-of-tree
builds, yet we failed to account for the fact that for in-tree builds the
test condition will return 1. Thus the target will effectively be considered
as "failed".

Fixes: b7f7ec78435 ("mesa: automake: distclean git_sha1.h when building
OOT")
Cc: 
Cc: Andy Furniss 
Reported-by: Andy Furniss 
Signed-off-by: Emil Velikov 
---
Thanks for catching this Andy. The following seems to work fine here.
Can you give it a try on your end?


Working OK for me with this, thanks.




Emil
---
  src/Makefile.am | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Makefile.am b/src/Makefile.am
index 0527a31..32372da 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -50,7 +50,7 @@ BUILT_SOURCES = git_sha1.h
  # At the same time `make distclean' gets angry at us if we don't cleanup the
  # builddir one.
  distclean-local:
-   test $(top_srcdir) != $(top_builddir) && rm $(builddir)/git_sha1.h
+   ( test $(top_srcdir) != $(top_builddir) && rm $(builddir)/git_sha1.h ) 
|| true

  SUBDIRS = . gtest util mapi/glapi/gen mapi



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/mesa: fix type confusion with reladdrs

2016-06-08 Thread Brian Paul

On 06/08/2016 09:34 AM, Ilia Mirkin wrote:

The reality is that this doesn't matter, because we manually emit the
ARL to the sampler reladdr, and those arguments don't get an extra load
later, so it's effectively just a boolean. However having the types be
wrong is confusing and could trigger very odd bugs should usage change
down the line.

Signed-off-by: Ilia Mirkin 
---
  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 10 +-
  1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 634d3a5..53c01c6 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -2228,7 +2228,7 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, 
st_src_reg *op)
  GLSL_TYPE_UINT);
if (!const_offset) {
   buffer.reladdr = ralloc(mem_ctx, st_src_reg);
- memcpy(buffer.reladdr, _reladdr, sizeof(sampler_reladdr));
+ *buffer.reladdr = op[0];
   emit_arl(ir, sampler_reladdr, op[0]);
}
emit_asm(ir, TGSI_OPCODE_RESQ, result_dst)->buffer = buffer;
@@ -3249,9 +3249,9 @@ glsl_to_tgsi_visitor::visit_ssbo_intrinsic(ir_call *ir)

 if (!const_block) {
block->accept(this);
-  emit_arl(ir, sampler_reladdr, this->result);
buffer.reladdr = ralloc(mem_ctx, st_src_reg);
-  memcpy(buffer.reladdr, _reladdr, sizeof(sampler_reladdr));
+  *buffer.reladdr = this->result;
+  emit_arl(ir, sampler_reladdr, this->result);
 }

 /* Calculate the surface offset */
@@ -3466,9 +3466,9 @@ glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir)
 get_deref_offsets(img, _array_size, _base,
   (unsigned int *), );
 if (reladdr.file != PROGRAM_UNDEFINED) {
-  emit_arl(ir, sampler_reladdr, reladdr);
image.reladdr = ralloc(mem_ctx, st_src_reg);
-  memcpy(image.reladdr, _reladdr, sizeof(reladdr));
+  *image.reladdr = reladdr;
+  emit_arl(ir, sampler_reladdr, reladdr);
 }

 st_dst_reg dst = undef_dst;



Reviewed-by: Brian Paul 


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] st/mesa: directly compute level=0 texture size in st_finalize_texture

2016-06-08 Thread Brian Paul

On 06/07/2016 05:00 PM, Nicolai Hähnle wrote:

From: Nicolai Hähnle 

The width0/height0/depth0 on stObj may not have been set at this point.
Observed in a trace that set up levels 2..9 of a 2d texture, and set the base
level to 2, with height 1. This made the guess logic always bail.

Originally investigated by Ilia Mirkin, this patch gets rid of the somewhat
redundant storage of width0/height0/depth0 and makes sure we always compute
pipe texture sizes that are compatible with the base level image of the
GL texture.

Fixes the gl-1.2-texture-base-level piglit test provided by Brian Paul.

v2:
- try to re-use an existing pipe texture when possible
- handle a corner case where the base level is not level 0 and it is of
   size 1x1x1

Cc: "12.0" 
--
Tested with Piglit on radeonsi, no change except for the
gl-1.2-texture-base-level test.
---
  src/mesa/state_tracker/st_cb_eglimage.c |  3 --
  src/mesa/state_tracker/st_cb_texture.c  | 86 ++---
  src/mesa/state_tracker/st_manager.c |  3 --
  src/mesa/state_tracker/st_texture.h |  6 ---
  src/mesa/state_tracker/st_vdpau.c   |  3 --
  5 files changed, 48 insertions(+), 53 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_eglimage.c 
b/src/mesa/state_tracker/st_cb_eglimage.c
index 8531afb..1782d15 100644
--- a/src/mesa/state_tracker/st_cb_eglimage.c
+++ b/src/mesa/state_tracker/st_cb_eglimage.c
@@ -128,9 +128,6 @@ st_bind_surface(struct gl_context *ctx, GLenum target,
 st_texture_release_all_sampler_views(st, stObj);
 pipe_resource_reference(>pt, stObj->pt);

-   stObj->width0 = ps->width;
-   stObj->height0 = ps->height;
-   stObj->depth0 = 1;
 stObj->surface_format = ps->format;

 _mesa_dirty_texobj(ctx, texObj);
diff --git a/src/mesa/state_tracker/st_cb_texture.c 
b/src/mesa/state_tracker/st_cb_texture.c
index 44e21b1..3a3d6b9 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -493,7 +493,6 @@ guess_and_alloc_texture(struct st_context *st,

 if (!guessed_box) {
/* we can't determine the image size at level=0 */
-  stObj->width0 = stObj->height0 = stObj->depth0 = 0;
/* this is not an out of memory error */
return GL_TRUE;
 }
@@ -518,11 +517,6 @@ guess_and_alloc_texture(struct st_context *st,
lastLevel = 0;
 }

-   /* Save the level=0 dimensions */
-   stObj->width0 = width;
-   stObj->height0 = height;
-   stObj->depth0 = depth;
-
 fmt = st_mesa_format_to_pipe_format(st, stImage->base.TexFormat);

 bindings = default_bindings(st, fmt);
@@ -2443,9 +2437,6 @@ st_finalize_texture(struct gl_context *ctx,
if (st_obj->buffer != stObj->pt) {
   pipe_resource_reference(>pt, st_obj->buffer);
   st_texture_release_all_sampler_views(st, stObj);
- stObj->width0 = stObj->pt->width0 / 
_mesa_get_format_bytes(tObj->_BufferObjectFormat);
- stObj->height0 = 1;
- stObj->depth0 = 1;
}
return GL_TRUE;

@@ -2478,25 +2469,44 @@ st_finalize_texture(struct gl_context *ctx,
 /* Find size of level=0 Gallium mipmap image, plus number of texture 
layers */
 {
GLuint width, height, depth;
-  if (!guess_base_level_size(stObj->base.Target,
- firstImage->base.Width2,
- firstImage->base.Height2,
- firstImage->base.Depth2,
- firstImage->base.Level,
- , , )) {
- width = stObj->width0;
- height = stObj->height0;
- depth = stObj->depth0;
+
+  st_gl_texture_dims_to_pipe_dims(stObj->base.Target,
+  firstImage->base.Width2,
+  firstImage->base.Height2,
+  firstImage->base.Depth2,
+  , , , );
+
+  /* If we previously allocated a pipe texture and its sizes are
+   * compatible, use them.
+   */
+  if (stObj->pt &&
+  u_minify(stObj->pt->width0, firstImage->base.Level) == width &&
+  u_minify(stObj->pt->height0, firstImage->base.Level) == height &&
+  u_minify(stObj->pt->depth0, firstImage->base.Level) == depth) {
+ ptWidth = stObj->pt->width0;
+ ptHeight = stObj->pt->height0;
+ ptDepth = stObj->pt->depth0;
} else {
- /* The width/height/depth may have been previously reset in
-  * guess_and_alloc_texture. */
- stObj->width0 = width;
- stObj->height0 = height;
- stObj->depth0 = depth;
+ /* Otherwise, compute a new level=0 size that is compatible with the
+  * base level image.
+  */
+ ptWidth = width > 1 ? width << firstImage->base.Level : 1;
+ ptHeight = height > 1 ? height << firstImage->base.Level : 1;
+ ptDepth 

Re: [Mesa-dev] [PATCH v6] swr: implement clipPlanes/clipVertex/clipDistance/cullDistance

2016-06-08 Thread Cherniak, Bruce
Reviewed-by: Bruce Cherniak  

> On Jun 8, 2016, at 9:59 AM, Tim Rowley  wrote:
> 
> v2: only load the clip vertex once
> 
> v3: fix clip enable logic, add cullDistance
> 
> v4: remove duplicate fields in vs jit key, fix test of clip fixup needed
> 
> v5: fix clipdistance linkage for slot!=0,4
> 
> v6: support clip+cull; passes most piglit clip (failures understood)
> ---
> docs/GL3.txt   |  2 +-
> src/gallium/drivers/swr/swr_context.h  |  2 ++
> src/gallium/drivers/swr/swr_screen.cpp |  3 +-
> src/gallium/drivers/swr/swr_shader.cpp | 65 ++
> src/gallium/drivers/swr/swr_shader.h   |  1 +
> src/gallium/drivers/swr/swr_state.cpp  | 25 -
> 6 files changed, 95 insertions(+), 3 deletions(-)
> 
> diff --git a/docs/GL3.txt b/docs/GL3.txt
> index e8d401d..4ba0366 100644
> --- a/docs/GL3.txt
> +++ b/docs/GL3.txt
> @@ -211,7 +211,7 @@ GL 4.5, GLSL 4.50:
>   GL_ARB_ES3_1_compatibilityDONE (nvc0, radeonsi)
>   GL_ARB_clip_control   DONE (i965, nv50, 
> nvc0, r600, radeonsi, llvmpipe, softpipe, swr)
>   GL_ARB_conditional_render_invertedDONE (i965, nv50, 
> nvc0, r600, radeonsi, llvmpipe, softpipe, swr)
> -  GL_ARB_cull_distance  DONE (i965, nv50, 
> nvc0, llvmpipe, softpipe)
> +  GL_ARB_cull_distance  DONE (i965, nv50, 
> nvc0, llvmpipe, softpipe, swr)
>   GL_ARB_derivative_control DONE (i965, nv50, 
> nvc0, r600, radeonsi)
>   GL_ARB_direct_state_accessDONE (all drivers)
>   GL_ARB_get_texture_sub_image  DONE (all drivers)
> diff --git a/src/gallium/drivers/swr/swr_context.h 
> b/src/gallium/drivers/swr/swr_context.h
> index a7383bb..75ecae3 100644
> --- a/src/gallium/drivers/swr/swr_context.h
> +++ b/src/gallium/drivers/swr/swr_context.h
> @@ -89,6 +89,8 @@ struct swr_draw_context {
>swr_jit_texture texturesFS[PIPE_MAX_SHADER_SAMPLER_VIEWS];
>swr_jit_sampler samplersFS[PIPE_MAX_SAMPLERS];
> 
> +   float userClipPlanes[PIPE_MAX_CLIP_PLANES][4];
> +
>SWR_SURFACE_STATE renderTargets[SWR_NUM_ATTACHMENTS];
> };
> 
> diff --git a/src/gallium/drivers/swr/swr_screen.cpp 
> b/src/gallium/drivers/swr/swr_screen.cpp
> index ca39202..af82c93 100644
> --- a/src/gallium/drivers/swr/swr_screen.cpp
> +++ b/src/gallium/drivers/swr/swr_screen.cpp
> @@ -333,6 +333,8 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap 
> param)
>case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
>case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
>   return 1;
> +   case PIPE_CAP_CULL_DISTANCE:
> +  return 1;
>case PIPE_CAP_TGSI_TXQS:
>case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
>case PIPE_CAP_SHAREABLE_SHADERS:
> @@ -358,7 +360,6 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap 
> param)
>case PIPE_CAP_PCI_DEVICE:
>case PIPE_CAP_PCI_FUNCTION:
>case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
> -   case PIPE_CAP_CULL_DISTANCE:
>case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
>case PIPE_CAP_TGSI_VOTE:
>   return 0;
> diff --git a/src/gallium/drivers/swr/swr_shader.cpp 
> b/src/gallium/drivers/swr/swr_shader.cpp
> index f26467e..8af0700 100644
> --- a/src/gallium/drivers/swr/swr_shader.cpp
> +++ b/src/gallium/drivers/swr/swr_shader.cpp
> @@ -40,6 +40,9 @@
> #include "swr_state.h"
> #include "swr_screen.h"
> 
> +static unsigned
> +locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info);
> +
> bool operator==(const swr_jit_fs_key , const swr_jit_fs_key )
> {
>return !memcmp(, , sizeof(lhs));
> @@ -119,6 +122,11 @@ swr_generate_vs_key(struct swr_jit_vs_key ,
> {
>memset(, 0, sizeof(key));
> 
> +   key.clip_plane_mask =
> +  swr_vs->info.base.clipdist_writemask ?
> +  swr_vs->info.base.clipdist_writemask & 
> ctx->rasterizer->clip_plane_enable :
> +  ctx->rasterizer->clip_plane_enable;
> +
>swr_generate_sampler_key(swr_vs->info, ctx, PIPE_SHADER_VERTEX, key);
> }
> 
> @@ -251,6 +259,63 @@ BuilderSWR::CompileVS(struct swr_context *ctx, 
> swr_jit_vs_key )
>   }
>}
> 
> +   if (ctx->rasterizer->clip_plane_enable ||
> +   swr_vs->info.base.culldist_writemask) {
> +  unsigned clip_mask = ctx->rasterizer->clip_plane_enable;
> +
> +  unsigned cv = 0;
> +  if (swr_vs->info.base.writes_clipvertex) {
> + cv = 1 + locate_linkage(TGSI_SEMANTIC_CLIPVERTEX, 0,
> + _vs->info.base);
> +  } else {
> + for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
> +if (swr_vs->info.base.output_semantic_name[i] == 
> TGSI_SEMANTIC_POSITION &&
> +swr_vs->info.base.output_semantic_index[i] == 0) {
> +   cv = i;
> +   break;
> +}
> + }
> +  }
> +  LLVMValueRef cx = LLVMBuildLoad(gallivm->builder, outputs[cv][0], "");

[Mesa-dev] [PATCH 2/3] radeonsi: print the number of spilled VGPRs

2016-06-08 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_shader.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 512db42..e563aec 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -6040,6 +6040,8 @@ static void si_shader_dump_stats(struct si_screen 
*sscreen,
unsigned lds_increment = sscreen->b.chip_class >= CIK ? 512 : 256;
unsigned lds_per_wave = 0;
unsigned max_simd_waves = 10;
+   /* Assuming SGPRs aren't spilled. */
+   unsigned spilled_vgprs = conf->scratch_bytes_per_wave / 64 / 4;
 
/* Compute LDS usage for PS. */
if (processor == PIPE_SHADER_FRAGMENT) {
@@ -6086,22 +6088,23 @@ static void si_shader_dump_stats(struct si_screen 
*sscreen,
fprintf(file, "*** SHADER STATS ***\n"
"SGPRS: %d\n"
"VGPRS: %d\n"
+   "Spilled VGPRs: %d\n"
"Code Size: %d bytes\n"
"LDS: %d blocks\n"
"Scratch: %d bytes per wave\n"
"Max Waves: %d\n"
"\n",
-   conf->num_sgprs, conf->num_vgprs, code_size,
+   conf->num_sgprs, conf->num_vgprs, spilled_vgprs, 
code_size,
conf->lds_size, conf->scratch_bytes_per_wave,
max_simd_waves);
}
 
pipe_debug_message(debug, SHADER_INFO,
   "Shader Stats: SGPRS: %d VGPRS: %d Code Size: %d "
-  "LDS: %d Scratch: %d Max Waves: %d",
+  "LDS: %d Scratch: %d Max Waves: %d Spilled VGPRs: 
%d",
   conf->num_sgprs, conf->num_vgprs, code_size,
   conf->lds_size, conf->scratch_bytes_per_wave,
-  max_simd_waves);
+  max_simd_waves, spilled_vgprs);
 }
 
 static const char *si_get_shader_name(struct si_shader *shader,
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] radeonsi: enable scratch coalescing

2016-06-08 Thread Marek Olšák
From: Marek Olšák 

Latest LLVM git is required.
---
 src/gallium/drivers/radeonsi/si_shader.c | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 9f8cb54..512db42 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5903,8 +5903,16 @@ void si_shader_apply_scratch_relocs(struct si_context 
*sctx,
unsigned i;
uint32_t scratch_rsrc_dword0 = scratch_va;
uint32_t scratch_rsrc_dword1 =
-   S_008F04_BASE_ADDRESS_HI(scratch_va >> 32)
-   |  S_008F04_STRIDE(config->scratch_bytes_per_wave / 64);
+   S_008F04_BASE_ADDRESS_HI(scratch_va >> 32);
+
+   /* Enable scratch coalescing if LLVM sets ELEMENT_SIZE & INDEX_STRIDE
+* correctly.
+*/
+   if (HAVE_LLVM >= 0x0309)
+   scratch_rsrc_dword1 |= S_008F04_SWIZZLE_ENABLE(1);
+   else
+   scratch_rsrc_dword1 |=
+   S_008F04_STRIDE(config->scratch_bytes_per_wave / 64);
 
for (i = 0 ; i < shader->binary.reloc_count; i++) {
const struct radeon_shader_reloc *reloc =
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] radeonsi: improve the computation and comment of scratch_waves

2016-06-08 Thread Marek Olšák
From: Marek Olšák 

2% isn't much. If you think the number should be decreased, please speak up.
---
 src/gallium/drivers/radeonsi/si_pipe.c | 22 ++
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 5d7d2f3..0c601da 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -236,11 +236,25 @@ static struct pipe_context *si_create_context(struct 
pipe_screen *screen,
 R600_COHERENCY_SHADER);
}
 
-   /* XXX: This is the maximum value allowed.  I'm not sure how to compute
-* this for non-cs shaders.  Using the wrong value here can result in
-* GPU lockups, but the maximum value seems to always work.
+   uint64_t max_threads_per_block;
+   screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
+ PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK,
+ _threads_per_block);
+
+   /* The maximum number of scratch waves. Scratch space isn't divided
+* evenly between CUs. The number is only a function of the number of 
CUs.
+* We can decrease the constant to decrease the scratch buffer size.
+*
+* sctx->scratch_waves must be >= the maximum possible size of
+* 1 threadgroup, so that the hw doesn't hang from being unable
+* to start any.
+*
+* The recommended value is 4 per CU at most. Higher numbers don't
+* bring much benefit, but they still occupy chip resources (think
+* async compute). I've seen ~2% performance difference between 4 and 
32.
 */
-   sctx->scratch_waves = 32 * sscreen->b.info.num_good_compute_units;
+   sctx->scratch_waves = MAX2(32 * sscreen->b.info.num_good_compute_units,
+  max_threads_per_block / 64);
 
/* Initialize LLVM TargetMachine */
r600_target = radeon_llvm_get_r600_target(triple);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] st/mesa: fix type confusion with reladdrs

2016-06-08 Thread Ilia Mirkin
The reality is that this doesn't matter, because we manually emit the
ARL to the sampler reladdr, and those arguments don't get an extra load
later, so it's effectively just a boolean. However having the types be
wrong is confusing and could trigger very odd bugs should usage change
down the line.

Signed-off-by: Ilia Mirkin 
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 634d3a5..53c01c6 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -2228,7 +2228,7 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, 
st_src_reg *op)
 GLSL_TYPE_UINT);
   if (!const_offset) {
  buffer.reladdr = ralloc(mem_ctx, st_src_reg);
- memcpy(buffer.reladdr, _reladdr, sizeof(sampler_reladdr));
+ *buffer.reladdr = op[0];
  emit_arl(ir, sampler_reladdr, op[0]);
   }
   emit_asm(ir, TGSI_OPCODE_RESQ, result_dst)->buffer = buffer;
@@ -3249,9 +3249,9 @@ glsl_to_tgsi_visitor::visit_ssbo_intrinsic(ir_call *ir)
 
if (!const_block) {
   block->accept(this);
-  emit_arl(ir, sampler_reladdr, this->result);
   buffer.reladdr = ralloc(mem_ctx, st_src_reg);
-  memcpy(buffer.reladdr, _reladdr, sizeof(sampler_reladdr));
+  *buffer.reladdr = this->result;
+  emit_arl(ir, sampler_reladdr, this->result);
}
 
/* Calculate the surface offset */
@@ -3466,9 +3466,9 @@ glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir)
get_deref_offsets(img, _array_size, _base,
  (unsigned int *), );
if (reladdr.file != PROGRAM_UNDEFINED) {
-  emit_arl(ir, sampler_reladdr, reladdr);
   image.reladdr = ralloc(mem_ctx, st_src_reg);
-  memcpy(image.reladdr, _reladdr, sizeof(reladdr));
+  *image.reladdr = reladdr;
+  emit_arl(ir, sampler_reladdr, reladdr);
}
 
st_dst_reg dst = undef_dst;
-- 
2.7.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] automake: get in-tree `make distclean' working again.

2016-06-08 Thread Eric Engestrom
On Wed, Jun 08, 2016 at 03:44:39PM +0100, Emil Velikov wrote:
> From: Emil Velikov 
> 
> With earlier commit we've handled the `make distclean' out of tree
> build, yet we failed to attribute that for in-tree builds the test
> condition will return 1. Thus effectively the target will be considered
> as "failed".
> 
> Fixes: b7f7ec78435 ("mesa: automake: distclean git_sha1.h when building
> OOT")
> Cc: 
> Cc: Andy Furniss 
> Reported-by: Andy Furniss 
> Signed-off-by: Emil Velikov 
> ---
> Thanks for catching this Andy. The following seems to work fine here.
> Can you give it a try on you end ?
> 
> Emil
> ---
>  src/Makefile.am | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/Makefile.am b/src/Makefile.am
> index 0527a31..32372da 100644
> --- a/src/Makefile.am
> +++ b/src/Makefile.am
> @@ -50,7 +50,7 @@ BUILT_SOURCES = git_sha1.h
>  # At the same time `make distclean' gets angry at us if we don't cleanup the
>  # builddir one.
>  distclean-local:
> - test $(top_srcdir) != $(top_builddir) && rm $(builddir)/git_sha1.h
> + ( test $(top_srcdir) != $(top_builddir) && rm $(builddir)/git_sha1.h ) 
> || true

I admit I have almost no experience with automake, but shouldn't make's `-test`
achieve the same result?

I also believe you forgot to CC: Andy :)

>  
>  SUBDIRS = . gtest util mapi/glapi/gen mapi
>  
> -- 
> 2.8.2
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v6] swr: implement clipPlanes/clipVertex/clipDistance/cullDistance

2016-06-08 Thread Tim Rowley
v2: only load the clip vertex once

v3: fix clip enable logic, add cullDistance

v4: remove duplicate fields in vs jit key, fix test of clip fixup needed

v5: fix clipdistance linkage for slot!=0,4

v6: support clip+cull; passes most piglit clip (failures understood)
---
 docs/GL3.txt   |  2 +-
 src/gallium/drivers/swr/swr_context.h  |  2 ++
 src/gallium/drivers/swr/swr_screen.cpp |  3 +-
 src/gallium/drivers/swr/swr_shader.cpp | 65 ++
 src/gallium/drivers/swr/swr_shader.h   |  1 +
 src/gallium/drivers/swr/swr_state.cpp  | 25 -
 6 files changed, 95 insertions(+), 3 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index e8d401d..4ba0366 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -211,7 +211,7 @@ GL 4.5, GLSL 4.50:
   GL_ARB_ES3_1_compatibilityDONE (nvc0, radeonsi)
   GL_ARB_clip_control   DONE (i965, nv50, 
nvc0, r600, radeonsi, llvmpipe, softpipe, swr)
   GL_ARB_conditional_render_invertedDONE (i965, nv50, 
nvc0, r600, radeonsi, llvmpipe, softpipe, swr)
-  GL_ARB_cull_distance  DONE (i965, nv50, 
nvc0, llvmpipe, softpipe)
+  GL_ARB_cull_distance  DONE (i965, nv50, 
nvc0, llvmpipe, softpipe, swr)
   GL_ARB_derivative_control DONE (i965, nv50, 
nvc0, r600, radeonsi)
   GL_ARB_direct_state_accessDONE (all drivers)
   GL_ARB_get_texture_sub_image  DONE (all drivers)
diff --git a/src/gallium/drivers/swr/swr_context.h 
b/src/gallium/drivers/swr/swr_context.h
index a7383bb..75ecae3 100644
--- a/src/gallium/drivers/swr/swr_context.h
+++ b/src/gallium/drivers/swr/swr_context.h
@@ -89,6 +89,8 @@ struct swr_draw_context {
swr_jit_texture texturesFS[PIPE_MAX_SHADER_SAMPLER_VIEWS];
swr_jit_sampler samplersFS[PIPE_MAX_SAMPLERS];
 
+   float userClipPlanes[PIPE_MAX_CLIP_PLANES][4];
+
SWR_SURFACE_STATE renderTargets[SWR_NUM_ATTACHMENTS];
 };
 
diff --git a/src/gallium/drivers/swr/swr_screen.cpp 
b/src/gallium/drivers/swr/swr_screen.cpp
index ca39202..af82c93 100644
--- a/src/gallium/drivers/swr/swr_screen.cpp
+++ b/src/gallium/drivers/swr/swr_screen.cpp
@@ -333,6 +333,8 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap 
param)
case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
   return 1;
+   case PIPE_CAP_CULL_DISTANCE:
+  return 1;
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
@@ -358,7 +360,6 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap 
param)
case PIPE_CAP_PCI_DEVICE:
case PIPE_CAP_PCI_FUNCTION:
case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
-   case PIPE_CAP_CULL_DISTANCE:
case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
case PIPE_CAP_TGSI_VOTE:
   return 0;
diff --git a/src/gallium/drivers/swr/swr_shader.cpp 
b/src/gallium/drivers/swr/swr_shader.cpp
index f26467e..8af0700 100644
--- a/src/gallium/drivers/swr/swr_shader.cpp
+++ b/src/gallium/drivers/swr/swr_shader.cpp
@@ -40,6 +40,9 @@
 #include "swr_state.h"
 #include "swr_screen.h"
 
+static unsigned
+locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info);
+
 bool operator==(const swr_jit_fs_key , const swr_jit_fs_key )
 {
return !memcmp(, , sizeof(lhs));
@@ -119,6 +122,11 @@ swr_generate_vs_key(struct swr_jit_vs_key ,
 {
memset(, 0, sizeof(key));
 
+   key.clip_plane_mask =
+  swr_vs->info.base.clipdist_writemask ?
+  swr_vs->info.base.clipdist_writemask & 
ctx->rasterizer->clip_plane_enable :
+  ctx->rasterizer->clip_plane_enable;
+
swr_generate_sampler_key(swr_vs->info, ctx, PIPE_SHADER_VERTEX, key);
 }
 
@@ -251,6 +259,63 @@ BuilderSWR::CompileVS(struct swr_context *ctx, 
swr_jit_vs_key )
   }
}
 
+   if (ctx->rasterizer->clip_plane_enable ||
+   swr_vs->info.base.culldist_writemask) {
+  unsigned clip_mask = ctx->rasterizer->clip_plane_enable;
+
+  unsigned cv = 0;
+  if (swr_vs->info.base.writes_clipvertex) {
+ cv = 1 + locate_linkage(TGSI_SEMANTIC_CLIPVERTEX, 0,
+ _vs->info.base);
+  } else {
+ for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
+if (swr_vs->info.base.output_semantic_name[i] == 
TGSI_SEMANTIC_POSITION &&
+swr_vs->info.base.output_semantic_index[i] == 0) {
+   cv = i;
+   break;
+}
+ }
+  }
+  LLVMValueRef cx = LLVMBuildLoad(gallivm->builder, outputs[cv][0], "");
+  LLVMValueRef cy = LLVMBuildLoad(gallivm->builder, outputs[cv][1], "");
+  LLVMValueRef cz = LLVMBuildLoad(gallivm->builder, outputs[cv][2], "");
+  LLVMValueRef cw = LLVMBuildLoad(gallivm->builder, outputs[cv][3], "");
+
+  for (unsigned val = 0; val < PIPE_MAX_CLIP_PLANES; val++) {
+ // clip distance overrides 

[Mesa-dev] [PATCH] automake: get in-tree `make distclean' working again.

2016-06-08 Thread Emil Velikov
From: Emil Velikov 

With an earlier commit we handled `make distclean' for out-of-tree
builds, yet we failed to account for the fact that for in-tree builds
the test condition returns 1. Thus the target is effectively considered
as "failed".

Fixes: b7f7ec78435 ("mesa: automake: distclean git_sha1.h when building
OOT")
Cc: 
Cc: Andy Furniss 
Reported-by: Andy Furniss 
Signed-off-by: Emil Velikov 
---
Thanks for catching this Andy. The following seems to work fine here.
Can you give it a try on your end?

Emil
---
 src/Makefile.am | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Makefile.am b/src/Makefile.am
index 0527a31..32372da 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -50,7 +50,7 @@ BUILT_SOURCES = git_sha1.h
 # At the same time `make distclean' gets angry at us if we don't cleanup the
 # builddir one.
 distclean-local:
-   test $(top_srcdir) != $(top_builddir) && rm $(builddir)/git_sha1.h
+   ( test $(top_srcdir) != $(top_builddir) && rm $(builddir)/git_sha1.h ) 
|| true
 
 SUBDIRS = . gtest util mapi/glapi/gen mapi
 
-- 
2.8.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] st/va: ensure linear memory for dmabuf

2016-06-08 Thread Christian König

Am 08.06.2016 um 15:53 schrieb Julien Isorce:


To go back to "add a bind flag to struct pipe_video_buffer instead", 
the alternative is to bring back the first version
of the patch, but according to the first review it duplicated the 
bind flag between pipe_video_buffer
and pipe_resource, so it would require quite a big refactoring 
unless I misunderstood the comment.
>> I obviously have missed that discussion, but yes that is exactly 
the way we should go. The duplication is unproblematic as far as I can 
see.


The v1 patch and discussion is here: 
https://patchwork.freedesktop.org/patch/66382/


Thanks for pointing that out! That's more than 6 months ago and I barely 
remember what I had for lunch yesterday :).


In that v1, should I just replace the specific "bool disable_tiling" with 
a generic bind flag?


Yeah, that's what I wanted to say with my comment back then.

Regards,
Christian.



On 8 June 2016 at 14:07, Christian König > wrote:


Am 08.06.2016 um 14:20 schrieb Julien Isorce:



On 8 June 2016 at 09:16, Christian König > wrote:

Am 02.06.2016 um 16:00 schrieb Julien Isorce:

In order to do zero-copy between two different devices
the memory should not be tiled.

This is currently no way to set pipe_resource template's flag
from pipe_video_buffer template. So disabled_tiling is added.

Chose "disable" prefix so that CALLOC keeps tiling enabled
by default.

Tested with GStreamer on a laptop that has 2 GPUs:
1- gstvaapidecode:
HW decoding and dmabuf export with nouveau driver on
Nvidia GPU.
2- glimagesink:
EGLImage imports dmabuf on Intel GPU.

Note that tiling is working if 1 and 2 are done on the
same GPU.
So it is up to the application to set or not the flag:
VA_SURFACE_EXTBUF_DESC_ENABLE_TILING

Signed-off-by: Julien Isorce >



Thx for the review.

NAK, it won't be possible to use the resulting video buffer
with hardware decoding on AMD hardware.


But I restrict to these format:

+case VA_FOURCC_RGBA:
+case VA_FOURCC_RGBX:
+case VA_FOURCC_BGRA:
+case VA_FOURCC_BGRX:

So if the vaapi user request a linear layout, it will fail if not
one of these formats. So basically for now it requires vpp.


Yeah, ok that should work for now but is clearly not a good idea.



Please add a bind flag to struct pipe_video_buffer instead so
that we can specify if linear layout is requested or not.


Do you mean that resource = pscreen->resource_create(pscreen,
) does not honor the bind flag of the template.
Maybe I can just check if it was effective after that call,
i.e. checking presence of PIPE_BIND_LINEAR in
resources[0]->bind.


No, for resource_create() the flag should be honored. But we
shouldn't be using resource_create() to create the different
planes of video buffers if possible. We should use
create_video_buffer() on the pipe context if possible.



This way the hardware driver can still reject the request if
this would result in a surface which can't be decoded to.


For now it requires vpp since I explicitly restricted linear
layout request to the rgbs format above. The reason behind is
that vaapi is limited to export 1 fd per surface. Problem is that
for at least nouveau, it uses 1 pipe resource per plane, and
NV12 has 2 planes.

In the spec the problem comes from the fact that a VAImage has
only one VABufferID. It would require to define
a new VABufferType which represents an array of
VAImageBufferType, something like that.


Yeah, I know that is one of the many deficits VA-API unfortunately
has. It should work with the Radeon implementation, but only
because UVD requires both planes to be in the same buffer object.




To go back to "add a bind flag to struct pipe_video_buffer
instead ", the alternative is to bring back the first version
of the patch but according to the first review, it was
duplication of bind flag between pipe_video_buffer
and pipe_resource so it would require quite a big refactoring
unless I misunderstood the comment.


I obviously have missed that discussion, but yes that is exactly
the way we should go. The duplication is unproblematic as far as I
can see.


Also in vdpau, the function vlVdpOutputSurfaceCreate is using
PIPE_BIND_LINEAR flag and resource_create call,
like in the v2 of my patch.


Yeah, but that isn't a video buffer you try to create here. VA-API
unfortunately doesn't 

Re: [Mesa-dev] [PATCH v2] st/va: ensure linear memory for dmabuf

2016-06-08 Thread Julien Isorce
To go back to "add a bind flag to struct pipe_video_buffer instead ", the
alternative is to bring back the first version
of the patch but according to the first review, it was duplication of bind
flag between pipe_video_buffer
and pipe_resource so it would require quite a big refactoring unless I
misunderstood the comment.

>> I obviously have missed that discussion, but yes that is exactly the way
we should go. The duplication is unproblematic as far as I can see.

The v1 patch and discussion is here:
https://patchwork.freedesktop.org/patch/66382/
In that v1 should I just replace the specific "bool disable_tiling" by a
generic bind flag ?

On 8 June 2016 at 14:07, Christian König  wrote:

> Am 08.06.2016 um 14:20 schrieb Julien Isorce:
>
>
>
> On 8 June 2016 at 09:16, Christian König  wrote:
>
>> Am 02.06.2016 um 16:00 schrieb Julien Isorce:
>>
>>> In order to do zero-copy between two different devices
>>> the memory should not be tiled.
>>>
>>> This is currently no way to set pipe_resource template's flag
>>> from pipe_video_buffer template. So disabled_tiling is added.
>>>
>>> Choosed "disable" prefix so that CALLOC keeps tiling enabled
>>> by default.
>>>
>>> Tested with GStreamer on a laptop that has 2 GPUs:
>>> 1- gstvaapidecode:
>>> HW decoding and dmabuf export with nouveau driver on Nvidia GPU.
>>> 2- glimagesink:
>>> EGLImage imports dmabuf on Intel GPU.
>>>
>>> Note that tiling is working if 1 and 2 are done on the same GPU.
>>> So it is up to the application to set or not the flag:
>>> VA_SURFACE_EXTBUF_DESC_ENABLE_TILING
>>>
>>> Signed-off-by: Julien Isorce < 
>>> j.iso...@samsung.com>
>>>
>>
>>
> Thx for the review.
>
>
>> NAK, it won't be possible to use the resulting video buffer with hardware
>> decoding on AMD hardware.
>>
>
> But I restrict to these format:
>
> +case VA_FOURCC_RGBA:
> +case VA_FOURCC_RGBX:
> +case VA_FOURCC_BGRA:
> +case VA_FOURCC_BGRX:
>
> So if the vaapi user request a linear layout, it will fail if not one of
> these formats. So basically for now it requires vpp.
>
>
> Yeah, ok that should work for now but is clearly not a good idea.
>
>
>
>>
>> Please add a bind flag to struct pipe_video_buffer instead so that we can
>> specify if linear layout is requested or not.
>>
>
> Do you mean that resource = pscreen->resource_create(pscreen, ) does
> not honor the bind flag of the template.
> Maybe I can just checked if it was effective after that call, i.e.
> checking presence of PIPE_BIND_LINEAR in
> resources[0]->bind.
>
>
> No, for resource_create() the flag should be honored. But we shouldn't be
> using resource_create() to create the different planes of video buffers if
> possible. We should use create_video_buffer() on the pipe context if
> possible.
>
>
>
>>
>> This way the hardware driver can still reject the request if this would
>> result in a surface which can't be decoded to.
>>
>
> For now it requires vpp since I explicitly restricted linear layout
> request to the rgbs format above. The reason behind is
> that vaapi is limited to export 1 fd per surface. Problem is that for at
> least nouveau, it uses 1 pipe resource per plane, and
> NV12 has 2 planes.
>
> In the spec the problem comes from the fact that a VAImage has only one
> VABufferID. It would require to define
> a new VABufferType which represents an array of VAImageBufferType,
> something like that.
>
>
> Yeah, I know that is one of the many deficits VA-API unfortunately has. It
> should work with the Radeon implementation, but only because UVD requires
> both planes to be in the same buffer object.
>
>
>
> To go back to "add a bind flag to struct pipe_video_buffer instead ", the
> alternative is to bring back the first version
> of the patch but according to the first review, it was duplication of bind
> flag between pipe_video_buffer
> and pipe_resource so it would require quite of big of refactoring unless I
> miss understood the comment.
>
>
> I obviously have missed that discussion, but yes that is exactly the way
> we should go. The duplication is unproblematic as far as I can see.
>
> Also in vdpau, the function vlVdpOutputSurfaceCreate is using
> PIPE_BIND_LINEAR flag and resource_create call,
> like in the v2 of my patch.
>
>
> Yeah, but that isn't a video buffer you try to create here. VA-API
> unfortunately doesn't properly distinguish between those two things.
>
> Regards,
> Christian.
>
>
>
> Not sure if I replied to your concerns, let me know.
>
> Thx
> Julien
>
>
>>
>> Regards,
>> Christian.
>>
>>
>> ---
>>>   src/gallium/state_trackers/va/surface.c | 60
>>> -
>>>   1 file changed, 59 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/src/gallium/state_trackers/va/surface.c
>>> b/src/gallium/state_trackers/va/surface.c
>>> index 8a6a397..b04ced4 100644
>>> --- a/src/gallium/state_trackers/va/surface.c
>>> +++ 

[Mesa-dev] in tree make distclean failing since mesa: automake: distclean git_sha1.h when building OOT

2016-06-08 Thread Andy Furniss

In tree build make distclean is failing for me since -


b7f7ec78435771ab02f7d9a61bb1d4a11df720b8 is the first bad commit
commit b7f7ec78435771ab02f7d9a61bb1d4a11df720b8
Author: Emil Velikov 
Date:   Mon Jun 6 19:39:40 2016 +0100

mesa: automake: distclean git_sha1.h when building OOT

In the case of out-of-tree (OOT) builds, in particular when building
from tarball, we'll end up with the file in both srcdir and builddir.

We want the former to remain intact (since we need it on rebuild) while
the latter should be removed otherwise `make distclean' gets angry at
us.

Ideally there'll be a solution that feels a bit less of a hack. Until
then this does the job exactly as expected.


I usually clean with make distclean then git clean -dfx and did use the 
latter during bisect.


./autogen.sh --prefix=/usr --sysconfdir=/etc --enable-texture-float 
--enable-opencl --enable-omx --with-egl-platforms=x11,drm,wayland 
--with-gallium-drivers=radeonsi,swrast --enable-gbm 
--enable-shared-glapi --enable-glx-tls --with-dri-drivers= && make -j5


.

make distclean

Making distclean in src
make[1]: Entering directory '/mnt/sdb1/Gits/mesa/src'
Making distclean in .
make[2]: Entering directory '/mnt/sdb1/Gits/mesa/src'
rm -rf .libs _libs
rm -rf mesa/main/.libs mesa/main/_libs
rm -rf mesa/program/.libs mesa/program/_libs
test -z "libglsl_util.la" || rm -f libglsl_util.la
rm -f ./so_locations
rm -f *.o
rm -f mesa/main/*.o
rm -f mesa/main/*.lo
rm -f mesa/program/*.o
rm -f mesa/program/*.lo
rm -f *.lo
rm -f *.tab.c
test -z "" || rm -f
test . = "." || test -z "" || rm -f
rm -f mesa/main/.deps/.dirstamp
rm -f mesa/main/.dirstamp
rm -f mesa/program/.deps/.dirstamp
rm -f mesa/program/.dirstamp
test .. != .. && rm ./git_sha1.h
Makefile:1009: recipe for target 'distclean-local' failed
make[2]: *** [distclean-local] Error 1
make[2]: Leaving directory '/mnt/sdb1/Gits/mesa/src'
Makefile:699: recipe for target 'distclean-recursive' failed
make[1]: *** [distclean-recursive] Error 1
make[1]: Leaving directory '/mnt/sdb1/Gits/mesa/src'
Makefile:644: recipe for target 'distclean-recursive' failed
make: *** [distclean-recursive] Error 1
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] st/va: ensure linear memory for dmabuf

2016-06-08 Thread Christian König

Am 08.06.2016 um 14:20 schrieb Julien Isorce:



On 8 June 2016 at 09:16, Christian König > wrote:


Am 02.06.2016 um 16:00 schrieb Julien Isorce:

In order to do zero-copy between two different devices
the memory should not be tiled.

This is currently no way to set pipe_resource template's flag
from pipe_video_buffer template. So disabled_tiling is added.

Choosed "disable" prefix so that CALLOC keeps tiling enabled
by default.

Tested with GStreamer on a laptop that has 2 GPUs:
1- gstvaapidecode:
HW decoding and dmabuf export with nouveau driver on
Nvidia GPU.
2- glimagesink:
EGLImage imports dmabuf on Intel GPU.

Note that tiling is working if 1 and 2 are done on the same GPU.
So it is up to the application to set or not the flag:
VA_SURFACE_EXTBUF_DESC_ENABLE_TILING

Signed-off-by: Julien Isorce >



Thx for the review.

NAK, it won't be possible to use the resulting video buffer with
hardware decoding on AMD hardware.


But I restrict to these format:

+case VA_FOURCC_RGBA:
+case VA_FOURCC_RGBX:
+case VA_FOURCC_BGRA:
+case VA_FOURCC_BGRX:

So if the vaapi user request a linear layout, it will fail if not one 
of these formats. So basically for now it requires vpp.


Yeah, ok that should work for now but is clearly not a good idea.



Please add a bind flag to struct pipe_video_buffer instead so that
we can specify if linear layout is requested or not.


Do you mean that resource = pscreen->resource_create(pscreen, ) 
does not honor the bind flag of the template.
Maybe I can just checked if it was effective after that call, i.e. 
checking presence of PIPE_BIND_LINEAR in

resources[0]->bind.


No, for resource_create() the flag should be honored. But we shouldn't 
be using resource_create() to create the different planes of video 
buffers if possible. We should use create_video_buffer() on the pipe 
context if possible.




This way the hardware driver can still reject the request if this
would result in a surface which can't be decoded to.


For now it requires vpp since I explicitly restricted linear layout 
request to the rgbs format above. The reason behind is
that vaapi is limited to export 1 fd per surface. Problem is that for 
at least nouveau, it uses 1 pipe resource per plane, and

NV12 has 2 planes.

In the spec the problem comes from the fact that a VAImage has only 
one VABufferID. It would require to define
a new VABufferType which represents an array of VAImageBufferType, 
something like that.


Yeah, I know that is one of the many deficits VA-API unfortunately has. 
It should work with the Radeon implementation, but only because UVD 
requires both planes to be in the same buffer object.





To go back to "add a bind flag to struct pipe_video_buffer instead ", 
the alternative is to bring back the first version
of the patch but according to the first review, it was duplication of 
bind flag between pipe_video_buffer
and pipe_resource so it would require quite a big refactoring 
unless I misunderstood the comment.


I obviously have missed that discussion, but yes that is exactly the way 
we should go. The duplication is unproblematic as far as I can see.


Also in vdpau, the function vlVdpOutputSurfaceCreate is using 
PIPE_BIND_LINEAR flag and resource_create call,

like in the v2 of my patch.


Yeah, but that isn't a video buffer you try to create here. VA-API 
unfortunately doesn't properly distinguish between those two things.


Regards,
Christian.



Not sure if I replied to your concerns, let me know.

Thx
Julien


Regards,
Christian.


---
  src/gallium/state_trackers/va/surface.c | 60
-
  1 file changed, 59 insertions(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/va/surface.c
b/src/gallium/state_trackers/va/surface.c
index 8a6a397..b04ced4 100644
--- a/src/gallium/state_trackers/va/surface.c
+++ b/src/gallium/state_trackers/va/surface.c
@@ -507,7 +507,9 @@ vlVaCreateSurfaces2(VADriverContextP ctx,
unsigned int format,
 int i;
 int memory_type;
 int expected_fourcc;
+   int linear_layout;
 VAStatus vaStatus;
+   const enum pipe_format *resource_formats;
   if (!ctx)
return VA_STATUS_ERROR_INVALID_CONTEXT;
@@ -529,6 +531,7 @@ vlVaCreateSurfaces2(VADriverContextP ctx,
unsigned int format,
 memory_attibute = NULL;
 memory_type = VA_SURFACE_ATTRIB_MEM_TYPE_VA;
 expected_fourcc = 0;
+   resource_formats = NULL;
   for (i = 0; i < num_attribs && attrib_list; 

Re: [Mesa-dev] [PATCH 00/12] RadeonSI: DCC mipmapping support & improvements

2016-06-08 Thread
Hello

Mesa 12.1.0-devel (git-8c3ecde) has rendering issues when running
"glxgears -samples N" with N >= 1 on R9 390. It was running ok
yesterday/previously.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] st/va: ensure linear memory for dmabuf

2016-06-08 Thread Julien Isorce
On 8 June 2016 at 09:16, Christian König  wrote:

> Am 02.06.2016 um 16:00 schrieb Julien Isorce:
>
>> In order to do zero-copy between two different devices
>> the memory should not be tiled.
>>
>> This is currently no way to set pipe_resource template's flag
>> from pipe_video_buffer template. So disabled_tiling is added.
>>
>> Choosed "disable" prefix so that CALLOC keeps tiling enabled
>> by default.
>>
>> Tested with GStreamer on a laptop that has 2 GPUs:
>> 1- gstvaapidecode:
>> HW decoding and dmabuf export with nouveau driver on Nvidia GPU.
>> 2- glimagesink:
>> EGLImage imports dmabuf on Intel GPU.
>>
>> Note that tiling is working if 1 and 2 are done on the same GPU.
>> So it is up to the application to set or not the flag:
>> VA_SURFACE_EXTBUF_DESC_ENABLE_TILING
>>
>> Signed-off-by: Julien Isorce 
>>
>
>
Thx for the review.


> NAK, it won't be possible to use the resulting video buffer with hardware
> decoding on AMD hardware.
>

But I restrict to these format:

+case VA_FOURCC_RGBA:
+case VA_FOURCC_RGBX:
+case VA_FOURCC_BGRA:
+case VA_FOURCC_BGRX:

So if the vaapi user request a linear layout, it will fail if not one of
these formats. So basically for now it requires vpp.


>
> Please add a bind flag to struct pipe_video_buffer instead so that we can
> specify if linear layout is requested or not.
>

Do you mean that resource = pscreen->resource_create(pscreen, ) does
not honor the bind flag of the template.
Maybe I can just checked if it was effective after that call, i.e. checking
presence of PIPE_BIND_LINEAR in
resources[0]->bind.


>
> This way the hardware driver can still reject the request if this would
> result in a surface which can't be decoded to.
>

For now it requires vpp since I explicitly restricted linear layout request
to the rgbs format above. The reason behind is
that vaapi is limited to export 1 fd per surface. Problem is that for at
least nouveau, it uses 1 pipe resource per plane, and
NV12 has 2 planes.

In the spec the problem comes from the fact that a VAImage has only one
VABufferID. It would require to define
a new VABufferType which represents an array of VAImageBufferType,
something like that.


To go back to "add a bind flag to struct pipe_video_buffer instead ", the
alternative is to bring back the first version
of the patch but according to the first review, it was duplication of bind
flag between pipe_video_buffer
and pipe_resource so it would require quite a big refactoring unless I
misunderstood the comment.
Also in vdpau, the function vlVdpOutputSurfaceCreate is using
PIPE_BIND_LINEAR flag and resource_create call,
like in the v2 of my patch.

Not sure if I replied to your concerns, let me know.

Thx
Julien


>
> Regards,
> Christian.
>
>
> ---
>>   src/gallium/state_trackers/va/surface.c | 60
>> -
>>   1 file changed, 59 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/gallium/state_trackers/va/surface.c
>> b/src/gallium/state_trackers/va/surface.c
>> index 8a6a397..b04ced4 100644
>> --- a/src/gallium/state_trackers/va/surface.c
>> +++ b/src/gallium/state_trackers/va/surface.c
>> @@ -507,7 +507,9 @@ vlVaCreateSurfaces2(VADriverContextP ctx, unsigned
>> int format,
>>  int i;
>>  int memory_type;
>>  int expected_fourcc;
>> +   int linear_layout;
>>  VAStatus vaStatus;
>> +   const enum pipe_format *resource_formats;
>>if (!ctx)
>> return VA_STATUS_ERROR_INVALID_CONTEXT;
>> @@ -529,6 +531,7 @@ vlVaCreateSurfaces2(VADriverContextP ctx, unsigned
>> int format,
>>  memory_attibute = NULL;
>>  memory_type = VA_SURFACE_ATTRIB_MEM_TYPE_VA;
>>  expected_fourcc = 0;
>> +   resource_formats = NULL;
>>for (i = 0; i < num_attribs && attrib_list; i++) {
>> if ((attrib_list[i].type == VASurfaceAttribPixelFormat) &&
>> @@ -569,8 +572,27 @@ vlVaCreateSurfaces2(VADriverContextP ctx, unsigned
>> int format,
>> return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
>>  }
>>   +   /* The application will clear the TILING flag when the surface is
>> +* intended to be exported as dmabuf. */
>> +   linear_layout = memory_attibute &&
>> +  !(memory_attibute->flags & VA_SURFACE_EXTBUF_DESC_ENABLE_TILING);
>> +
>>  switch (memory_type) {
>>  case VA_SURFACE_ATTRIB_MEM_TYPE_VA:
>> +  if (linear_layout) {
>> + switch (expected_fourcc) {
>> +case VA_FOURCC_RGBA:
>> +case VA_FOURCC_RGBX:
>> +case VA_FOURCC_BGRA:
>> +case VA_FOURCC_BGRX:
>> +   if (memory_attibute->num_planes != 1)
>> +  return VA_STATUS_ERROR_INVALID_PARAMETER;
>> +   break;
>> +default:
>> +   return VA_STATUS_ERROR_INVALID_PARAMETER;
>> + }
>> +  }
>> +
>> break;
>>  case VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME:
>> if (!memory_attibute)
>> @@ -587,6 

Re: [Mesa-dev] [PATCH] st/va: it is valid to release the VABuffer of an exported resource

2016-06-08 Thread Christian König
I'm clearly not deep enough into VA-API to judge if that is correct or 
not, but it sounds sane to me.


So feel free to add an Acked-by: Christian König 
 on the patch.


Cheers,
Christian.

Am 08.06.2016 um 13:13 schrieb Julien Isorce:

Hi Christian,

Thx for the review.

pipe_resource_reference(, NULL)  will decrement reference counting 
(p_atomic_dec  res->count). But the va surface still has the initial 
reference since it created that resource.
So calling vaDestroyImage on a derived image will call VaDestroyBuffer 
but the decrement won't reach 0.


It is just wrong that vlVaDestroyBuffer relies on the export_refcount 
flag. I also compared with vaapi intel driver and they have same flag 
and it is not present in their vaDestroyBuffer.


Cheers
Julien


On 8 June 2016 at 09:22, Christian König > wrote:


Am 02.06.2016 um 16:03 schrieb Julien Isorce:

Signed-off-by: Julien Isorce >


Actually I'm not sure if that is correct.

If you release the VABuffer of an exported resource you won't be
able to properly close the handle with vlVaReleaseBufferHandle().

On the other hand the semantic VA requires for
vlVaAcquireBufferHandle() and vlVaReleaseBufferHandle() is
complete nonsense for DMA-buf handles.

Christian.

---
  src/gallium/state_trackers/va/buffer.c | 8 +---
  1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/src/gallium/state_trackers/va/buffer.c
b/src/gallium/state_trackers/va/buffer.c
index 2fd8661..7d3167b 100644
--- a/src/gallium/state_trackers/va/buffer.c
+++ b/src/gallium/state_trackers/va/buffer.c
@@ -192,14 +192,8 @@ vlVaDestroyBuffer(VADriverContextP ctx,
VABufferID buf_id)
return VA_STATUS_ERROR_INVALID_BUFFER;
 }
  -   if (buf->derived_surface.resource) {
-  if (buf->export_refcount > 0) {
- pipe_mutex_unlock(drv->mutex);
- return VA_STATUS_ERROR_INVALID_BUFFER;
-  }
-
+   if (buf->derived_surface.resource)
pipe_resource_reference(&buf->derived_surface.resource, NULL);
-   }
   FREE(buf->data);
 FREE(buf);


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org 
https://lists.freedesktop.org/mailman/listinfo/mesa-dev




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v4 2/2] st/vdpau: implement luma keyring

2016-06-08 Thread Christian König
With the typo fixed, both patches are Reviewed-by: Christian König 



If there aren't any more objections I'm going to push the patches now.

Regards,
Christian.

Am 08.06.2016 um 11:28 schrieb Michel Dänzer:

AFAIK the term is "luma keying", not "luma keyring".




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/va: it is valid to release the VABuffer of an exported resource

2016-06-08 Thread Julien Isorce
Hi Christian,

Thx for the review.

pipe_resource_reference(, NULL)  will decrement reference counting
(p_atomic_dec  res->count). But the va surface still has the initial
reference since it created that resource.
So calling vaDestroyImage on a derived image will call VaDestroyBuffer but
the decrement won't reach 0.

It is just wrong that vlVaDestroyBuffer relies on the export_refcount flag.
I also compared with vaapi intel driver and they have same flag and it is
not present in their vaDestroyBuffer.

Cheers
Julien


On 8 June 2016 at 09:22, Christian König  wrote:

> Am 02.06.2016 um 16:03 schrieb Julien Isorce:
>
>> Signed-off-by: Julien Isorce 
>>
>
> Actually I'm not sure if that is correct.
>
> If you release the VABuffer of an exported resource you won't be able to
> properly close the handle with vlVaReleaseBufferHandle().
>
> On the other hand the semantic VA requires for vlVaAcquireBufferHandle()
> and vlVaReleaseBufferHandle() is complete nonsense for DMA-buf handles.
>
> Christian.
>
> ---
>>   src/gallium/state_trackers/va/buffer.c | 8 +---
>>   1 file changed, 1 insertion(+), 7 deletions(-)
>>
>> diff --git a/src/gallium/state_trackers/va/buffer.c
>> b/src/gallium/state_trackers/va/buffer.c
>> index 2fd8661..7d3167b 100644
>> --- a/src/gallium/state_trackers/va/buffer.c
>> +++ b/src/gallium/state_trackers/va/buffer.c
>> @@ -192,14 +192,8 @@ vlVaDestroyBuffer(VADriverContextP ctx, VABufferID
>> buf_id)
>> return VA_STATUS_ERROR_INVALID_BUFFER;
>>  }
>>   -   if (buf->derived_surface.resource) {
>> -  if (buf->export_refcount > 0) {
>> - pipe_mutex_unlock(drv->mutex);
>> - return VA_STATUS_ERROR_INVALID_BUFFER;
>> -  }
>> -
>> +   if (buf->derived_surface.resource)
>> pipe_resource_reference(>derived_surface.resource, NULL);
>> -   }
>>FREE(buf->data);
>>  FREE(buf);
>>
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v4] st/vdpau: implement luma keying

2016-06-08 Thread Nayan Deshmukh
Signed-off-by: Nayan Deshmukh 
---
 src/gallium/state_trackers/vdpau/mixer.c | 45 ++--
 src/gallium/state_trackers/vdpau/vdpau_private.h |  6 +++-
 2 files changed, 39 insertions(+), 12 deletions(-)

diff --git a/src/gallium/state_trackers/vdpau/mixer.c 
b/src/gallium/state_trackers/vdpau/mixer.c
index 1070e96..b12a580 100644
--- a/src/gallium/state_trackers/vdpau/mixer.c
+++ b/src/gallium/state_trackers/vdpau/mixer.c
@@ -92,7 +92,6 @@ vlVdpVideoMixerCreate(VdpDevice device,
   case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L8:
   case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L9:
   case VDP_VIDEO_MIXER_FEATURE_INVERSE_TELECINE:
-  case VDP_VIDEO_MIXER_FEATURE_LUMA_KEY:
  break;
 
   case VDP_VIDEO_MIXER_FEATURE_DEINTERLACE_TEMPORAL:
@@ -107,6 +106,10 @@ vlVdpVideoMixerCreate(VdpDevice device,
  vmixer->noise_reduction.supported = true;
  break;
 
+  case VDP_VIDEO_MIXER_FEATURE_LUMA_KEY:
+ vmixer->luma_key.supported = true;
+ break;
+
   default: goto no_params;
   }
}
@@ -148,8 +151,8 @@ vlVdpVideoMixerCreate(VdpDevice device,
 vmixer->video_height, max_size);
   goto no_params;
}
-   vmixer->luma_key_min = 0.f;
-   vmixer->luma_key_max = 1.f;
+   vmixer->luma_key.luma_min = 1.0f;
+   vmixer->luma_key.luma_max = 0.0f;
pipe_mutex_unlock(dev->mutex);
 
return VDP_STATUS_OK;
@@ -490,7 +493,6 @@ vlVdpVideoMixerGetFeatureSupport(VdpVideoMixer mixer,
   case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L8:
   case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L9:
   case VDP_VIDEO_MIXER_FEATURE_INVERSE_TELECINE:
-  case VDP_VIDEO_MIXER_FEATURE_LUMA_KEY:
  feature_supports[i] = false;
  break;
 
@@ -506,6 +508,10 @@ vlVdpVideoMixerGetFeatureSupport(VdpVideoMixer mixer,
  feature_supports[i] = vmixer->noise_reduction.supported;
  break;
 
+  case VDP_VIDEO_MIXER_FEATURE_LUMA_KEY:
+ feature_supports[i] = vmixer->luma_key.supported;
+ break;
+
   default:
  return VDP_STATUS_INVALID_VIDEO_MIXER_FEATURE;
   }
@@ -548,7 +554,6 @@ vlVdpVideoMixerSetFeatureEnables(VdpVideoMixer mixer,
   case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L8:
   case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L9:
   case VDP_VIDEO_MIXER_FEATURE_INVERSE_TELECINE:
-  case VDP_VIDEO_MIXER_FEATURE_LUMA_KEY:
  break;
 
   case VDP_VIDEO_MIXER_FEATURE_DEINTERLACE_TEMPORAL:
@@ -566,6 +571,13 @@ vlVdpVideoMixerSetFeatureEnables(VdpVideoMixer mixer,
  vlVdpVideoMixerUpdateNoiseReductionFilter(vmixer);
  break;
 
+  case VDP_VIDEO_MIXER_FEATURE_LUMA_KEY:
+ vmixer->luma_key.enabled = feature_enables[i];
+ if (!debug_get_bool_option("G3DVL_NO_CSC", FALSE))
+vl_compositor_set_csc_matrix(&vmixer->cstate, (const vl_csc_matrix 
*)&vmixer->csc,
+ vmixer->luma_key.luma_min, 
vmixer->luma_key.luma_max);
+ break;
+
   default:
  pipe_mutex_unlock(vmixer->device->mutex);
  return VDP_STATUS_INVALID_VIDEO_MIXER_FEATURE;
@@ -610,7 +622,6 @@ vlVdpVideoMixerGetFeatureEnables(VdpVideoMixer mixer,
   case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L8:
   case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L9:
   case VDP_VIDEO_MIXER_FEATURE_INVERSE_TELECINE:
-  case VDP_VIDEO_MIXER_FEATURE_LUMA_KEY:
  break;
 
   case VDP_VIDEO_MIXER_FEATURE_SHARPNESS:
@@ -621,6 +632,10 @@ vlVdpVideoMixerGetFeatureEnables(VdpVideoMixer mixer,
  feature_enables[i] = vmixer->noise_reduction.enabled;
  break;
 
+  case VDP_VIDEO_MIXER_FEATURE_LUMA_KEY:
+ feature_enables[i] = vmixer->luma_key.enabled;
+ break;
+
   default:
  return VDP_STATUS_INVALID_VIDEO_MIXER_FEATURE;
   }
@@ -671,7 +686,8 @@ vlVdpVideoMixerSetAttributeValues(VdpVideoMixer mixer,
  else
 memcpy(vmixer->csc, vdp_csc, sizeof(vl_csc_matrix));
  if (!debug_get_bool_option("G3DVL_NO_CSC", FALSE))
-vl_compositor_set_csc_matrix(&vmixer->cstate, (const vl_csc_matrix 
*)&vmixer->csc, 1.0f, 0.0f);
+vl_compositor_set_csc_matrix(&vmixer->cstate, (const vl_csc_matrix 
*)&vmixer->csc,
+ vmixer->luma_key.luma_min, 
vmixer->luma_key.luma_max);
  break;
 
   case VDP_VIDEO_MIXER_ATTRIBUTE_NOISE_REDUCTION_LEVEL:
@@ -692,15 +708,22 @@ vlVdpVideoMixerSetAttributeValues(VdpVideoMixer mixer,
 ret = VDP_STATUS_INVALID_VALUE;
 goto fail;
  }
- vmixer->luma_key_min = val;
+ vmixer->luma_key.luma_min = val;
+ if (!debug_get_bool_option("G3DVL_NO_CSC", FALSE))
+vl_compositor_set_csc_matrix(&vmixer->cstate, (const vl_csc_matrix 
*)&vmixer->csc,
+ vmixer->luma_key.luma_min, 
vmixer->luma_key.luma_max);
  break;
+
 

Re: [Mesa-dev] [PATCH v4 2/2] st/vdpau: implement luma keyring

2016-06-08 Thread Michel Dänzer

AFAIK the term is "luma keying", not "luma keyring".


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v4 1/2] vl: Apply luma key filter before CSC conversion

2016-06-08 Thread Nayan Deshmukh
Apply the luma key filter to the YCbCr values during the CSC conversion
in video buffer shader. The initial values of max and min luma are set
to opposite values to disable the filter initially and will be set when
enabling it.

Add extra parameters min and max luma for the luma key filter in
vl_compositor_set_csc_matrix in va, xvmc. Setting them
to the opposite values 1.f and 0.f respectively won't affect the CSC
conversion.

v2: -Squash 1,2 and 3 into one patch to avoid breaking build of
other components. (Christian)
-use ureg_swizzle. (Christian)
-change name of the variables. (Christian)

v3: -Squash all patches in one to avoid breaking of build. (Emil)
-wrap functions properly. (Emil)
-use 0.0f and 1.0f instead of 0.f and 1.f respectively. (Emil)

v4: -Divide it in two patches one which introduces the functionality
 and assigns dummy values to the changed functions and second which
 implements the lumakey filter. (Christian)
-use ureg_scalar instead ureg_swizzle. (Christian)

Signed-off-by: Nayan Deshmukh 
---
 src/gallium/auxiliary/vl/vl_compositor.c | 44 
 src/gallium/auxiliary/vl/vl_compositor.h |  4 ++-
 src/gallium/state_trackers/va/context.c  |  2 +-
 src/gallium/state_trackers/vdpau/mixer.c |  4 +--
 src/gallium/state_trackers/vdpau/output.c|  4 +--
 src/gallium/state_trackers/xvmc/attributes.c |  2 +-
 src/gallium/state_trackers/xvmc/context.c|  2 +-
 7 files changed, 42 insertions(+), 20 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c 
b/src/gallium/auxiliary/vl/vl_compositor.c
index acb2f4f..1a383f2 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -132,8 +132,10 @@ create_frag_shader_video_buffer(struct vl_compositor *c)
struct ureg_src tc;
struct ureg_src csc[3];
struct ureg_src sampler[3];
+   struct ureg_src lumakey;
struct ureg_dst texel;
struct ureg_dst fragment;
+   struct ureg_dst temp[2];
unsigned i;
 
shader = ureg_create(PIPE_SHADER_FRAGMENT);
@@ -145,6 +147,11 @@ create_frag_shader_video_buffer(struct vl_compositor *c)
   csc[i] = ureg_DECL_constant(shader, i);
   sampler[i] = ureg_DECL_sampler(shader, i);
}
+
+   for (i = 0; i < 2; ++i)
+  temp[i] = ureg_DECL_temporary(shader);
+
+   lumakey = ureg_DECL_constant(shader, 3);
texel = ureg_DECL_temporary(shader);
fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
@@ -160,7 +167,17 @@ create_frag_shader_video_buffer(struct vl_compositor *c)
for (i = 0; i < 3; ++i)
   ureg_DP4(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X << i), 
csc[i], ureg_src(texel));
 
-   ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), 
ureg_imm1f(shader, 1.0f));
+   ureg_MOV(shader, ureg_writemask(temp[0], TGSI_WRITEMASK_W),
+ureg_scalar(ureg_src(texel), TGSI_SWIZZLE_Z));
+   ureg_SLE(shader, ureg_writemask(temp[1],TGSI_WRITEMASK_W),
+ureg_src(temp[0]), ureg_scalar(lumakey, TGSI_SWIZZLE_X));
+   ureg_SGT(shader, ureg_writemask(temp[0],TGSI_WRITEMASK_W),
+ureg_src(temp[0]), ureg_scalar(lumakey, TGSI_SWIZZLE_Y));
+   ureg_MAX(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W),
+ureg_src(temp[0]), ureg_src(temp[1]));
+
+   for (i = 0; i < 2; ++i)
+   ureg_release_temporary(shader, temp[i]);
 
ureg_release_temporary(shader, texel);
ureg_END(shader);
@@ -852,20 +869,23 @@ vl_compositor_cleanup(struct vl_compositor *c)
 }
 
 void
-vl_compositor_set_csc_matrix(struct vl_compositor_state *s, vl_csc_matrix 
const *matrix)
+vl_compositor_set_csc_matrix(struct vl_compositor_state *s,
+ vl_csc_matrix const *matrix,
+ float luma_min, float luma_max)
 {
struct pipe_transfer *buf_transfer;
 
assert(s);
 
-   memcpy
-   (
-  pipe_buffer_map(s->pipe, s->csc_matrix,
-  PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE,
-  &buf_transfer),
-  matrix,
-  sizeof(vl_csc_matrix)
-   );
+   float *ptr = pipe_buffer_map(s->pipe, s->csc_matrix,
+   PIPE_TRANSFER_WRITE | 
PIPE_TRANSFER_DISCARD_RANGE,
+   &buf_transfer);
+
+   memcpy(ptr, matrix, sizeof(vl_csc_matrix));
+
+   ptr += sizeof(vl_csc_matrix)/sizeof(float);
+   ptr[0] = luma_min;
+   ptr[1] = luma_max;
 
pipe_buffer_unmap(s->pipe, buf_transfer);
 }
@@ -1142,13 +1162,13 @@ vl_compositor_init_state(struct vl_compositor_state *s, 
struct pipe_context *pip
   pipe->screen,
   PIPE_BIND_CONSTANT_BUFFER,
   PIPE_USAGE_DEFAULT,
-  sizeof(csc_matrix)
+  sizeof(csc_matrix) + 2*sizeof(float)
);
 
vl_compositor_clear_layers(s);
 
vl_csc_get_matrix(VL_CSC_COLOR_STANDARD_IDENTITY, NULL, true, &csc_matrix);
-   vl_compositor_set_csc_matrix(s, (const 

[Mesa-dev] [PATCH v4 2/2] st/vdpau: implement luma keying

2016-06-08 Thread Nayan Deshmukh
Signed-off-by: Nayan Deshmukh 
---
 src/gallium/state_trackers/vdpau/mixer.c | 45 ++--
 src/gallium/state_trackers/vdpau/vdpau_private.h |  6 +++-
 2 files changed, 39 insertions(+), 12 deletions(-)

diff --git a/src/gallium/state_trackers/vdpau/mixer.c 
b/src/gallium/state_trackers/vdpau/mixer.c
index 1070e96..b12a580 100644
--- a/src/gallium/state_trackers/vdpau/mixer.c
+++ b/src/gallium/state_trackers/vdpau/mixer.c
@@ -92,7 +92,6 @@ vlVdpVideoMixerCreate(VdpDevice device,
   case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L8:
   case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L9:
   case VDP_VIDEO_MIXER_FEATURE_INVERSE_TELECINE:
-  case VDP_VIDEO_MIXER_FEATURE_LUMA_KEY:
  break;
 
   case VDP_VIDEO_MIXER_FEATURE_DEINTERLACE_TEMPORAL:
@@ -107,6 +106,10 @@ vlVdpVideoMixerCreate(VdpDevice device,
  vmixer->noise_reduction.supported = true;
  break;
 
+  case VDP_VIDEO_MIXER_FEATURE_LUMA_KEY:
+ vmixer->luma_key.supported = true;
+ break;
+
   default: goto no_params;
   }
}
@@ -148,8 +151,8 @@ vlVdpVideoMixerCreate(VdpDevice device,
 vmixer->video_height, max_size);
   goto no_params;
}
-   vmixer->luma_key_min = 0.f;
-   vmixer->luma_key_max = 1.f;
+   vmixer->luma_key.luma_min = 1.0f;
+   vmixer->luma_key.luma_max = 0.0f;
pipe_mutex_unlock(dev->mutex);
 
return VDP_STATUS_OK;
@@ -490,7 +493,6 @@ vlVdpVideoMixerGetFeatureSupport(VdpVideoMixer mixer,
   case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L8:
   case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L9:
   case VDP_VIDEO_MIXER_FEATURE_INVERSE_TELECINE:
-  case VDP_VIDEO_MIXER_FEATURE_LUMA_KEY:
  feature_supports[i] = false;
  break;
 
@@ -506,6 +508,10 @@ vlVdpVideoMixerGetFeatureSupport(VdpVideoMixer mixer,
  feature_supports[i] = vmixer->noise_reduction.supported;
  break;
 
+  case VDP_VIDEO_MIXER_FEATURE_LUMA_KEY:
+ feature_supports[i] = vmixer->luma_key.supported;
+ break;
+
   default:
  return VDP_STATUS_INVALID_VIDEO_MIXER_FEATURE;
   }
@@ -548,7 +554,6 @@ vlVdpVideoMixerSetFeatureEnables(VdpVideoMixer mixer,
   case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L8:
   case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L9:
   case VDP_VIDEO_MIXER_FEATURE_INVERSE_TELECINE:
-  case VDP_VIDEO_MIXER_FEATURE_LUMA_KEY:
  break;
 
   case VDP_VIDEO_MIXER_FEATURE_DEINTERLACE_TEMPORAL:
@@ -566,6 +571,13 @@ vlVdpVideoMixerSetFeatureEnables(VdpVideoMixer mixer,
  vlVdpVideoMixerUpdateNoiseReductionFilter(vmixer);
  break;
 
+  case VDP_VIDEO_MIXER_FEATURE_LUMA_KEY:
+ vmixer->luma_key.enabled = feature_enables[i];
+ if (!debug_get_bool_option("G3DVL_NO_CSC", FALSE))
+vl_compositor_set_csc_matrix(&vmixer->cstate, (const vl_csc_matrix 
*)&vmixer->csc,
+ vmixer->luma_key.luma_min, 
vmixer->luma_key.luma_max);
+ break;
+
   default:
  pipe_mutex_unlock(vmixer->device->mutex);
  return VDP_STATUS_INVALID_VIDEO_MIXER_FEATURE;
@@ -610,7 +622,6 @@ vlVdpVideoMixerGetFeatureEnables(VdpVideoMixer mixer,
   case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L8:
   case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L9:
   case VDP_VIDEO_MIXER_FEATURE_INVERSE_TELECINE:
-  case VDP_VIDEO_MIXER_FEATURE_LUMA_KEY:
  break;
 
   case VDP_VIDEO_MIXER_FEATURE_SHARPNESS:
@@ -621,6 +632,10 @@ vlVdpVideoMixerGetFeatureEnables(VdpVideoMixer mixer,
  feature_enables[i] = vmixer->noise_reduction.enabled;
  break;
 
+  case VDP_VIDEO_MIXER_FEATURE_LUMA_KEY:
+ feature_enables[i] = vmixer->luma_key.enabled;
+ break;
+
   default:
  return VDP_STATUS_INVALID_VIDEO_MIXER_FEATURE;
   }
@@ -671,7 +686,8 @@ vlVdpVideoMixerSetAttributeValues(VdpVideoMixer mixer,
  else
 memcpy(vmixer->csc, vdp_csc, sizeof(vl_csc_matrix));
  if (!debug_get_bool_option("G3DVL_NO_CSC", FALSE))
-vl_compositor_set_csc_matrix(&vmixer->cstate, (const vl_csc_matrix 
*)&vmixer->csc, 1.0f, 0.0f);
+vl_compositor_set_csc_matrix(&vmixer->cstate, (const vl_csc_matrix 
*)&vmixer->csc,
+ vmixer->luma_key.luma_min, 
vmixer->luma_key.luma_max);
  break;
 
   case VDP_VIDEO_MIXER_ATTRIBUTE_NOISE_REDUCTION_LEVEL:
@@ -692,15 +708,22 @@ vlVdpVideoMixerSetAttributeValues(VdpVideoMixer mixer,
 ret = VDP_STATUS_INVALID_VALUE;
 goto fail;
  }
- vmixer->luma_key_min = val;
+ vmixer->luma_key.luma_min = val;
+ if (!debug_get_bool_option("G3DVL_NO_CSC", FALSE))
+vl_compositor_set_csc_matrix(&vmixer->cstate, (const vl_csc_matrix 
*)&vmixer->csc,
+ vmixer->luma_key.luma_min, 
vmixer->luma_key.luma_max);
  break;
+
 

Re: [Mesa-dev] [PATCH] st/va: it is valid to release the VABuffer of an exported resource

2016-06-08 Thread Christian König

Am 02.06.2016 um 16:03 schrieb Julien Isorce:

Signed-off-by: Julien Isorce 


Actually I'm not sure if that is correct.

If you release the VABuffer of an exported resource you won't be able to 
properly close the handle with vlVaReleaseBufferHandle().


On the other hand the semantics VA requires for vlVaAcquireBufferHandle() 
and vlVaReleaseBufferHandle() are complete nonsense for DMA-buf handles.


Christian.


---
  src/gallium/state_trackers/va/buffer.c | 8 +---
  1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/src/gallium/state_trackers/va/buffer.c 
b/src/gallium/state_trackers/va/buffer.c
index 2fd8661..7d3167b 100644
--- a/src/gallium/state_trackers/va/buffer.c
+++ b/src/gallium/state_trackers/va/buffer.c
@@ -192,14 +192,8 @@ vlVaDestroyBuffer(VADriverContextP ctx, VABufferID buf_id)
return VA_STATUS_ERROR_INVALID_BUFFER;
 }
  
-   if (buf->derived_surface.resource) {

-  if (buf->export_refcount > 0) {
- pipe_mutex_unlock(drv->mutex);
- return VA_STATUS_ERROR_INVALID_BUFFER;
-  }
-
+   if (buf->derived_surface.resource)
pipe_resource_reference(&buf->derived_surface.resource, NULL);
-   }
  
 FREE(buf->data);

 FREE(buf);


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 96408] [PERF] SSO: dirty all stages when only one is updated. Trigger extra validations.

2016-06-08 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=96408

--- Comment #2 from gregory.hain...@gmail.com ---
Hum, when a new program is set, use_shader_program is called and the
_NEW_PROGRAM flag will be set.

use_shader_program()
{
  
  if (*target != shProg) {
  /* Program is current, flush it */
  if (shTarget == ctx->_Shader) {
 FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
  }
  
}

During the draw, the program update will be checked if _NEW_PROGRAM is set,
and the new program will be bound accordingly:

#0  st_bind_program (ctx=0x8801f88, target=34820, prog=0x8acfbf0) at
state_tracker/st_cb_program.c:59
#1  0xf460bd3c in update_program (ctx=0x8801f88) at main/state.c:262
#2  _mesa_update_state_locked (ctx=0x8801f88) at main/state.c:468
#3  0xf460c0c4 in _mesa_update_state (ctx=0x8801f88) at main/state.c:499
#4  0xf4501739 in _mesa_valid_to_render (ctx=0x8801f88, where=0xf4a901be
"glDrawArrays") at main/context.c:1935
#5  0xf44db08f in check_valid_to_render (function=0xf4a901be "glDrawArrays",
ctx=0x8801f88) at main/api_validate.c:44

So I have the feeling that "ctx->Driver.UseProgram" is kind of useless. Indeed,
I didn't manage to find the code for dri (not-gallium) drivers. So it is
probably undefined.

Perf wise, the removal of Driver.UseProgram yields a 10-15% speed increase on
my use case.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/va: use proper temp pipe_video_buffer template

2016-06-08 Thread Christian König

Am 02.06.2016 um 16:03 schrieb Julien Isorce:

Instead of changing the format on the existing template
which makes error handling not nice and confuses coverity.

CoverityID: 1337953

Signed-off-by: Julien Isorce 


Reviewed-by: Christian König 


---
  src/gallium/state_trackers/va/image.c | 8 
  1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/gallium/state_trackers/va/image.c 
b/src/gallium/state_trackers/va/image.c
index 92d014c..1b956e3 100644
--- a/src/gallium/state_trackers/va/image.c
+++ b/src/gallium/state_trackers/va/image.c
@@ -473,19 +473,19 @@ vlVaPutImage(VADriverContextP ctx, VASurfaceID surface, 
VAImageID image,
  
 if (format != surf->buffer->buffer_format) {

struct pipe_video_buffer *tmp_buf;
-  enum pipe_format old_surf_format = surf->templat.buffer_format;
+  struct pipe_video_buffer templat = surf->templat;
  
-  surf->templat.buffer_format = format;

-  tmp_buf = drv->pipe->create_video_buffer(drv->pipe, &surf->templat);
+  templat.buffer_format = format;
+  tmp_buf = drv->pipe->create_video_buffer(drv->pipe, &templat);
  
if (!tmp_buf) {

- surf->templat.buffer_format = old_surf_format;
   pipe_mutex_unlock(drv->mutex);
   return VA_STATUS_ERROR_ALLOCATION_FAILED;
}
  
surf->buffer->destroy(surf->buffer);

surf->buffer = tmp_buf;
+  surf->templat.buffer_format = format;
 }
  
 views = surf->buffer->get_sampler_view_planes(surf->buffer);


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   >