Re: [Mesa-dev] [PATCH 3/3] llvmpipe: add sse code for fixed position calculation

2016-01-04 Thread Jose Fonseca

On 02/01/16 20:39, srol...@vmware.com wrote:

From: Roland Scheidegger 

This is quite a few fewer instructions, although it still does the two 64bit
muls with scalar C code (they'd need way more shuffles, plus fixup for the
signed mul, so it totally doesn't seem worth it - x86 can do 32x32->64bit
signed scalar muls natively just fine after all, even on 32bit).

(This still doesn't have a measurable performance impact in reality, although
profiler seems to say time spent in setup indeed has gone down by 10% or so
overall.)
---
  src/gallium/drivers/llvmpipe/lp_setup_tri.c | 58 +
  1 file changed, 50 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c 
b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index cb1d715..fefd1c1 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -65,11 +65,11 @@ fixed_to_float(int a)
  struct fixed_position {
 int32_t x[4];
 int32_t y[4];
-   int64_t area;
 int32_t dx01;
 int32_t dy01;
 int32_t dx20;
 int32_t dy20;
+   int64_t area;
  };


@@ -866,29 +866,71 @@ static void retry_triangle_ccw( struct lp_setup_context 
*setup,

  /**
   * Calculate fixed position data for a triangle
+ * It is unfortunate we need to do that here (as we need area
+ * calculated in fixed point), as there's quite some code duplication
+ * to what is done in the jit setup prog.
   */
  static inline void
-calc_fixed_position( struct lp_setup_context *setup,
- struct fixed_position* position,
- const float (*v0)[4],
- const float (*v1)[4],
- const float (*v2)[4])
+calc_fixed_position(struct lp_setup_context *setup,
+struct fixed_position* position,
+const float (*v0)[4],
+const float (*v1)[4],
+const float (*v2)[4])
  {
+   /*
+* The rounding may not be quite the same with PIPE_ARCH_SSE
+* (util_iround right now only does nearest/even on x87,
+* otherwise nearest/away-from-zero).
+* Both should be acceptable, I think.
+*/
+#if defined(PIPE_ARCH_SSE)
+   __m128d v0r, v1r, v2r;
+   __m128 vxy0xy2, vxy1xy0;
+   __m128i vxy0xy2i, vxy1xy0i;
+   __m128i dxdy0120, x0x2y0y2, x1x0y1y0, x0120, y0120;
+   __m128 pix_offset = _mm_set1_ps(setup->pixel_offset);
+   __m128 fixed_one = _mm_set1_ps((float)FIXED_ONE);
+   v0r = _mm_load_sd((const double *)v0[0]);
+   v1r = _mm_load_sd((const double *)v1[0]);
+   v2r = _mm_load_sd((const double *)v2[0]);
+   vxy0xy2 = (__m128)_mm_unpacklo_pd(v0r, v2r);
+   vxy1xy0 = (__m128)_mm_unpacklo_pd(v1r, v0r);
+   vxy0xy2 = _mm_sub_ps(vxy0xy2, pix_offset);
+   vxy1xy0 = _mm_sub_ps(vxy1xy0, pix_offset);
+   vxy0xy2 = _mm_mul_ps(vxy0xy2, fixed_one);
+   vxy1xy0 = _mm_mul_ps(vxy1xy0, fixed_one);
+   vxy0xy2i = _mm_cvtps_epi32(vxy0xy2);
+   vxy1xy0i = _mm_cvtps_epi32(vxy1xy0);
+   dxdy0120 = _mm_sub_epi32(vxy0xy2i, vxy1xy0i);
+   _mm_store_si128((__m128i *)&position->dx01, dxdy0120);
+   /*
+* For the mul, would need some more shuffles, plus emulation
+* for the signed mul (without sse41), so don't bother.
+*/
+   x0x2y0y2 = _mm_shuffle_epi32(vxy0xy2i, _MM_SHUFFLE(3,1,2,0));
+   x1x0y1y0 = _mm_shuffle_epi32(vxy1xy0i, _MM_SHUFFLE(3,1,2,0));
+   x0120 = _mm_unpacklo_epi32(x0x2y0y2, x1x0y1y0);
+   y0120 = _mm_unpackhi_epi32(x0x2y0y2, x1x0y1y0);
+   _mm_store_si128((__m128i *)&position->x[0], x0120);
+   _mm_store_si128((__m128i *)&position->y[0], y0120);
+
+#else
 position->x[0] = subpixel_snap(v0[0][0] - setup->pixel_offset);
 position->x[1] = subpixel_snap(v1[0][0] - setup->pixel_offset);
 position->x[2] = subpixel_snap(v2[0][0] - setup->pixel_offset);
-   position->x[3] = 0;
+   position->x[3] = 0; // should be unused

 position->y[0] = subpixel_snap(v0[0][1] - setup->pixel_offset);
 position->y[1] = subpixel_snap(v1[0][1] - setup->pixel_offset);
 position->y[2] = subpixel_snap(v2[0][1] - setup->pixel_offset);
-   position->y[3] = 0;
+   position->y[3] = 0; // should be unused

 position->dx01 = position->x[0] - position->x[1];
 position->dy01 = position->y[0] - position->y[1];

 position->dx20 = position->x[2] - position->x[0];
 position->dy20 = position->y[2] - position->y[0];
+#endif

 position->area = IMUL64(position->dx01, position->dy20) -
   IMUL64(position->dx20, position->dy01);



LGTM too.

Reviewed-by: Jose Fonseca 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/5] nvc0: add ARB_indirect_parameters support

2016-01-04 Thread Samuel Pitoiset

Did you write piglit tests for this new extension?
I don't see any tests which use MultiDrawArraysIndirectCountARB() or 
MultiDrawElementsIndirectCountARB().



Are you going to submit the rest of the series for nvc0?

On 01/02/2016 09:38 PM, Ilia Mirkin wrote:

I chose to make separate macros for this due to the additional
complexity and extra scratch usage.

Signed-off-by: Ilia Mirkin 
---
  docs/relnotes/11.2.0.html  |   1 +
  src/gallium/drivers/nouveau/nvc0/mme/com9097.mme   | 157 +
  src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h | 125 
  src/gallium/drivers/nouveau/nvc0/nvc0_macros.h |   4 +
  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c |   4 +-
  src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c|  29 +++-
  6 files changed, 314 insertions(+), 6 deletions(-)

diff --git a/docs/relnotes/11.2.0.html b/docs/relnotes/11.2.0.html
index d31da8b..616c134 100644
--- a/docs/relnotes/11.2.0.html
+++ b/docs/relnotes/11.2.0.html
@@ -47,6 +47,7 @@ Note: some of the new features are only available with 
certain drivers.
  GL_ARB_base_instance on freedreno/a4xx
  GL_ARB_compute_shader on i965
  GL_ARB_copy_image on r600
+GL_ARB_indirect_parameters on nvc0
  GL_ARB_shader_draw_parameters on i965, nvc0
  GL_ARB_tessellation_shader on i965 and r600 (evergreen/cayman only)
  GL_ARB_texture_buffer_object_rgb32 on freedreno/a4xx
diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme 
b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
index 35355ed..4daa57d 100644
--- a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
+++ b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
@@ -334,3 +334,160 @@ dai_end:
 mov $r6 (add $r6 1)
 exit maddr 0x50e /* VB_INSTANCE_BASE to restore */
 send $r5
+
+/* NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT
+ *
+ * NOTE: Saves and restores VB_ELEMENT,INSTANCE_BASE.
+ * Forcefully sets VERTEX_ID_BASE to the value of VB_ELEMENT_BASE.
+ *
+ * arg = mode
+ * parm[0] = start_drawid
+ * parm[1] = numparams
+ * parm[2] = totaldraws
+ * parm[3 + 5n + 0] = count
+ * parm[3 + 5n + 1] = instance_count
+ * parm[3 + 5n + 2] = start
+ * parm[3 + 5n + 3] = index_bias
+ * parm[3 + 5n + 4] = start_instance
+ *
+ * SCRATCH[0] = saved VB_ELEMENT_BASE
+ * SCRATCH[1] = saved VB_INSTANCE_BASE
+ * SCRATCH[2] = draws left
+ */
+.section #mme9097_draw_elts_indirect_count
+   read $r6 0x50d /* VB_ELEMENT_BASE */
+   read $r7 0x50e /* VB_INSTANCE_BASE */
+   maddr 0x1d00
+   send $r6 /* SCRATCH[0] = VB_ELEMENT_BASE */
+   send $r7 /* SCRATCH[1] = VB_INSTANCE_BASE */
+   parm $r6 /* start_drawid */
+   parm $r7 /* numparams */
+   parm $r5 /* totaldraws */
+   mov $r5 (sub $r5 $r6) /* draws left */
+   braz $r5 #deic_runout
+   mov $r3 (extrinsrt 0x0 $r5 31 1 0) /* extract high bit */
+   branz $r3 #deic_runout
+   send $r5
+deic_draw_again:
+   parm $r3 /* count */
+   parm $r2 /* instance_count */
+   parm $r4 maddr 0x5f7 /* INDEX_BATCH_FIRST, start */
+   parm $r4 send $r4 /* index_bias, send start */
+   maddr 0x18e3 /* CB_POS */
+   send 0x180 /* 256 + 128 */
+   braz $r2 #deic_end
+   parm $r5 send $r4 /* start_instance, send index_bias */
+   send $r5 /* send start_instance */
+   send $r6 /* draw id */
+   maddr 0x150d /* VB_ELEMENT,INSTANCE_BASE */
+   send $r4
+   send $r5
+   maddr 0x446
+   send $r4
+   mov $r4 0x1
+   mov $r1 (extrinsrt $r1 0x0 0 1 26) /* clear INSTANCE_NEXT */
+deic_again:
+   maddr 0x586 /* VERTEX_BEGIN_GL */
+   send $r1 /* mode */
+   maddr 0x5f8 /* INDEX_BATCH_COUNT */
+   send $r3 /* count */
+   mov $r2 (sub $r2 $r4)
+   maddrsend 0x585 /* VERTEX_END_GL */
+   branz $r2 #deic_again
+   mov $r1 (extrinsrt $r1 $r4 0 1 26) /* set INSTANCE_NEXT */
+deic_end:
+   read $r5 0xd02
+   mov $r5 (add $r5 -1)
+   braz $r5 #deic_runout_check
+   mov $r7 (add $r7 -1)
+   maddr 0xd02
+   send $r5
+   branz $r7 #deic_draw_again
+   mov $r6 (add $r6 1)
+deic_restore:
+   read $r6 0xd00
+   read $r7 0xd01
+   maddr 0x150d /* VB_ELEMENT,INSTANCE_BASE */
+   send $r6
+   send $r7
+   exit maddr 0x446
+   send $r6
+deic_runout:
+   parm $r2
+   parm $r2
+   parm $r2
+   parm $r2
+   parm $r2
+   mov $r7 (add $r7 -1)
+deic_runout_check:
+   branz annul $r7 #deic_runout
+   bra annul #deic_restore
+
+/* NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT:
+ *
+ * NOTE: Saves and restores VB_INSTANCE_BASE.
+ *
+ * arg = mode
+ * parm[0] = start_drawid
+ * parm[1] = numparams
+ * parm[2] = totaldraws
+ * parm[3 + 4n + 0] = count
+ * parm[3 + 4n + 1] = instance_count
+ * parm[3 + 4n + 2] = start
+ * parm[3 + 4n + 3] = start_instance
+ *
+ * SCRATCH[0] = VB_INSTANCE_BASE
+ */
+.section #mme9097_draw_arrays_indirect_count
+   read $r5 0x50e /* VB_INSTANCE_BASE */
+   maddr 0xd00
+   parm $r6 send $r5 /* start_drawid, save VB_INSTANCE_BASE */
+   parm $r7 /* numparams */
+   parm $r5 /* totaldraws */
+   mov $r5 (sub $r5 $r6) /* draws left */
+   braz $r5 #daic_runout
+   mov $r3 (extrinsrt 0x0 $r5 31 

Re: [Mesa-dev] [PATCH] nir: few missing struct names

2016-01-04 Thread eocallaghan

Reviewed-by: Edward O'Callaghan 

On 2016-01-05 05:27, Rob Clark wrote:

From: Rob Clark 

nir.h is a bit inconsistent about 'typedef struct {} nir_foo' vs
'typedef struct nir_foo {} nir_foo'.  But missing struct name tags is
inconvenient when you need a fwd declaration without pulling in all
of nir.

So add missing struct name tag for nir_variable, and a couple other
spots where it would likely be useful.

Signed-off-by: Rob Clark 
---
 src/glsl/nir/nir.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 4286738..bedcc0d 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -139,7 +139,7 @@ typedef enum {
  * ir_variable - it should be easy to translate between the two.
  */

-typedef struct {
+typedef struct nir_variable {
struct exec_node node;

/**
@@ -349,7 +349,7 @@ typedef struct {
 #define nir_foreach_variable(var, var_list) \
foreach_list_typed(nir_variable, var, node, var_list)

-typedef struct {
+typedef struct nir_register {
struct exec_node node;

unsigned num_components; /** < number of vector components */
@@ -443,7 +443,7 @@ nir_instr_is_last(nir_instr *instr)
return 
exec_node_is_tail_sentinel(exec_node_get_next(&instr->node));

 }

-typedef struct {
+typedef struct nir_ssa_def {
/** for debugging only, can be NULL */
const char* name;


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/5] nvc0: add ARB_indirect_parameters support

2016-01-04 Thread Ilia Mirkin
On Mon, Jan 4, 2016 at 3:04 PM, Samuel Pitoiset
 wrote:
> Did you write piglit tests for this new extension?
> I don't see any tests which use MultiDrawArraysIndirectCountARB() or
> MultiDrawElementsIndirectCountARB().

http://patchwork.freedesktop.org/patch/69334/

>
>
> Are you going to submit the rest of the series for nvc0?

Mmmmaybe. It's part of the ARB_multi_draw_indirect stuff.

>
>
> On 01/02/2016 09:38 PM, Ilia Mirkin wrote:
>>
>> I chose to make separate macros for this due to the additional
>> complexity and extra scratch usage.
>>
>> Signed-off-by: Ilia Mirkin 
>> ---
>>   docs/relnotes/11.2.0.html  |   1 +
>>   src/gallium/drivers/nouveau/nvc0/mme/com9097.mme   | 157
>> +
>>   src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h | 125
>> 
>>   src/gallium/drivers/nouveau/nvc0/nvc0_macros.h |   4 +
>>   src/gallium/drivers/nouveau/nvc0/nvc0_screen.c |   4 +-
>>   src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c|  29 +++-
>>   6 files changed, 314 insertions(+), 6 deletions(-)
>>
>> diff --git a/docs/relnotes/11.2.0.html b/docs/relnotes/11.2.0.html
>> index d31da8b..616c134 100644
>> --- a/docs/relnotes/11.2.0.html
>> +++ b/docs/relnotes/11.2.0.html
>> @@ -47,6 +47,7 @@ Note: some of the new features are only available with
>> certain drivers.
>>   GL_ARB_base_instance on freedreno/a4xx
>>   GL_ARB_compute_shader on i965
>>   GL_ARB_copy_image on r600
>> +GL_ARB_indirect_parameters on nvc0
>>   GL_ARB_shader_draw_parameters on i965, nvc0
>>   GL_ARB_tessellation_shader on i965 and r600 (evergreen/cayman
>> only)
>>   GL_ARB_texture_buffer_object_rgb32 on freedreno/a4xx
>> diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
>> b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
>> index 35355ed..4daa57d 100644
>> --- a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
>> +++ b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
>> @@ -334,3 +334,160 @@ dai_end:
>>  mov $r6 (add $r6 1)
>>  exit maddr 0x50e /* VB_INSTANCE_BASE to restore */
>>  send $r5
>> +
>> +/* NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT
>> + *
>> + * NOTE: Saves and restores VB_ELEMENT,INSTANCE_BASE.
>> + * Forcefully sets VERTEX_ID_BASE to the value of VB_ELEMENT_BASE.
>> + *
>> + * arg = mode
>> + * parm[0] = start_drawid
>> + * parm[1] = numparams
>> + * parm[2] = totaldraws
>> + * parm[3 + 5n + 0] = count
>> + * parm[3 + 5n + 1] = instance_count
>> + * parm[3 + 5n + 2] = start
>> + * parm[3 + 5n + 3] = index_bias
>> + * parm[3 + 5n + 4] = start_instance
>> + *
>> + * SCRATCH[0] = saved VB_ELEMENT_BASE
>> + * SCRATCH[1] = saved VB_INSTANCE_BASE
>> + * SCRATCH[2] = draws left
>> + */
>> +.section #mme9097_draw_elts_indirect_count
>> +   read $r6 0x50d /* VB_ELEMENT_BASE */
>> +   read $r7 0x50e /* VB_INSTANCE_BASE */
>> +   maddr 0x1d00
>> +   send $r6 /* SCRATCH[0] = VB_ELEMENT_BASE */
>> +   send $r7 /* SCRATCH[1] = VB_INSTANCE_BASE */
>> +   parm $r6 /* start_drawid */
>> +   parm $r7 /* numparams */
>> +   parm $r5 /* totaldraws */
>> +   mov $r5 (sub $r5 $r6) /* draws left */
>> +   braz $r5 #deic_runout
>> +   mov $r3 (extrinsrt 0x0 $r5 31 1 0) /* extract high bit */
>> +   branz $r3 #deic_runout
>> +   send $r5
>> +deic_draw_again:
>> +   parm $r3 /* count */
>> +   parm $r2 /* instance_count */
>> +   parm $r4 maddr 0x5f7 /* INDEX_BATCH_FIRST, start */
>> +   parm $r4 send $r4 /* index_bias, send start */
>> +   maddr 0x18e3 /* CB_POS */
>> +   send 0x180 /* 256 + 128 */
>> +   braz $r2 #deic_end
>> +   parm $r5 send $r4 /* start_instance, send index_bias */
>> +   send $r5 /* send start_instance */
>> +   send $r6 /* draw id */
>> +   maddr 0x150d /* VB_ELEMENT,INSTANCE_BASE */
>> +   send $r4
>> +   send $r5
>> +   maddr 0x446
>> +   send $r4
>> +   mov $r4 0x1
>> +   mov $r1 (extrinsrt $r1 0x0 0 1 26) /* clear INSTANCE_NEXT */
>> +deic_again:
>> +   maddr 0x586 /* VERTEX_BEGIN_GL */
>> +   send $r1 /* mode */
>> +   maddr 0x5f8 /* INDEX_BATCH_COUNT */
>> +   send $r3 /* count */
>> +   mov $r2 (sub $r2 $r4)
>> +   maddrsend 0x585 /* VERTEX_END_GL */
>> +   branz $r2 #deic_again
>> +   mov $r1 (extrinsrt $r1 $r4 0 1 26) /* set INSTANCE_NEXT */
>> +deic_end:
>> +   read $r5 0xd02
>> +   mov $r5 (add $r5 -1)
>> +   braz $r5 #deic_runout_check
>> +   mov $r7 (add $r7 -1)
>> +   maddr 0xd02
>> +   send $r5
>> +   branz $r7 #deic_draw_again
>> +   mov $r6 (add $r6 1)
>> +deic_restore:
>> +   read $r6 0xd00
>> +   read $r7 0xd01
>> +   maddr 0x150d /* VB_ELEMENT,INSTANCE_BASE */
>> +   send $r6
>> +   send $r7
>> +   exit maddr 0x446
>> +   send $r6
>> +deic_runout:
>> +   parm $r2
>> +   parm $r2
>> +   parm $r2
>> +   parm $r2
>> +   parm $r2
>> +   mov $r7 (add $r7 -1)
>> +deic_runout_check:
>> +   branz annul $r7 #deic_runout
>> +   bra annul #deic_restore
>> +
>> +/* NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT:
>> + *
>> + * NOTE: Saves and restores VB_INSTANCE_BASE.
>> + *
>> + * 

Re: [Mesa-dev] [PATCH 1/8] tgsi: add ureg support for image decls

2016-01-04 Thread Ilia Mirkin
On Mon, Jan 4, 2016 at 3:25 PM, Ilia Mirkin  wrote:
> On Mon, Jan 4, 2016 at 3:13 PM, Samuel Pitoiset
>  wrote:
>> On 01/03/2016 05:37 AM, Ilia Mirkin wrote:
>>> Signed-off-by: Ilia Mirkin 
>>> diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c
>>> b/src/gallium/auxiliary/tgsi/tgsi_strings.c
>>> index fd926b3..ae30399 100644
>>> --- a/src/gallium/auxiliary/tgsi/tgsi_strings.c
>>> +++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c
>>> @@ -54,8 +54,8 @@ static const char *tgsi_file_names[] =
>>>  "IMM",
>>>  "PRED",
>>>  "SV",
>>> -   "RES",
>>> -   "SVIEW"
>>> +   "IMAGE",
>>> +   "SVIEW",
>>>   };
>>
>>
>> You forgot to update src/gallium/tests/trivial/compute.c, and so you did
>> break all of those compute tests. Please don't break them because they are
>> useful for people which work on compute. :-)
>
> Can I update it by deleting it? Like I was saying, all that stuff
> needs to be reworked... since compute.c doesn't *actually* work with
> anything, and actively conflicts with plans for OpenGL things, I see
> no problem with just invalidating the interfaces it was using. (This
> is also why I was discouraging people from trying to do TGSI with
> compute without considering these things first.)

After some IRC discussion with Samuel we agreed that I'll just note
that I'm breaking the sole user of the RESOURCE TGSI in the commit,
and the test can be updated when someone has time to think about how
clover should plug into the new GL-facing interfaces being developed.

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/8] tgsi: add ureg support for image decls

2016-01-04 Thread Samuel Pitoiset



On 01/04/2016 09:25 PM, Ilia Mirkin wrote:

On Mon, Jan 4, 2016 at 3:13 PM, Samuel Pitoiset
 wrote:

On 01/03/2016 05:37 AM, Ilia Mirkin wrote:

Signed-off-by: Ilia Mirkin 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c
b/src/gallium/auxiliary/tgsi/tgsi_strings.c
index fd926b3..ae30399 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_strings.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c
@@ -54,8 +54,8 @@ static const char *tgsi_file_names[] =
  "IMM",
  "PRED",
  "SV",
-   "RES",
-   "SVIEW"
+   "IMAGE",
+   "SVIEW",
   };



You forgot to update src/gallium/tests/trivial/compute.c, and so you did
break all of those compute tests. Please don't break them because they are
useful for people which work on compute. :-)


Can I update it by deleting it? Like I was saying, all that stuff
needs to be reworked... since compute.c doesn't *actually* work with
anything, and actively conflicts with plans for OpenGL things, I see
no problem with just invalidating the interfaces it was using. (This
is also why I was discouraging people from trying to do TGSI with
compute without considering these things first.)


We discussed that on IRC, and the plan is to update compute.c once
the interface is more suitable (when BUFFER/IMAGE are implemented),
because this RES thing is weird and needs to be removed.


This patch is :

Acked-by: Samuel Pitoiset 

Thanks!




diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index beb67fe..87c5dc1 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -348,7 +348,7 @@ static nv50_ir::DataFile translateFile(uint file)
  case TGSI_FILE_PREDICATE:   return nv50_ir::FILE_PREDICATE;
  case TGSI_FILE_IMMEDIATE:   return nv50_ir::FILE_IMMEDIATE;
  case TGSI_FILE_SYSTEM_VALUE:return nv50_ir::FILE_SYSTEM_VALUE;
-   case TGSI_FILE_RESOURCE:return nv50_ir::FILE_MEMORY_GLOBAL;
+   //case TGSI_FILE_RESOURCE:return nv50_ir::FILE_MEMORY_GLOBAL;



You definitely break global resources here. What is your plan for that? Are
you going to fix them in upcoming patches?


What are global resources needed for? Anyways, if it's *actually*
necessary for anything (and I've yet to see what that might be), one
could have a GLOBAL bit on a BUFFER declaration, much like the ATOMIC
flag (and I plan on also having a SHARED flag you can set for
compute). The previous thing with fixed large indices was a pretty big
hack anyways.

   -ilia


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/8] tgsi: add ureg support for image decls

2016-01-04 Thread Samuel Pitoiset



On 01/03/2016 05:37 AM, Ilia Mirkin wrote:

Signed-off-by: Ilia Mirkin 
---
  src/gallium/auxiliary/tgsi/tgsi_build.c| 62 +
  src/gallium/auxiliary/tgsi/tgsi_dump.c | 10 +--
  src/gallium/auxiliary/tgsi/tgsi_parse.c|  4 +-
  src/gallium/auxiliary/tgsi/tgsi_parse.h|  2 +-
  src/gallium/auxiliary/tgsi/tgsi_strings.c  |  4 +-
  src/gallium/auxiliary/tgsi/tgsi_text.c | 10 +--
  src/gallium/auxiliary/tgsi/tgsi_ureg.c | 77 ++
  src/gallium/auxiliary/tgsi/tgsi_ureg.h |  7 ++
  src/gallium/drivers/ilo/shader/toy_tgsi.c  |  8 +--
  .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  | 12 +++-
  src/gallium/drivers/svga/svga_tgsi_vgpu10.c|  2 +
  src/gallium/include/pipe/p_shader_tokens.h |  7 +-
  12 files changed, 153 insertions(+), 52 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c 
b/src/gallium/auxiliary/tgsi/tgsi_build.c
index fdb7feb..bb9d0cb 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -259,36 +259,39 @@ tgsi_build_declaration_semantic(
 return ds;
  }

-static struct tgsi_declaration_resource
-tgsi_default_declaration_resource(void)
+static struct tgsi_declaration_image
+tgsi_default_declaration_image(void)
  {
-   struct tgsi_declaration_resource dr;
+   struct tgsi_declaration_image di;

-   dr.Resource = TGSI_TEXTURE_BUFFER;
-   dr.Raw = 0;
-   dr.Writable = 0;
-   dr.Padding = 0;
+   di.Resource = TGSI_TEXTURE_BUFFER;
+   di.Raw = 0;
+   di.Writable = 0;
+   di.Format = 0;
+   di.Padding = 0;

-   return dr;
+   return di;
  }

-static struct tgsi_declaration_resource
-tgsi_build_declaration_resource(unsigned texture,
-unsigned raw,
-unsigned writable,
-struct tgsi_declaration *declaration,
-struct tgsi_header *header)
+static struct tgsi_declaration_image
+tgsi_build_declaration_image(unsigned texture,
+ unsigned format,
+ unsigned raw,
+ unsigned writable,
+ struct tgsi_declaration *declaration,
+ struct tgsi_header *header)
  {
-   struct tgsi_declaration_resource dr;
+   struct tgsi_declaration_image di;

-   dr = tgsi_default_declaration_resource();
-   dr.Resource = texture;
-   dr.Raw = raw;
-   dr.Writable = writable;
+   di = tgsi_default_declaration_image();
+   di.Resource = texture;
+   di.Format = format;
+   di.Raw = raw;
+   di.Writable = writable;

 declaration_grow(declaration, header);

-   return dr;
+   return di;
  }

  static struct tgsi_declaration_sampler_view
@@ -364,7 +367,7 @@ tgsi_default_full_declaration( void )
 full_declaration.Range = tgsi_default_declaration_range();
 full_declaration.Semantic = tgsi_default_declaration_semantic();
 full_declaration.Interp = tgsi_default_declaration_interp();
-   full_declaration.Resource = tgsi_default_declaration_resource();
+   full_declaration.Image = tgsi_default_declaration_image();
 full_declaration.SamplerView = tgsi_default_declaration_sampler_view();
 full_declaration.Array = tgsi_default_declaration_array();

@@ -454,20 +457,21 @@ tgsi_build_full_declaration(
   header );
 }

-   if (full_decl->Declaration.File == TGSI_FILE_RESOURCE) {
-  struct tgsi_declaration_resource *dr;
+   if (full_decl->Declaration.File == TGSI_FILE_IMAGE) {
+  struct tgsi_declaration_image *di;

if (maxsize <= size) {
   return  0;
}
-  dr = (struct tgsi_declaration_resource *)&tokens[size];
+  di = (struct tgsi_declaration_image *)&tokens[size];
size++;

-  *dr = tgsi_build_declaration_resource(full_decl->Resource.Resource,
-full_decl->Resource.Raw,
-full_decl->Resource.Writable,
-declaration,
-header);
+  *di = tgsi_build_declaration_image(full_decl->Image.Resource,
+ full_decl->Image.Format,
+ full_decl->Image.Raw,
+ full_decl->Image.Writable,
+ declaration,
+ header);
 }

 if (full_decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c 
b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index e29ffb3..dad3839 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -348,12 +348,14 @@ iter_declaration(
}
 }

-   if (decl->Declaration.File == TGSI_FILE_RESOURCE) {
+   if 

Re: [Mesa-dev] [PATCH 2/8] ureg: add buffer support to ureg

2016-01-04 Thread Ilia Mirkin
RES is split into two -- BUFFER and IMAGE.

On Mon, Jan 4, 2016 at 3:22 PM, Samuel Pitoiset
 wrote:
> I assume this is going to replace the old RES stuff?
>
>
> On 01/03/2016 05:37 AM, Ilia Mirkin wrote:
>>
>> Signed-off-by: Ilia Mirkin 
>> ---
>>   src/gallium/auxiliary/tgsi/tgsi_dump.c |  5 +++
>>   src/gallium/auxiliary/tgsi/tgsi_strings.c  |  1 +
>>   src/gallium/auxiliary/tgsi/tgsi_text.c |  5 +++
>>   src/gallium/auxiliary/tgsi/tgsi_ureg.c | 52
>> ++
>>   src/gallium/auxiliary/tgsi/tgsi_ureg.h |  3 ++
>>   src/gallium/include/pipe/p_shader_tokens.h |  4 ++-
>>   6 files changed, 69 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c
>> b/src/gallium/auxiliary/tgsi/tgsi_dump.c
>> index dad3839..de3aae5 100644
>> --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
>> +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
>> @@ -359,6 +359,11 @@ iter_declaration(
>>TXT(", RAW");
>>  }
>>
>> +   if (decl->Declaration.File == TGSI_FILE_BUFFER) {
>> +  if (decl->Declaration.Atomic)
>> + TXT(", ATOMIC");
>> +   }
>> +
>>  if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
>> TXT(", ");
>> ENM(decl->SamplerView.Resource, tgsi_texture_names);
>> diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c
>> b/src/gallium/auxiliary/tgsi/tgsi_strings.c
>> index ae30399..c0dd044 100644
>> --- a/src/gallium/auxiliary/tgsi/tgsi_strings.c
>> +++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c
>> @@ -56,6 +56,7 @@ static const char *tgsi_file_names[] =
>>  "SV",
>>  "IMAGE",
>>  "SVIEW",
>> +   "BUFFER",
>>   };
>>
>>   const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] =
>> diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c
>> b/src/gallium/auxiliary/tgsi/tgsi_text.c
>> index a45ab90..d72d843 100644
>> --- a/src/gallium/auxiliary/tgsi/tgsi_text.c
>> +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
>> @@ -1350,6 +1350,11 @@ static boolean parse_declaration( struct
>> translate_ctx *ctx )
>>  decl.SamplerView.ReturnTypeX;
>>}
>>ctx->cur = cur;
>> +  } else if (file == TGSI_FILE_BUFFER) {
>> + if (str_match_nocase_whole(&cur, "ATOMIC")) {
>> +decl.Declaration.Atomic = 1;
>> +ctx->cur = cur;
>> + }
>> } else {
>>if (str_match_nocase_whole(&cur, "LOCAL")) {
>>   decl.Declaration.Local = 1;
>> diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
>> b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
>> index ee23df9..6d5092b 100644
>> --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
>> +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
>> @@ -165,6 +165,12 @@ struct ureg_program
>>  } image[PIPE_MAX_SHADER_IMAGES];
>>  unsigned nr_images;
>>
>> +   struct {
>> +  unsigned index;
>> +  bool atomic;
>> +   } buffer[PIPE_MAX_SHADER_BUFFERS];
>> +   unsigned nr_buffers;
>> +
>>  struct util_bitmask *free_temps;
>>  struct util_bitmask *local_temps;
>>  struct util_bitmask *decl_temps;
>> @@ -689,6 +695,29 @@ ureg_DECL_image(struct ureg_program *ureg,
>>  return reg;
>>   }
>>
>> +/* Allocate a new buffer.
>> + */
>> +struct ureg_src ureg_DECL_buffer(struct ureg_program *ureg, unsigned nr,
>> + bool atomic)
>> +{
>> +   struct ureg_src reg = ureg_src_register(TGSI_FILE_BUFFER, nr);
>> +   unsigned i;
>> +
>> +   for (i = 0; i < ureg->nr_buffers; i++)
>> +  if (ureg->buffer[i].index == nr)
>> + return reg;
>> +
>> +   if (i < PIPE_MAX_SHADER_BUFFERS) {
>> +  ureg->buffer[i].index = nr;
>> +  ureg->buffer[i].atomic = atomic;
>> +  ureg->nr_buffers++;
>> +  return reg;
>> +   }
>> +
>> +   assert(0);
>> +   return reg;
>> +}
>> +
>>   static int
>>   match_or_expand_immediate64( const unsigned *v,
>>int type,
>> @@ -1546,6 +1575,25 @@ emit_decl_image(struct ureg_program *ureg,
>>   }
>>
>>   static void
>> +emit_decl_buffer(struct ureg_program *ureg,
>> + unsigned index,
>> + bool atomic)
>> +{
>> +   union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2);
>> +
>> +   out[0].value = 0;
>> +   out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
>> +   out[0].decl.NrTokens = 2;
>> +   out[0].decl.File = TGSI_FILE_BUFFER;
>> +   out[0].decl.UsageMask = 0xf;
>> +   out[0].decl.Atomic = atomic;
>> +
>> +   out[1].value = 0;
>> +   out[1].decl_range.First = index;
>> +   out[1].decl_range.Last = index;
>> +}
>> +
>> +static void
>>   emit_immediate( struct ureg_program *ureg,
>>   const unsigned *v,
>>   unsigned type )
>> @@ -1713,6 +1761,10 @@ static void emit_decls( struct ureg_program *ureg )
>> ureg->image[i].raw);
>>  }
>>
>> +   for (i = 0; i < ureg->nr_buffers; i++) {
>> +  emit_decl_buffer(ureg, ureg->buffer[i].index,
>> ureg->buffer[i].atomic);
>> +   }
>> +

Re: [Mesa-dev] [PATCH 1/8] tgsi: add ureg support for image decls

2016-01-04 Thread Ilia Mirkin
On Mon, Jan 4, 2016 at 3:13 PM, Samuel Pitoiset
 wrote:
> On 01/03/2016 05:37 AM, Ilia Mirkin wrote:
>> Signed-off-by: Ilia Mirkin 
>> diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c
>> b/src/gallium/auxiliary/tgsi/tgsi_strings.c
>> index fd926b3..ae30399 100644
>> --- a/src/gallium/auxiliary/tgsi/tgsi_strings.c
>> +++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c
>> @@ -54,8 +54,8 @@ static const char *tgsi_file_names[] =
>>  "IMM",
>>  "PRED",
>>  "SV",
>> -   "RES",
>> -   "SVIEW"
>> +   "IMAGE",
>> +   "SVIEW",
>>   };
>
>
> You forgot to update src/gallium/tests/trivial/compute.c, and so you did
> break all of those compute tests. Please don't break them because they are
> useful for people which work on compute. :-)

Can I update it by deleting it? Like I was saying, all that stuff
needs to be reworked... since compute.c doesn't *actually* work with
anything, and actively conflicts with plans for OpenGL things, I see
no problem with just invalidating the interfaces it was using. (This
is also why I was discouraging people from trying to do TGSI with
compute without considering these things first.)

>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
>> index beb67fe..87c5dc1 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
>> @@ -348,7 +348,7 @@ static nv50_ir::DataFile translateFile(uint file)
>>  case TGSI_FILE_PREDICATE:   return nv50_ir::FILE_PREDICATE;
>>  case TGSI_FILE_IMMEDIATE:   return nv50_ir::FILE_IMMEDIATE;
>>  case TGSI_FILE_SYSTEM_VALUE:return nv50_ir::FILE_SYSTEM_VALUE;
>> -   case TGSI_FILE_RESOURCE:return nv50_ir::FILE_MEMORY_GLOBAL;
>> +   //case TGSI_FILE_RESOURCE:return nv50_ir::FILE_MEMORY_GLOBAL;
>
>
> You definitely break global resources here. What is your plan for that? Are
> you going to fix them in upcoming patches?

What are global resources needed for? Anyways, if it's *actually*
necessary for anything (and I've yet to see what that might be), one
could have a GLOBAL bit on a BUFFER declaration, much like the ATOMIC
flag (and I plan on also having a SHARED flag you can set for
compute). The previous thing with fixed large indices was a pretty big
hack anyways.

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/5] nvc0: add ARB_indirect_parameters support

2016-01-04 Thread Samuel Pitoiset



On 01/04/2016 09:18 PM, Ilia Mirkin wrote:

On Mon, Jan 4, 2016 at 3:04 PM, Samuel Pitoiset
 wrote:

Did you write piglit tests for this new extension?
I don't see any tests which use MultiDrawArraysIndirectCountARB() or
MultiDrawElementsIndirectCountARB().


http://patchwork.freedesktop.org/patch/69334/


Okay, good!






Are you going to submit the rest of the series for nvc0?


Mmmmaybe. It's part of the ARB_multi_draw_indirect stuff.


This should be good if you want someone to have a look at it.






On 01/02/2016 09:38 PM, Ilia Mirkin wrote:


I chose to make separate macros for this due to the additional
complexity and extra scratch usage.

Signed-off-by: Ilia Mirkin 
---
   docs/relnotes/11.2.0.html  |   1 +
   src/gallium/drivers/nouveau/nvc0/mme/com9097.mme   | 157
+
   src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h | 125

   src/gallium/drivers/nouveau/nvc0/nvc0_macros.h |   4 +
   src/gallium/drivers/nouveau/nvc0/nvc0_screen.c |   4 +-
   src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c|  29 +++-
   6 files changed, 314 insertions(+), 6 deletions(-)

diff --git a/docs/relnotes/11.2.0.html b/docs/relnotes/11.2.0.html
index d31da8b..616c134 100644
--- a/docs/relnotes/11.2.0.html
+++ b/docs/relnotes/11.2.0.html
@@ -47,6 +47,7 @@ Note: some of the new features are only available with
certain drivers.
   GL_ARB_base_instance on freedreno/a4xx
   GL_ARB_compute_shader on i965
   GL_ARB_copy_image on r600
+GL_ARB_indirect_parameters on nvc0
   GL_ARB_shader_draw_parameters on i965, nvc0
   GL_ARB_tessellation_shader on i965 and r600 (evergreen/cayman
only)
   GL_ARB_texture_buffer_object_rgb32 on freedreno/a4xx
diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
index 35355ed..4daa57d 100644
--- a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
+++ b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
@@ -334,3 +334,160 @@ dai_end:
  mov $r6 (add $r6 1)
  exit maddr 0x50e /* VB_INSTANCE_BASE to restore */
  send $r5
+
+/* NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT
+ *
+ * NOTE: Saves and restores VB_ELEMENT,INSTANCE_BASE.
+ * Forcefully sets VERTEX_ID_BASE to the value of VB_ELEMENT_BASE.
+ *
+ * arg = mode
+ * parm[0] = start_drawid
+ * parm[1] = numparams
+ * parm[2] = totaldraws
+ * parm[3 + 5n + 0] = count
+ * parm[3 + 5n + 1] = instance_count
+ * parm[3 + 5n + 2] = start
+ * parm[3 + 5n + 3] = index_bias
+ * parm[3 + 5n + 4] = start_instance
+ *
+ * SCRATCH[0] = saved VB_ELEMENT_BASE
+ * SCRATCH[1] = saved VB_INSTANCE_BASE
+ * SCRATCH[2] = draws left
+ */
+.section #mme9097_draw_elts_indirect_count
+   read $r6 0x50d /* VB_ELEMENT_BASE */
+   read $r7 0x50e /* VB_INSTANCE_BASE */
+   maddr 0x1d00
+   send $r6 /* SCRATCH[0] = VB_ELEMENT_BASE */
+   send $r7 /* SCRATCH[1] = VB_INSTANCE_BASE */
+   parm $r6 /* start_drawid */
+   parm $r7 /* numparams */
+   parm $r5 /* totaldraws */
+   mov $r5 (sub $r5 $r6) /* draws left */
+   braz $r5 #deic_runout
+   mov $r3 (extrinsrt 0x0 $r5 31 1 0) /* extract high bit */
+   branz $r3 #deic_runout
+   send $r5
+deic_draw_again:
+   parm $r3 /* count */
+   parm $r2 /* instance_count */
+   parm $r4 maddr 0x5f7 /* INDEX_BATCH_FIRST, start */
+   parm $r4 send $r4 /* index_bias, send start */
+   maddr 0x18e3 /* CB_POS */
+   send 0x180 /* 256 + 128 */
+   braz $r2 #deic_end
+   parm $r5 send $r4 /* start_instance, send index_bias */
+   send $r5 /* send start_instance */
+   send $r6 /* draw id */
+   maddr 0x150d /* VB_ELEMENT,INSTANCE_BASE */
+   send $r4
+   send $r5
+   maddr 0x446
+   send $r4
+   mov $r4 0x1
+   mov $r1 (extrinsrt $r1 0x0 0 1 26) /* clear INSTANCE_NEXT */
+deic_again:
+   maddr 0x586 /* VERTEX_BEGIN_GL */
+   send $r1 /* mode */
+   maddr 0x5f8 /* INDEX_BATCH_COUNT */
+   send $r3 /* count */
+   mov $r2 (sub $r2 $r4)
+   maddrsend 0x585 /* VERTEX_END_GL */
+   branz $r2 #deic_again
+   mov $r1 (extrinsrt $r1 $r4 0 1 26) /* set INSTANCE_NEXT */
+deic_end:
+   read $r5 0xd02
+   mov $r5 (add $r5 -1)
+   braz $r5 #deic_runout_check
+   mov $r7 (add $r7 -1)
+   maddr 0xd02
+   send $r5
+   branz $r7 #deic_draw_again
+   mov $r6 (add $r6 1)
+deic_restore:
+   read $r6 0xd00
+   read $r7 0xd01
+   maddr 0x150d /* VB_ELEMENT,INSTANCE_BASE */
+   send $r6
+   send $r7
+   exit maddr 0x446
+   send $r6
+deic_runout:
+   parm $r2
+   parm $r2
+   parm $r2
+   parm $r2
+   parm $r2
+   mov $r7 (add $r7 -1)
+deic_runout_check:
+   branz annul $r7 #deic_runout
+   bra annul #deic_restore
+
+/* NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT:
+ *
+ * NOTE: Saves and restores VB_INSTANCE_BASE.
+ *
+ * arg = mode
+ * parm[0] = start_drawid
+ * parm[1] = numparams
+ * parm[2] = totaldraws
+ * parm[3 + 4n + 0] = count
+ * parm[3 + 4n + 1] = instance_count
+ * parm[3 + 4n + 2] = start
+ * parm[3 + 4n + 3] = start_instance
+ *
+ * SCRATCH[0] = 

Re: [Mesa-dev] [PATCH 2/8] ureg: add buffer support to ureg

2016-01-04 Thread Samuel Pitoiset

I assume this is going to replace the old RES stuff?

On 01/03/2016 05:37 AM, Ilia Mirkin wrote:

Signed-off-by: Ilia Mirkin 
---
  src/gallium/auxiliary/tgsi/tgsi_dump.c |  5 +++
  src/gallium/auxiliary/tgsi/tgsi_strings.c  |  1 +
  src/gallium/auxiliary/tgsi/tgsi_text.c |  5 +++
  src/gallium/auxiliary/tgsi/tgsi_ureg.c | 52 ++
  src/gallium/auxiliary/tgsi/tgsi_ureg.h |  3 ++
  src/gallium/include/pipe/p_shader_tokens.h |  4 ++-
  6 files changed, 69 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c 
b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index dad3839..de3aae5 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -359,6 +359,11 @@ iter_declaration(
   TXT(", RAW");
 }

+   if (decl->Declaration.File == TGSI_FILE_BUFFER) {
+  if (decl->Declaration.Atomic)
+ TXT(", ATOMIC");
+   }
+
 if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
TXT(", ");
ENM(decl->SamplerView.Resource, tgsi_texture_names);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c 
b/src/gallium/auxiliary/tgsi/tgsi_strings.c
index ae30399..c0dd044 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_strings.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c
@@ -56,6 +56,7 @@ static const char *tgsi_file_names[] =
 "SV",
 "IMAGE",
 "SVIEW",
+   "BUFFER",
  };

  const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] =
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c 
b/src/gallium/auxiliary/tgsi/tgsi_text.c
index a45ab90..d72d843 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -1350,6 +1350,11 @@ static boolean parse_declaration( struct translate_ctx 
*ctx )
 decl.SamplerView.ReturnTypeX;
   }
   ctx->cur = cur;
+  } else if (file == TGSI_FILE_BUFFER) {
+ if (str_match_nocase_whole(&cur, "ATOMIC")) {
+decl.Declaration.Atomic = 1;
+ctx->cur = cur;
+ }
} else {
   if (str_match_nocase_whole(, "LOCAL")) {
  decl.Declaration.Local = 1;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c 
b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index ee23df9..6d5092b 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -165,6 +165,12 @@ struct ureg_program
 } image[PIPE_MAX_SHADER_IMAGES];
 unsigned nr_images;

+   struct {
+  unsigned index;
+  bool atomic;
+   } buffer[PIPE_MAX_SHADER_BUFFERS];
+   unsigned nr_buffers;
+
 struct util_bitmask *free_temps;
 struct util_bitmask *local_temps;
 struct util_bitmask *decl_temps;
@@ -689,6 +695,29 @@ ureg_DECL_image(struct ureg_program *ureg,
 return reg;
  }

+/* Allocate a new buffer.
+ */
+struct ureg_src ureg_DECL_buffer(struct ureg_program *ureg, unsigned nr,
+ bool atomic)
+{
+   struct ureg_src reg = ureg_src_register(TGSI_FILE_BUFFER, nr);
+   unsigned i;
+
+   for (i = 0; i < ureg->nr_buffers; i++)
+  if (ureg->buffer[i].index == nr)
+ return reg;
+
+   if (i < PIPE_MAX_SHADER_BUFFERS) {
+  ureg->buffer[i].index = nr;
+  ureg->buffer[i].atomic = atomic;
+  ureg->nr_buffers++;
+  return reg;
+   }
+
+   assert(0);
+   return reg;
+}
+
  static int
  match_or_expand_immediate64( const unsigned *v,
   int type,
@@ -1546,6 +1575,25 @@ emit_decl_image(struct ureg_program *ureg,
  }

  static void
+emit_decl_buffer(struct ureg_program *ureg,
+ unsigned index,
+ bool atomic)
+{
+   union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2);
+
+   out[0].value = 0;
+   out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
+   out[0].decl.NrTokens = 2;
+   out[0].decl.File = TGSI_FILE_BUFFER;
+   out[0].decl.UsageMask = 0xf;
+   out[0].decl.Atomic = atomic;
+
+   out[1].value = 0;
+   out[1].decl_range.First = index;
+   out[1].decl_range.Last = index;
+}
+
+static void
  emit_immediate( struct ureg_program *ureg,
  const unsigned *v,
  unsigned type )
@@ -1713,6 +1761,10 @@ static void emit_decls( struct ureg_program *ureg )
ureg->image[i].raw);
 }

+   for (i = 0; i < ureg->nr_buffers; i++) {
+  emit_decl_buffer(ureg, ureg->buffer[i].index, ureg->buffer[i].atomic);
+   }
+
 if (ureg->const_decls.nr_constant_ranges) {
for (i = 0; i < ureg->const_decls.nr_constant_ranges; i++) {
   emit_decl_range(ureg,
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h 
b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
index bba2afb..e25c961 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
@@ -335,6 +335,9 @@ ureg_DECL_image(struct ureg_program *ureg,
  boolean wr,
  boolean raw);

+struct ureg_src
+ureg_DECL_buffer(struct ureg_program 

[Mesa-dev] [Bug 93570] the image of llvmpipe has a low quality on arm (with too many points on it)

2016-01-04 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=93570

--- Comment #9 from Icenowy Zheng  ---
I'm sorry, but my device is not capable of building an svn version of LLVM. (The
original version of LLVM (3.7) is built on a buildbot, and so is Mesa 11.0.)

Can you provide me a simple testsuite to check whether it's the fault of llvm?

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3] mesa: use gl_shader_variable in program resource list

2016-01-04 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Mon, Jan 4, 2016 at 8:55 AM, Tapani Pälli  wrote:
> Patch changes linker to allocate gl_shader_variable instead of using
> ir_variable. This makes it possible to get rid of ir_variables and ir
> in memory after linking.
>
> v2: check that we do not create duplicate entries with
> packed varyings
>
> v3: document 'patch' bit (Ilia Mirkin)
>
> Signed-off-by: Tapani Pälli 
> ---
>  src/glsl/linker.cpp| 61 
> +++---
>  src/mesa/main/mtypes.h | 61 
> ++
>  src/mesa/main/shader_query.cpp | 38 +-
>  3 files changed, 132 insertions(+), 28 deletions(-)
>
> diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
> index a6e81b4..45daa12 100644
> --- a/src/glsl/linker.cpp
> +++ b/src/glsl/linker.cpp
> @@ -3373,6 +3373,30 @@ build_stageref(struct gl_shader_program *shProg, const 
> char *name,
> return stages;
>  }
>
> +/**
> + * Create gl_shader_variable from ir_variable class.
> + */
> +static gl_shader_variable *
> +create_shader_variable(struct gl_shader_program *shProg, const ir_variable 
> *in)
> +{
> +   gl_shader_variable *out = ralloc(shProg, struct gl_shader_variable);
> +   if (!out)
> +  return NULL;
> +
> +   out->type = in->type;
> +   out->name = ralloc_strdup(shProg, in->name);
> +
> +   if (!out->name)
> +  return NULL;
> +
> +   out->location = in->data.location;
> +   out->index = in->data.index;
> +   out->patch = in->data.patch;
> +   out->mode = in->data.mode;
> +
> +   return out;
> +}
> +
>  static bool
>  add_interface_variables(struct gl_shader_program *shProg,
>  exec_list *ir, GLenum programInterface)
> @@ -3424,9 +3448,13 @@ add_interface_variables(struct gl_shader_program 
> *shProg,
>if (strncmp(var->name, "gl_out_FragData", 15) == 0)
>   continue;
>
> -  if (!add_program_resource(shProg, programInterface, var,
> -build_stageref(shProg, var->name,
> -   var->data.mode) | mask))
> +  gl_shader_variable *sha_v = create_shader_variable(shProg, var);
> +  if (!sha_v)
> + return false;
> +
> +  if (!add_program_resource(shProg, programInterface, sha_v,
> +build_stageref(shProg, sha_v->name,
> +   sha_v->mode) | mask))
>   return false;
> }
> return true;
> @@ -3454,9 +3482,14 @@ add_packed_varyings(struct gl_shader_program *shProg, 
> int stage)
>   default:
>  unreachable("unexpected type");
>   }
> - if (!add_program_resource(shProg, iface, var,
> -   build_stageref(shProg, var->name,
> -  var->data.mode)))
> +
> + gl_shader_variable *sha_v = create_shader_variable(shProg, var);
> + if (!sha_v)
> +return false;
> +
> + if (!add_program_resource(shProg, iface, sha_v,
> +   build_stageref(shProg, sha_v->name,
> +  sha_v->mode)))
>  return false;
>}
> }
> @@ -3475,7 +3508,12 @@ add_fragdata_arrays(struct gl_shader_program *shProg)
>ir_variable *var = node->as_variable();
>if (var) {
>   assert(var->data.mode == ir_var_shader_out);
> - if (!add_program_resource(shProg, GL_PROGRAM_OUTPUT, var,
> +
> + gl_shader_variable *sha_v = create_shader_variable(shProg, var);
> + if (!sha_v)
> +return false;
> +
> + if (!add_program_resource(shProg, GL_PROGRAM_OUTPUT, sha_v,
> 1 << MESA_SHADER_FRAGMENT))
>  return false;
>}
> @@ -3726,8 +3764,13 @@ build_program_resource_list(struct gl_shader_program 
> *shProg)
> if (shProg->SeparateShader) {
>if (!add_packed_varyings(shProg, input_stage))
>   return;
> -  if (!add_packed_varyings(shProg, output_stage))
> - return;
> +  /* Only when dealing with multiple stages, otherwise we would have
> +   * duplicate gl_shader_variable entries.
> +   */
> +  if (input_stage != output_stage) {
> + if (!add_packed_varyings(shProg, output_stage))
> +return;
> +  }
> }
>
> if (!add_fragdata_arrays(shProg))
> diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
> index 5b9fce8..c9fe728 100644
> --- a/src/mesa/main/mtypes.h
> +++ b/src/mesa/main/mtypes.h
> @@ -2525,6 +2525,67 @@ struct gl_active_atomic_buffer
>  };
>
>  /**
> + * Data container for shader queries. This holds only the minimal
> + * amount of required information for resource queries to work.
> + */
> +struct gl_shader_variable
> +{
> +   /**
> +* Declared type of the variable
> +   

Re: [Mesa-dev] [PATCH v2 1/2] mesa: Map program UBOs and SSBOs to Interface Blocks

2016-01-04 Thread Iago Toral
On Thu, 2015-12-31 at 11:20 -0800, Jordan Justen wrote:
> v2:
>  * Fill UboInterfaceBlockIndex and SsboInterfaceBlockIndex in
>split_ubos_and_ssbos (Iago)
> 
> Cc: Samuel Iglesias Gonsálvez 
> Cc: Iago Toral 
> Signed-off-by: Jordan Justen 
> Reviewed-by: Juha-Pekka Heikkila 

Reviewed-by: Iago Toral Quiroga 

> ---
>  src/glsl/linker.cpp | 30 +-
>  src/glsl/standalone_scaffolding.cpp |  5 +
>  src/mesa/main/mtypes.h  |  7 +++
>  3 files changed, 37 insertions(+), 5 deletions(-)
> 
> diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
> index a6e81b4..367ae26 100644
> --- a/src/glsl/linker.cpp
> +++ b/src/glsl/linker.cpp
> @@ -3942,8 +3942,10 @@ split_ubos_and_ssbos(void *mem_ctx,
>   unsigned num_blocks,
>   struct gl_uniform_block ***ubos,
>   unsigned *num_ubos,
> + unsigned **ubo_interface_block_indices,
>   struct gl_uniform_block ***ssbos,
> - unsigned *num_ssbos)
> + unsigned *num_ssbos,
> + unsigned **ssbo_interface_block_indices)
>  {
> unsigned num_ubo_blocks = 0;
> unsigned num_ssbo_blocks = 0;
> @@ -3961,11 +3963,25 @@ split_ubos_and_ssbos(void *mem_ctx,
> *ssbos = ralloc_array(mem_ctx, gl_uniform_block *, num_ssbo_blocks);
> *num_ssbos = 0;
>  
> +   if (ubo_interface_block_indices)
> +  *ubo_interface_block_indices =
> + ralloc_array(mem_ctx, unsigned, num_ubo_blocks);
> +
> +   if (ssbo_interface_block_indices)
> +  *ssbo_interface_block_indices =
> + ralloc_array(mem_ctx, unsigned, num_ssbo_blocks);
> +
> for (unsigned i = 0; i < num_blocks; i++) {
>if (blocks[i].IsShaderStorage) {
> - (*ssbos)[(*num_ssbos)++] = [i];
> + (*ssbos)[*num_ssbos] = [i];
> + if (ssbo_interface_block_indices)
> +(*ssbo_interface_block_indices)[*num_ssbos] = i;
> + (*num_ssbos)++;
>} else {
> - (*ubos)[(*num_ubos)++] = [i];
> + (*ubos)[*num_ubos] = [i];
> + if (ubo_interface_block_indices)
> +(*ubo_interface_block_indices)[*num_ubos] = i;
> + (*num_ubos)++;
>}
> }
>  
> @@ -4577,8 +4593,10 @@ link_shaders(struct gl_context *ctx, struct 
> gl_shader_program *prog)
>sh->NumBufferInterfaceBlocks,
>>UniformBlocks,
>>NumUniformBlocks,
> +  NULL,
>>ShaderStorageBlocks,
> -  >NumShaderStorageBlocks);
> +  >NumShaderStorageBlocks,
> +  NULL);
>}
> }
>  
> @@ -4587,8 +4605,10 @@ link_shaders(struct gl_context *ctx, struct 
> gl_shader_program *prog)
>  prog->NumBufferInterfaceBlocks,
>  >UniformBlocks,
>  >NumUniformBlocks,
> +>UboInterfaceBlockIndex,
>  >ShaderStorageBlocks,
> ->NumShaderStorageBlocks);
> +>NumShaderStorageBlocks,
> +>SsboInterfaceBlockIndex);
>  
> /* FINISHME: Assign fragment shader output locations. */
>  
> diff --git a/src/glsl/standalone_scaffolding.cpp 
> b/src/glsl/standalone_scaffolding.cpp
> index e350f70..d5d214b 100644
> --- a/src/glsl/standalone_scaffolding.cpp
> +++ b/src/glsl/standalone_scaffolding.cpp
> @@ -124,6 +124,11 @@ _mesa_clear_shader_program_data(struct gl_shader_program 
> *shProg)
>shProg->InterfaceBlockStageIndex[i] = NULL;
> }
>  
> +   ralloc_free(shProg->UboInterfaceBlockIndex);
> +   shProg->UboInterfaceBlockIndex = NULL;
> +   ralloc_free(shProg->SsboInterfaceBlockIndex);
> +   shProg->SsboInterfaceBlockIndex = NULL;
> +
> ralloc_free(shProg->AtomicBuffers);
> shProg->AtomicBuffers = NULL;
> shProg->NumAtomicBuffers = 0;
> diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
> index 5b9fce8..6eff56d 100644
> --- a/src/mesa/main/mtypes.h
> +++ b/src/mesa/main/mtypes.h
> @@ -2736,6 +2736,13 @@ struct gl_shader_program
> int *InterfaceBlockStageIndex[MESA_SHADER_STAGES];
>  
> /**
> +* Indices into the BufferInterfaceBlocks[] array for Uniform Buffer
> +* Objects and Shader Storage Buffer Objects.
> +*/
> +   unsigned *UboInterfaceBlockIndex;
> +   unsigned *SsboInterfaceBlockIndex;
> +
> +   /**
>  * Map of active uniform names to locations
>  *
>  * Maps any active uniform that is not an array element to a location.


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 2/2] mesa: Use separate indices for UBO & SSBO during binding

2016-01-04 Thread Iago Toral
On Thu, 2015-12-31 at 11:20 -0800, Jordan Justen wrote:
> Previously we were treating the binding index for Uniform Buffer
> Objects and Shader Storage Buffer Objects as being part of the
> combined BufferInterfaceBlocks array.
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93322
> Cc: Samuel Iglesias Gonsálvez 
> Cc: Iago Toral 
> Cc: Tapani Pälli 
> Signed-off-by: Jordan Justen 
> Reviewed-by: Juha-Pekka Heikkila 

Reviewed-by: Iago Toral Quiroga 

> ---
> 
> Notes:
> Maybe the GL_ARB_program_interface_query support code should be able
> to help out with this, since GetProgramResourceIndex will have the
> same concept of UBO/SSBO indices. Unfortunately, I don't think the
> internals of the GL_ARB_program_interface_query implementation
> currently have the info we need.
> 
>  src/mesa/main/uniforms.c | 30 --
>  1 file changed, 20 insertions(+), 10 deletions(-)
> 
> diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c
> index 758ca24..47f80ce 100644
> --- a/src/mesa/main/uniforms.c
> +++ b/src/mesa/main/uniforms.c
> @@ -1002,10 +1002,10 @@ _mesa_UniformBlockBinding(GLuint program,
> if (!shProg)
>return;
>  
> -   if (uniformBlockIndex >= shProg->NumBufferInterfaceBlocks) {
> +   if (uniformBlockIndex >= shProg->NumUniformBlocks) {
>_mesa_error(ctx, GL_INVALID_VALUE,
> "glUniformBlockBinding(block index %u >= %u)",
> -   uniformBlockIndex, shProg->NumBufferInterfaceBlocks);
> +   uniformBlockIndex, shProg->NumUniformBlocks);
>return;
> }
>  
> @@ -1016,17 +1016,22 @@ _mesa_UniformBlockBinding(GLuint program,
>return;
> }
>  
> -   if (shProg->BufferInterfaceBlocks[uniformBlockIndex].Binding !=
> +   if (shProg->UniformBlocks[uniformBlockIndex]->Binding !=
> uniformBlockBinding) {
>int i;
>  
>FLUSH_VERTICES(ctx, 0);
>ctx->NewDriverState |= ctx->DriverFlags.NewUniformBuffer;
>  
> -  shProg->BufferInterfaceBlocks[uniformBlockIndex].Binding = 
> uniformBlockBinding;
> +  const int interface_block_index =
> + shProg->UboInterfaceBlockIndex[uniformBlockIndex];
> +
> +  shProg->BufferInterfaceBlocks[interface_block_index].Binding =
> + uniformBlockBinding;
>  
>for (i = 0; i < MESA_SHADER_STAGES; i++) {
> -  int stage_index = 
> shProg->InterfaceBlockStageIndex[i][uniformBlockIndex];
> +  int stage_index =
> +shProg->InterfaceBlockStageIndex[i][interface_block_index];
>  
>if (stage_index != -1) {
>   struct gl_shader *sh = shProg->_LinkedShaders[i];
> @@ -1054,10 +1059,10 @@ _mesa_ShaderStorageBlockBinding(GLuint program,
> if (!shProg)
>return;
>  
> -   if (shaderStorageBlockIndex >= shProg->NumBufferInterfaceBlocks) {
> +   if (shaderStorageBlockIndex >= shProg->NumShaderStorageBlocks) {
>_mesa_error(ctx, GL_INVALID_VALUE,
> "glShaderStorageBlockBinding(block index %u >= %u)",
> -   shaderStorageBlockIndex, shProg->NumBufferInterfaceBlocks);
> +   shaderStorageBlockIndex, shProg->NumShaderStorageBlocks);
>return;
> }
>  
> @@ -1069,17 +1074,22 @@ _mesa_ShaderStorageBlockBinding(GLuint program,
>return;
> }
>  
> -   if (shProg->BufferInterfaceBlocks[shaderStorageBlockIndex].Binding !=
> +   if (shProg->ShaderStorageBlocks[shaderStorageBlockIndex]->Binding !=
> shaderStorageBlockBinding) {
>int i;
>  
>FLUSH_VERTICES(ctx, 0);
>ctx->NewDriverState |= ctx->DriverFlags.NewShaderStorageBuffer;
>  
> -  shProg->BufferInterfaceBlocks[shaderStorageBlockIndex].Binding = 
> shaderStorageBlockBinding;
> +  const int interface_block_index =
> + shProg->SsboInterfaceBlockIndex[shaderStorageBlockIndex];
> +
> +  shProg->BufferInterfaceBlocks[interface_block_index].Binding =
> + shaderStorageBlockBinding;
>  
>for (i = 0; i < MESA_SHADER_STAGES; i++) {
> -  int stage_index = 
> shProg->InterfaceBlockStageIndex[i][shaderStorageBlockIndex];
> +  int stage_index =
> +shProg->InterfaceBlockStageIndex[i][interface_block_index];
>  
>if (stage_index != -1) {
>   struct gl_shader *sh = shProg->_LinkedShaders[i];


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/mesa: make KHR_debug output independent of context creation flags

2016-01-04 Thread Ilia Mirkin
On Mon, Jan 4, 2016 at 5:34 PM, Nicolai Hähnle  wrote:
> From: Nicolai Hähnle 
>
> Instead, keep track of GL_DEBUG_OUTPUT and (un)install the pipe_debug_callback
> accordingly. Hardware drivers can still use the absence of the callback to
> skip more expensive operations in the normal case, and users can no longer be
> surprised by the need to set the debug flag at context creation time.
> ---
>  src/mesa/state_tracker/st_context.c | 18 ++
>  src/mesa/state_tracker/st_debug.c   | 70 
> +
>  src/mesa/state_tracker/st_debug.h   |  4 +++
>  src/mesa/state_tracker/st_manager.c | 64 +
>  4 files changed, 93 insertions(+), 63 deletions(-)
>
> diff --git a/src/mesa/state_tracker/st_context.c 
> b/src/mesa/state_tracker/st_context.c
> index 724c3c5..31cc99d 100644
> --- a/src/mesa/state_tracker/st_context.c
> +++ b/src/mesa/state_tracker/st_context.c
> @@ -80,6 +80,23 @@ DEBUG_GET_ONCE_BOOL_OPTION(mesa_mvp_dp4, "MESA_MVP_DP4", 
> FALSE)
>
>
>  /**
> + * Called via ctx->Driver.Enable()
> + */
> +static void st_Enable(struct gl_context * ctx, GLenum cap, GLboolean state)
> +{
> +   struct st_context *st = st_context(ctx);
> +
> +   switch (cap) {
> +   case GL_DEBUG_OUTPUT:
> +  st_enable_debug_output(st, state);
> +  break;
> +   default:
> +  break;
> +   }
> +}
> +
> +
> +/**
>   * Called via ctx->Driver.UpdateState()
>   */
>  void st_invalidate_state(struct gl_context * ctx, GLuint new_state)
> @@ -457,5 +474,6 @@ void st_init_driver_functions(struct pipe_screen *screen,
>
> st_init_vdpau_functions(functions);
>
> +   functions->Enable = st_Enable;
> functions->UpdateState = st_invalidate_state;
>  }
> diff --git a/src/mesa/state_tracker/st_debug.c 
> b/src/mesa/state_tracker/st_debug.c
> index 6d859c6..ac77558 100644
> --- a/src/mesa/state_tracker/st_debug.c
> +++ b/src/mesa/state_tracker/st_debug.c
> @@ -104,3 +104,73 @@ st_print_current(void)
>  }
>
>
> +/**
> + * Installed as pipe_debug_callback when GL_DEBUG_OUTPUT is enabled.
> + */
> +static void
> +st_debug_message(void *data,
> + unsigned *id,
> + enum pipe_debug_type ptype,
> + const char *fmt,
> + va_list args)
> +{
> +   struct st_context *st = data;
> +   enum mesa_debug_source source;
> +   enum mesa_debug_type type;
> +   enum mesa_debug_severity severity;
> +
> +   switch (ptype) {
> +   case PIPE_DEBUG_TYPE_OUT_OF_MEMORY:
> +  source = MESA_DEBUG_SOURCE_API;
> +  type = MESA_DEBUG_TYPE_ERROR;
> +  severity = MESA_DEBUG_SEVERITY_MEDIUM;
> +  break;
> +   case PIPE_DEBUG_TYPE_ERROR:
> +  source = MESA_DEBUG_SOURCE_API;
> +  type = MESA_DEBUG_TYPE_ERROR;
> +  severity = MESA_DEBUG_SEVERITY_MEDIUM;
> +  break;
> +   case PIPE_DEBUG_TYPE_SHADER_INFO:
> +  source = MESA_DEBUG_SOURCE_SHADER_COMPILER;
> +  type = MESA_DEBUG_TYPE_OTHER;
> +  severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
> +  break;
> +   case PIPE_DEBUG_TYPE_PERF_INFO:
> +  source = MESA_DEBUG_SOURCE_API;
> +  type = MESA_DEBUG_TYPE_PERFORMANCE;
> +  severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
> +  break;
> +   case PIPE_DEBUG_TYPE_INFO:
> +  source = MESA_DEBUG_SOURCE_API;
> +  type = MESA_DEBUG_TYPE_OTHER;
> +  severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
> +  break;
> +   case PIPE_DEBUG_TYPE_FALLBACK:
> +  source = MESA_DEBUG_SOURCE_API;
> +  type = MESA_DEBUG_TYPE_PERFORMANCE;
> +  severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
> +  break;
> +   case PIPE_DEBUG_TYPE_CONFORMANCE:
> +  source = MESA_DEBUG_SOURCE_API;
> +  type = MESA_DEBUG_TYPE_OTHER;
> +  severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
> +  break;

Perhaps add a

default:
  unreachable("invalid debug type");

to avoid the stupid compiler warning?

> +   }
> +   _mesa_gl_vdebug(st->ctx, id, source, type, severity, fmt, args);
> +}
> +
> +void
> +st_enable_debug_output(struct st_context *st, boolean enable)
> +{
> +   struct pipe_context *pipe = st->pipe;
> +
> +   if (!pipe->set_debug_callback)
> +  return;
> +
> +   if (enable) {
> +  struct pipe_debug_callback cb = { st_debug_message, st };
> +  pipe->set_debug_callback(pipe, &cb);
> +   } else {
> +  pipe->set_debug_callback(pipe, NULL);
> +   }
> +}
> diff --git a/src/mesa/state_tracker/st_debug.h 
> b/src/mesa/state_tracker/st_debug.h
> index 288eccf..ed3ead8 100644
> --- a/src/mesa/state_tracker/st_debug.h
> +++ b/src/mesa/state_tracker/st_debug.h
> @@ -32,6 +32,8 @@
>  #include "pipe/p_compiler.h"
>  #include "util/u_debug.h"
>
> +struct st_context;
> +
>  extern void
>  st_print_current(void);
>
> @@ -59,6 +61,8 @@ extern int ST_DEBUG;
>
>  void st_debug_init( void );
>
> +void st_enable_debug_output(struct st_context *st, boolean enable);
> +
>  static inline void
>  ST_DBG( unsigned flag, const char *fmt, ... )
>  {
> diff --git 

[Mesa-dev] [PATCH v2] st/va: count number of slices

2016-01-04 Thread Julien Isorce
From: Julien Isorce 

The counter was not set, but it is used by the nouveau driver.
It is required; otherwise the visual output is garbage.

Signed-off-by: Julien Isorce 
---
 src/gallium/state_trackers/va/picture.c| 8 
 src/gallium/state_trackers/va/picture_h264.c   | 2 ++
 src/gallium/state_trackers/va/picture_mpeg12.c | 6 ++
 src/gallium/state_trackers/va/picture_vc1.c| 7 +++
 src/gallium/state_trackers/va/va_private.h | 2 ++
 5 files changed, 25 insertions(+)

diff --git a/src/gallium/state_trackers/va/picture.c 
b/src/gallium/state_trackers/va/picture.c
index 7b30bf8..da9ca5a 100644
--- a/src/gallium/state_trackers/va/picture.c
+++ b/src/gallium/state_trackers/va/picture.c
@@ -174,6 +174,14 @@ static void
 handleSliceParameterBuffer(vlVaContext *context, vlVaBuffer *buf)
 {
switch (u_reduce_video_profile(context->templat.profile)) {
+   case PIPE_VIDEO_FORMAT_MPEG12:
+  vlVaHandleSliceParameterBufferMPEG12(context, buf);
+  break;
+
+   case PIPE_VIDEO_FORMAT_VC1:
+  vlVaHandleSliceParameterBufferVC1(context, buf);
+  break;
+
case PIPE_VIDEO_FORMAT_MPEG4_AVC:
   vlVaHandleSliceParameterBufferH264(context, buf);
   break;
diff --git a/src/gallium/state_trackers/va/picture_h264.c 
b/src/gallium/state_trackers/va/picture_h264.c
index acbfe5d..883a94a 100644
--- a/src/gallium/state_trackers/va/picture_h264.c
+++ b/src/gallium/state_trackers/va/picture_h264.c
@@ -48,6 +48,7 @@ void vlVaHandlePictureParameterBufferH264(vlVaDriver *drv, 
vlVaContext *context,
unsigned i;
 
assert(buf->size >= sizeof(VAPictureParameterBufferH264) && 
buf->num_elements == 1);
+   context->desc.h264.slice_count = 0;
/*CurrPic*/
context->desc.h264.field_order_cnt[0] = h264->CurrPic.TopFieldOrderCnt;
context->desc.h264.field_order_cnt[1] = h264->CurrPic.BottomFieldOrderCnt;
@@ -162,6 +163,7 @@ void vlVaHandleSliceParameterBufferH264(vlVaContext 
*context, vlVaBuffer *buf)
VASliceParameterBufferH264 *h264 = buf->data;
 
assert(buf->size >= sizeof(VASliceParameterBufferH264) && buf->num_elements 
== 1);
+   context->desc.h264.slice_count += buf->num_elements;
context->desc.h264.num_ref_idx_l0_active_minus1 =
   h264->num_ref_idx_l0_active_minus1;
context->desc.h264.num_ref_idx_l1_active_minus1 =
diff --git a/src/gallium/state_trackers/va/picture_mpeg12.c 
b/src/gallium/state_trackers/va/picture_mpeg12.c
index e587b1e..812e9e5 100644
--- a/src/gallium/state_trackers/va/picture_mpeg12.c
+++ b/src/gallium/state_trackers/va/picture_mpeg12.c
@@ -32,6 +32,7 @@ void vlVaHandlePictureParameterBufferMPEG12(vlVaDriver *drv, 
vlVaContext *contex
VAPictureParameterBufferMPEG2 *mpeg2 = buf->data;
 
assert(buf->size >= sizeof(VAPictureParameterBufferMPEG2) && 
buf->num_elements == 1);
+   context->desc.mpeg12.num_slices = 0;
/*horizontal_size;*/
/*vertical_size;*/
vlVaGetReferenceFrame(drv, mpeg2->forward_reference_picture, 
>desc.mpeg12.ref[0]);
@@ -78,3 +79,8 @@ void vlVaHandleIQMatrixBufferMPEG12(vlVaContext *context, 
vlVaBuffer *buf)
   context->desc.mpeg12.non_intra_matrix = NULL;
 }
 
+void vlVaHandleSliceParameterBufferMPEG12(vlVaContext *context, vlVaBuffer 
*buf)
+{
+   assert(buf->size >= sizeof(VASliceParameterBufferMPEG2) && 
buf->num_elements == 1);
+   context->desc.mpeg12.num_slices += buf->num_elements;
+}
diff --git a/src/gallium/state_trackers/va/picture_vc1.c 
b/src/gallium/state_trackers/va/picture_vc1.c
index f95fd83..6ad1571 100644
--- a/src/gallium/state_trackers/va/picture_vc1.c
+++ b/src/gallium/state_trackers/va/picture_vc1.c
@@ -32,6 +32,7 @@ void vlVaHandlePictureParameterBufferVC1(vlVaDriver *drv, 
vlVaContext *context,
VAPictureParameterBufferVC1 * vc1 = buf->data;
 
assert(buf->size >= sizeof(VAPictureParameterBufferVC1) && 
buf->num_elements == 1);
+   context->desc.vc1.slice_count = 0;
vlVaGetReferenceFrame(drv, vc1->forward_reference_picture, 
>desc.vc1.ref[0]);
vlVaGetReferenceFrame(drv, vc1->backward_reference_picture, 
>desc.vc1.ref[1]);
context->desc.vc1.picture_type = vc1->picture_fields.bits.picture_type;
@@ -65,3 +66,9 @@ void vlVaHandlePictureParameterBufferVC1(vlVaDriver *drv, 
vlVaContext *context,
context->desc.vc1.deblockEnable = vc1->post_processing != 0;
context->desc.vc1.pquant = 
vc1->pic_quantizer_fields.bits.pic_quantizer_scale;
 }
+
+void vlVaHandleSliceParameterBufferVC1(vlVaContext *context, vlVaBuffer *buf)
+{
+   assert(buf->size >= sizeof(VASliceParameterBufferVC1) && buf->num_elements 
== 1);
+   context->desc.vc1.slice_count += buf->num_elements;
+}
diff --git a/src/gallium/state_trackers/va/va_private.h 
b/src/gallium/state_trackers/va/va_private.h
index fa6e0fb..bf9d24b 100644
--- a/src/gallium/state_trackers/va/va_private.h
+++ b/src/gallium/state_trackers/va/va_private.h
@@ -351,10 +351,12 @@ VAStatus 
vlVaHandleVAProcPipelineParameterBufferType(vlVaDriver *drv, vlVaContex
 void 

[Mesa-dev] [PATCH] st/mesa: make KHR_debug output independent of context creation flags

2016-01-04 Thread Nicolai Hähnle
From: Nicolai Hähnle 

Instead, keep track of GL_DEBUG_OUTPUT and (un)install the pipe_debug_callback
accordingly. Hardware drivers can still use the absence of the callback to
skip more expensive operations in the normal case, and users can no longer be
surprised by the need to set the debug flag at context creation time.
---
 src/mesa/state_tracker/st_context.c | 18 ++
 src/mesa/state_tracker/st_debug.c   | 70 +
 src/mesa/state_tracker/st_debug.h   |  4 +++
 src/mesa/state_tracker/st_manager.c | 64 +
 4 files changed, 93 insertions(+), 63 deletions(-)

diff --git a/src/mesa/state_tracker/st_context.c 
b/src/mesa/state_tracker/st_context.c
index 724c3c5..31cc99d 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -80,6 +80,23 @@ DEBUG_GET_ONCE_BOOL_OPTION(mesa_mvp_dp4, "MESA_MVP_DP4", 
FALSE)
 
 
 /**
+ * Called via ctx->Driver.Enable()
+ */
+static void st_Enable(struct gl_context * ctx, GLenum cap, GLboolean state)
+{
+   struct st_context *st = st_context(ctx);
+
+   switch (cap) {
+   case GL_DEBUG_OUTPUT:
+  st_enable_debug_output(st, state);
+  break;
+   default:
+  break;
+   }
+}
+
+
+/**
  * Called via ctx->Driver.UpdateState()
  */
 void st_invalidate_state(struct gl_context * ctx, GLuint new_state)
@@ -457,5 +474,6 @@ void st_init_driver_functions(struct pipe_screen *screen,
 
st_init_vdpau_functions(functions);
 
+   functions->Enable = st_Enable;
functions->UpdateState = st_invalidate_state;
 }
diff --git a/src/mesa/state_tracker/st_debug.c 
b/src/mesa/state_tracker/st_debug.c
index 6d859c6..ac77558 100644
--- a/src/mesa/state_tracker/st_debug.c
+++ b/src/mesa/state_tracker/st_debug.c
@@ -104,3 +104,73 @@ st_print_current(void)
 }
 
 
+/**
+ * Installed as pipe_debug_callback when GL_DEBUG_OUTPUT is enabled.
+ */
+static void
+st_debug_message(void *data,
+ unsigned *id,
+ enum pipe_debug_type ptype,
+ const char *fmt,
+ va_list args)
+{
+   struct st_context *st = data;
+   enum mesa_debug_source source;
+   enum mesa_debug_type type;
+   enum mesa_debug_severity severity;
+
+   switch (ptype) {
+   case PIPE_DEBUG_TYPE_OUT_OF_MEMORY:
+  source = MESA_DEBUG_SOURCE_API;
+  type = MESA_DEBUG_TYPE_ERROR;
+  severity = MESA_DEBUG_SEVERITY_MEDIUM;
+  break;
+   case PIPE_DEBUG_TYPE_ERROR:
+  source = MESA_DEBUG_SOURCE_API;
+  type = MESA_DEBUG_TYPE_ERROR;
+  severity = MESA_DEBUG_SEVERITY_MEDIUM;
+  break;
+   case PIPE_DEBUG_TYPE_SHADER_INFO:
+  source = MESA_DEBUG_SOURCE_SHADER_COMPILER;
+  type = MESA_DEBUG_TYPE_OTHER;
+  severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
+  break;
+   case PIPE_DEBUG_TYPE_PERF_INFO:
+  source = MESA_DEBUG_SOURCE_API;
+  type = MESA_DEBUG_TYPE_PERFORMANCE;
+  severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
+  break;
+   case PIPE_DEBUG_TYPE_INFO:
+  source = MESA_DEBUG_SOURCE_API;
+  type = MESA_DEBUG_TYPE_OTHER;
+  severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
+  break;
+   case PIPE_DEBUG_TYPE_FALLBACK:
+  source = MESA_DEBUG_SOURCE_API;
+  type = MESA_DEBUG_TYPE_PERFORMANCE;
+  severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
+  break;
+   case PIPE_DEBUG_TYPE_CONFORMANCE:
+  source = MESA_DEBUG_SOURCE_API;
+  type = MESA_DEBUG_TYPE_OTHER;
+  severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
+  break;
+   }
+   _mesa_gl_vdebug(st->ctx, id, source, type, severity, fmt, args);
+}
+
+void
+st_enable_debug_output(struct st_context *st, boolean enable)
+{
+   struct pipe_context *pipe = st->pipe;
+
+   if (!pipe->set_debug_callback)
+  return;
+
+   if (enable) {
+  struct pipe_debug_callback cb = { st_debug_message, st };
+  pipe->set_debug_callback(pipe, &cb);
+   } else {
+  pipe->set_debug_callback(pipe, NULL);
+   }
+}
diff --git a/src/mesa/state_tracker/st_debug.h 
b/src/mesa/state_tracker/st_debug.h
index 288eccf..ed3ead8 100644
--- a/src/mesa/state_tracker/st_debug.h
+++ b/src/mesa/state_tracker/st_debug.h
@@ -32,6 +32,8 @@
 #include "pipe/p_compiler.h"
 #include "util/u_debug.h"
 
+struct st_context;
+
 extern void
 st_print_current(void);
 
@@ -59,6 +61,8 @@ extern int ST_DEBUG;
 
 void st_debug_init( void );
 
+void st_enable_debug_output(struct st_context *st, boolean enable);
+
 static inline void
 ST_DBG( unsigned flag, const char *fmt, ... )
 {
diff --git a/src/mesa/state_tracker/st_manager.c 
b/src/mesa/state_tracker/st_manager.c
index d0d261f..525aff7 100644
--- a/src/mesa/state_tracker/st_manager.c
+++ b/src/mesa/state_tracker/st_manager.c
@@ -623,58 +623,6 @@ st_context_destroy(struct st_context_iface *stctxi)
st_destroy_context(st);
 }
 
-static void
-st_debug_message(void *data,
- unsigned *id,
- enum pipe_debug_type ptype,
- const char *fmt,
- 

Re: [Mesa-dev] [PATCH v2] st/mesa: make KHR_debug output independent of context creation flags (v2)

2016-01-04 Thread Ilia Mirkin
Reviewed-by: Ilia Mirkin 

On Mon, Jan 4, 2016 at 5:47 PM, Nicolai Hähnle  wrote:
> From: Nicolai Hähnle 
>
> Instead, keep track of GL_DEBUG_OUTPUT and (un)install the pipe_debug_callback
> accordingly. Hardware drivers can still use the absence of the callback to
> skip more expensive operations in the normal case, and users can no longer be
> surprised by the need to set the debug flag at context creation time.
>
> v2:
> - re-add the proper initialization of debug contexts (Ilia Mirkin)
> - silence a potential warning (Ilia Mirkin)
> ---
>  src/mesa/state_tracker/st_context.c | 18 ++
>  src/mesa/state_tracker/st_debug.c   | 72 
> +
>  src/mesa/state_tracker/st_debug.h   |  4 +++
>  src/mesa/state_tracker/st_manager.c | 61 +++
>  4 files changed, 98 insertions(+), 57 deletions(-)
>
> diff --git a/src/mesa/state_tracker/st_context.c 
> b/src/mesa/state_tracker/st_context.c
> index 724c3c5..31cc99d 100644
> --- a/src/mesa/state_tracker/st_context.c
> +++ b/src/mesa/state_tracker/st_context.c
> @@ -80,6 +80,23 @@ DEBUG_GET_ONCE_BOOL_OPTION(mesa_mvp_dp4, "MESA_MVP_DP4", 
> FALSE)
>
>
>  /**
> + * Called via ctx->Driver.Enable()
> + */
> +static void st_Enable(struct gl_context * ctx, GLenum cap, GLboolean state)
> +{
> +   struct st_context *st = st_context(ctx);
> +
> +   switch (cap) {
> +   case GL_DEBUG_OUTPUT:
> +  st_enable_debug_output(st, state);
> +  break;
> +   default:
> +  break;
> +   }
> +}
> +
> +
> +/**
>   * Called via ctx->Driver.UpdateState()
>   */
>  void st_invalidate_state(struct gl_context * ctx, GLuint new_state)
> @@ -457,5 +474,6 @@ void st_init_driver_functions(struct pipe_screen *screen,
>
> st_init_vdpau_functions(functions);
>
> +   functions->Enable = st_Enable;
> functions->UpdateState = st_invalidate_state;
>  }
> diff --git a/src/mesa/state_tracker/st_debug.c 
> b/src/mesa/state_tracker/st_debug.c
> index 6d859c6..134366d 100644
> --- a/src/mesa/state_tracker/st_debug.c
> +++ b/src/mesa/state_tracker/st_debug.c
> @@ -104,3 +104,75 @@ st_print_current(void)
>  }
>
>
> +/**
> + * Installed as pipe_debug_callback when GL_DEBUG_OUTPUT is enabled.
> + */
> +static void
> +st_debug_message(void *data,
> + unsigned *id,
> + enum pipe_debug_type ptype,
> + const char *fmt,
> + va_list args)
> +{
> +   struct st_context *st = data;
> +   enum mesa_debug_source source;
> +   enum mesa_debug_type type;
> +   enum mesa_debug_severity severity;
> +
> +   switch (ptype) {
> +   case PIPE_DEBUG_TYPE_OUT_OF_MEMORY:
> +  source = MESA_DEBUG_SOURCE_API;
> +  type = MESA_DEBUG_TYPE_ERROR;
> +  severity = MESA_DEBUG_SEVERITY_MEDIUM;
> +  break;
> +   case PIPE_DEBUG_TYPE_ERROR:
> +  source = MESA_DEBUG_SOURCE_API;
> +  type = MESA_DEBUG_TYPE_ERROR;
> +  severity = MESA_DEBUG_SEVERITY_MEDIUM;
> +  break;
> +   case PIPE_DEBUG_TYPE_SHADER_INFO:
> +  source = MESA_DEBUG_SOURCE_SHADER_COMPILER;
> +  type = MESA_DEBUG_TYPE_OTHER;
> +  severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
> +  break;
> +   case PIPE_DEBUG_TYPE_PERF_INFO:
> +  source = MESA_DEBUG_SOURCE_API;
> +  type = MESA_DEBUG_TYPE_PERFORMANCE;
> +  severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
> +  break;
> +   case PIPE_DEBUG_TYPE_INFO:
> +  source = MESA_DEBUG_SOURCE_API;
> +  type = MESA_DEBUG_TYPE_OTHER;
> +  severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
> +  break;
> +   case PIPE_DEBUG_TYPE_FALLBACK:
> +  source = MESA_DEBUG_SOURCE_API;
> +  type = MESA_DEBUG_TYPE_PERFORMANCE;
> +  severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
> +  break;
> +   case PIPE_DEBUG_TYPE_CONFORMANCE:
> +  source = MESA_DEBUG_SOURCE_API;
> +  type = MESA_DEBUG_TYPE_OTHER;
> +  severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
> +  break;
> +   default:
> +  unreachable("invalid debug type");
> +   }
> +   _mesa_gl_vdebug(st->ctx, id, source, type, severity, fmt, args);
> +}
> +
> +void
> +st_enable_debug_output(struct st_context *st, boolean enable)
> +{
> +   struct pipe_context *pipe = st->pipe;
> +
> +   if (!pipe->set_debug_callback)
> +  return;
> +
> +   if (enable) {
> +  struct pipe_debug_callback cb = { st_debug_message, st };
> +  pipe->set_debug_callback(pipe, &cb);
> +   } else {
> +  pipe->set_debug_callback(pipe, NULL);
> +   }
> +}
> diff --git a/src/mesa/state_tracker/st_debug.h 
> b/src/mesa/state_tracker/st_debug.h
> index 288eccf..ed3ead8 100644
> --- a/src/mesa/state_tracker/st_debug.h
> +++ b/src/mesa/state_tracker/st_debug.h
> @@ -32,6 +32,8 @@
>  #include "pipe/p_compiler.h"
>  #include "util/u_debug.h"
>
> +struct st_context;
> +
>  extern void
>  st_print_current(void);
>
> @@ -59,6 +61,8 @@ extern int ST_DEBUG;
>
>  void st_debug_init( void );
>
> +void st_enable_debug_output(struct 

[Mesa-dev] [PATCH v2] st/mesa: make KHR_debug output independent of context creation flags (v2)

2016-01-04 Thread Nicolai Hähnle
From: Nicolai Hähnle 

Instead, keep track of GL_DEBUG_OUTPUT and (un)install the pipe_debug_callback
accordingly. Hardware drivers can still use the absence of the callback to
skip more expensive operations in the normal case, and users can no longer be
surprised by the need to set the debug flag at context creation time.

v2:
- re-add the proper initialization of debug contexts (Ilia Mirkin)
- silence a potential warning (Ilia Mirkin)
---
 src/mesa/state_tracker/st_context.c | 18 ++
 src/mesa/state_tracker/st_debug.c   | 72 +
 src/mesa/state_tracker/st_debug.h   |  4 +++
 src/mesa/state_tracker/st_manager.c | 61 +++
 4 files changed, 98 insertions(+), 57 deletions(-)

diff --git a/src/mesa/state_tracker/st_context.c 
b/src/mesa/state_tracker/st_context.c
index 724c3c5..31cc99d 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -80,6 +80,23 @@ DEBUG_GET_ONCE_BOOL_OPTION(mesa_mvp_dp4, "MESA_MVP_DP4", 
FALSE)
 
 
 /**
+ * Called via ctx->Driver.Enable()
+ */
+static void st_Enable(struct gl_context * ctx, GLenum cap, GLboolean state)
+{
+   struct st_context *st = st_context(ctx);
+
+   switch (cap) {
+   case GL_DEBUG_OUTPUT:
+  st_enable_debug_output(st, state);
+  break;
+   default:
+  break;
+   }
+}
+
+
+/**
  * Called via ctx->Driver.UpdateState()
  */
 void st_invalidate_state(struct gl_context * ctx, GLuint new_state)
@@ -457,5 +474,6 @@ void st_init_driver_functions(struct pipe_screen *screen,
 
st_init_vdpau_functions(functions);
 
+   functions->Enable = st_Enable;
functions->UpdateState = st_invalidate_state;
 }
diff --git a/src/mesa/state_tracker/st_debug.c 
b/src/mesa/state_tracker/st_debug.c
index 6d859c6..134366d 100644
--- a/src/mesa/state_tracker/st_debug.c
+++ b/src/mesa/state_tracker/st_debug.c
@@ -104,3 +104,75 @@ st_print_current(void)
 }
 
 
+/**
+ * Installed as pipe_debug_callback when GL_DEBUG_OUTPUT is enabled.
+ */
+static void
+st_debug_message(void *data,
+ unsigned *id,
+ enum pipe_debug_type ptype,
+ const char *fmt,
+ va_list args)
+{
+   struct st_context *st = data;
+   enum mesa_debug_source source;
+   enum mesa_debug_type type;
+   enum mesa_debug_severity severity;
+
+   switch (ptype) {
+   case PIPE_DEBUG_TYPE_OUT_OF_MEMORY:
+  source = MESA_DEBUG_SOURCE_API;
+  type = MESA_DEBUG_TYPE_ERROR;
+  severity = MESA_DEBUG_SEVERITY_MEDIUM;
+  break;
+   case PIPE_DEBUG_TYPE_ERROR:
+  source = MESA_DEBUG_SOURCE_API;
+  type = MESA_DEBUG_TYPE_ERROR;
+  severity = MESA_DEBUG_SEVERITY_MEDIUM;
+  break;
+   case PIPE_DEBUG_TYPE_SHADER_INFO:
+  source = MESA_DEBUG_SOURCE_SHADER_COMPILER;
+  type = MESA_DEBUG_TYPE_OTHER;
+  severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
+  break;
+   case PIPE_DEBUG_TYPE_PERF_INFO:
+  source = MESA_DEBUG_SOURCE_API;
+  type = MESA_DEBUG_TYPE_PERFORMANCE;
+  severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
+  break;
+   case PIPE_DEBUG_TYPE_INFO:
+  source = MESA_DEBUG_SOURCE_API;
+  type = MESA_DEBUG_TYPE_OTHER;
+  severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
+  break;
+   case PIPE_DEBUG_TYPE_FALLBACK:
+  source = MESA_DEBUG_SOURCE_API;
+  type = MESA_DEBUG_TYPE_PERFORMANCE;
+  severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
+  break;
+   case PIPE_DEBUG_TYPE_CONFORMANCE:
+  source = MESA_DEBUG_SOURCE_API;
+  type = MESA_DEBUG_TYPE_OTHER;
+  severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
+  break;
+   default:
+  unreachable("invalid debug type");
+   }
+   _mesa_gl_vdebug(st->ctx, id, source, type, severity, fmt, args);
+}
+
+void
+st_enable_debug_output(struct st_context *st, boolean enable)
+{
+   struct pipe_context *pipe = st->pipe;
+
+   if (!pipe->set_debug_callback)
+  return;
+
+   if (enable) {
+  struct pipe_debug_callback cb = { st_debug_message, st };
+  pipe->set_debug_callback(pipe, &cb);
+   } else {
+  pipe->set_debug_callback(pipe, NULL);
+   }
+}
diff --git a/src/mesa/state_tracker/st_debug.h 
b/src/mesa/state_tracker/st_debug.h
index 288eccf..ed3ead8 100644
--- a/src/mesa/state_tracker/st_debug.h
+++ b/src/mesa/state_tracker/st_debug.h
@@ -32,6 +32,8 @@
 #include "pipe/p_compiler.h"
 #include "util/u_debug.h"
 
+struct st_context;
+
 extern void
 st_print_current(void);
 
@@ -59,6 +61,8 @@ extern int ST_DEBUG;
 
 void st_debug_init( void );
 
+void st_enable_debug_output(struct st_context *st, boolean enable);
+
 static inline void
 ST_DBG( unsigned flag, const char *fmt, ... )
 {
diff --git a/src/mesa/state_tracker/st_manager.c 
b/src/mesa/state_tracker/st_manager.c
index d0d261f..385e26b 100644
--- a/src/mesa/state_tracker/st_manager.c
+++ b/src/mesa/state_tracker/st_manager.c
@@ -39,6 +39,7 @@
 #include "st_texture.h"
 
 #include "st_context.h"
+#include "st_debug.h"
 #include 

[Mesa-dev] [PATCH 8/8] mesa: minor clean-up of some memcpy/sizeof() calls in m_matrix.c

2016-01-04 Thread Brian Paul
---
 src/mesa/math/m_matrix.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/mesa/math/m_matrix.c b/src/mesa/math/m_matrix.c
index 6522200..b3cfcd2 100644
--- a/src/mesa/math/m_matrix.c
+++ b/src/mesa/math/m_matrix.c
@@ -654,7 +654,7 @@ static GLboolean invert_matrix_3d_no_rot( GLmatrix *mat )
if (MAT(in,0,0) == 0 || MAT(in,1,1) == 0 || MAT(in,2,2) == 0 )
   return GL_FALSE;
 
-   memcpy( out, Identity, 16 * sizeof(GLfloat) );
+   memcpy( out, Identity, sizeof(Identity) );
MAT(out,0,0) = 1.0F / MAT(in,0,0);
MAT(out,1,1) = 1.0F / MAT(in,1,1);
MAT(out,2,2) = 1.0F / MAT(in,2,2);
@@ -687,7 +687,7 @@ static GLboolean invert_matrix_2d_no_rot( GLmatrix *mat )
if (MAT(in,0,0) == 0 || MAT(in,1,1) == 0)
   return GL_FALSE;
 
-   memcpy( out, Identity, 16 * sizeof(GLfloat) );
+   memcpy( out, Identity, sizeof(Identity) );
MAT(out,0,0) = 1.0F / MAT(in,0,0);
MAT(out,1,1) = 1.0F / MAT(in,1,1);
 
@@ -709,7 +709,7 @@ static GLboolean invert_matrix_perspective( GLmatrix *mat )
if (MAT(in,2,3) == 0)
   return GL_FALSE;
 
-   memcpy( out, Identity, 16 * sizeof(GLfloat) );
+   memcpy( out, Identity, sizeof(Identity) );
 
MAT(out,0,0) = 1.0F / MAT(in,0,0);
MAT(out,1,1) = 1.0F / MAT(in,1,1);
@@ -802,7 +802,7 @@ _math_matrix_rotate( GLmatrix *mat,
s = sinf( angle * M_PI / 180.0 );
c = cosf( angle * M_PI / 180.0 );
 
-   memcpy(m, Identity, sizeof(GLfloat)*16);
+   memcpy(m, Identity, sizeof(Identity));
optimized = GL_FALSE;
 
 #define M(row,col)  m[col*4+row]
@@ -1136,8 +1136,8 @@ _math_matrix_viewport(GLmatrix *m, const float scale[3],
 void
 _math_matrix_set_identity( GLmatrix *mat )
 {
-   memcpy( mat->m, Identity, 16*sizeof(GLfloat) );
-   memcpy( mat->inv, Identity, 16*sizeof(GLfloat) );
+   memcpy( mat->m, Identity, sizeof(Identity) );
+   memcpy( mat->inv, Identity, sizeof(Identity) );
 
mat->type = MATRIX_IDENTITY;
mat->flags &= ~(MAT_DIRTY_FLAGS|
@@ -1437,7 +1437,7 @@ _math_matrix_is_dirty( const GLmatrix *m )
 void
 _math_matrix_copy( GLmatrix *to, const GLmatrix *from )
 {
-   memcpy( to->m, from->m, sizeof(Identity) );
+   memcpy(to->m, from->m, 16 * sizeof(GLfloat));
memcpy(to->inv, from->inv, 16 * sizeof(GLfloat));
to->flags = from->flags;
to->type = from->type;
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/8] mesa: check for z=0 in _mesa_Vertex3dv()

2016-01-04 Thread Brian Paul
It's very rare that a GL app calls glVertex3dv(), but one in particular
calls it a lot, always with Z = 0.  Check for that condition and convert
the call into glVertex2f.  This reduces VBO memory used and reduces
the number of times we have to switch between float[2] and float[3]
vertex formats in the svga driver.  This results in a small but
measurable performance improvement.
---
 src/mesa/main/api_loopback.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/mesa/main/api_loopback.c b/src/mesa/main/api_loopback.c
index a7fd82c..8b63d9c 100644
--- a/src/mesa/main/api_loopback.c
+++ b/src/mesa/main/api_loopback.c
@@ -629,7 +629,10 @@ _mesa_Vertex2sv( const GLshort *v )
 void GLAPIENTRY
 _mesa_Vertex3dv( const GLdouble *v )
 {
-   VERTEX3( (GLfloat) v[0], (GLfloat) v[1], (GLfloat) v[2] );
+   if (v[2] == 0.0)
+  VERTEX2( (GLfloat) v[0], (GLfloat) v[1] );
+   else
+  VERTEX3( (GLfloat) v[0], (GLfloat) v[1], (GLfloat) v[2] );
 }
 
 void GLAPIENTRY
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/8] svga: change svga_hw_view_state::dirty to boolean

2016-01-04 Thread Brian Paul
Since it's a true/false value.
---
 src/gallium/drivers/svga/svga_context.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/svga/svga_context.h 
b/src/gallium/drivers/svga/svga_context.h
index d21b071..e4f29b8 100644
--- a/src/gallium/drivers/svga/svga_context.h
+++ b/src/gallium/drivers/svga/svga_context.h
@@ -314,7 +314,7 @@ struct svga_hw_view_state
struct svga_sampler_view *v;
unsigned min_lod;
unsigned max_lod;
-   int dirty;
+   boolean dirty;
 };
 
 /* Updated by calling svga_update_state( SVGA_STATE_HW_DRAW )
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 7/8] util: add debug_dump_ubyte_rgba_bmp()

2016-01-04 Thread Brian Paul
Like debug_dump_float_rgba_bmp() but takes ubyte values.
---
 src/gallium/auxiliary/util/u_debug.c | 59 
 src/gallium/auxiliary/util/u_debug.h |  4 +++
 2 files changed, 63 insertions(+)

diff --git a/src/gallium/auxiliary/util/u_debug.c 
b/src/gallium/auxiliary/util/u_debug.c
index cb162d8..2b60559 100644
--- a/src/gallium/auxiliary/util/u_debug.c
+++ b/src/gallium/auxiliary/util/u_debug.c
@@ -727,6 +727,65 @@ error1:
;
 }
 
+void
+debug_dump_ubyte_rgba_bmp(const char *filename,
+  unsigned width, unsigned height,
+  const ubyte *rgba, unsigned stride)
+{
+   FILE *stream;
+   struct bmp_file_header bmfh;
+   struct bmp_info_header bmih;
+   unsigned x, y;
+
+   assert(rgba);
+   if(!rgba)
+  goto error1;
+
+   bmfh.bfType = 0x4d42;
+   bmfh.bfSize = 14 + 40 + height*width*4;
+   bmfh.bfReserved1 = 0;
+   bmfh.bfReserved2 = 0;
+   bmfh.bfOffBits = 14 + 40;
+
+   bmih.biSize = 40;
+   bmih.biWidth = width;
+   bmih.biHeight = height;
+   bmih.biPlanes = 1;
+   bmih.biBitCount = 32;
+   bmih.biCompression = 0;
+   bmih.biSizeImage = height*width*4;
+   bmih.biXPelsPerMeter = 0;
+   bmih.biYPelsPerMeter = 0;
+   bmih.biClrUsed = 0;
+   bmih.biClrImportant = 0;
+
+   stream = fopen(filename, "wb");
+   assert(stream);
+   if(!stream)
+  goto error1;
+
+   fwrite(&bmfh, 14, 1, stream);
+   fwrite(&bmih, 40, 1, stream);
+
+   y = height;
+   while(y--) {
+  const ubyte *ptr = rgba + (stride * y * 4);
+  for(x = 0; x < width; ++x)
+  {
+ struct bmp_rgb_quad pixel;
+ pixel.rgbRed   = ptr[x*4 + 0];
+ pixel.rgbGreen = ptr[x*4 + 1];
+ pixel.rgbBlue  = ptr[x*4 + 2];
+ pixel.rgbAlpha = ptr[x*4 + 3];
+ fwrite(&pixel, 1, 4, stream);
+  }
+   }
+
+   fclose(stream);
+error1:
+   ;
+}
+
 
 /**
  * Print PIPE_TRANSFER_x flags with a message.
diff --git a/src/gallium/auxiliary/util/u_debug.h 
b/src/gallium/auxiliary/util/u_debug.h
index 34668f8..671bd37 100644
--- a/src/gallium/auxiliary/util/u_debug.h
+++ b/src/gallium/auxiliary/util/u_debug.h
@@ -490,12 +490,16 @@ void debug_dump_transfer_bmp(struct pipe_context *pipe,
 void debug_dump_float_rgba_bmp(const char *filename,
unsigned width, unsigned height,
float *rgba, unsigned stride);
+void debug_dump_ubyte_rgba_bmp(const char *filename,
+   unsigned width, unsigned height,
+   const ubyte *rgba, unsigned stride);
 #else
 #define debug_dump_image(prefix, format, cpp, width, height, stride, data) 
((void)0)
 #define debug_dump_surface(pipe, prefix, surface) ((void)0)
 #define debug_dump_surface_bmp(pipe, filename, surface) ((void)0)
 #define debug_dump_transfer_bmp(filename, transfer, ptr) ((void)0)
 #define debug_dump_float_rgba_bmp(filename, width, height, rgba, stride) 
((void)0)
+#define debug_dump_ubyte_rgba_bmp(filename, width, height, rgba, stride) 
((void)0)
 #endif
 
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/8] svga: fix test for SVGA_NEW_STIPPLE

2016-01-04 Thread Brian Paul
We only want to set the SVGA_NEW_STIPPLE dirty flag when the polygon
stipple state changes.  Before, we only set the flag when we were
enabling stipple, but not disabling.

We don't really have to add SVGA_NEW_STIPPLE to the dirty FS state
set since it's a subset of SVGA_NEW_RAST, but let's be explicit.

This doesn't fix any known bugs.
---
 src/gallium/drivers/svga/svga_pipe_rasterizer.c | 11 +++
 src/gallium/drivers/svga/svga_state_fs.c|  1 +
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/svga/svga_pipe_rasterizer.c 
b/src/gallium/drivers/svga/svga_pipe_rasterizer.c
index fa1744f..8e0db53 100644
--- a/src/gallium/drivers/svga/svga_pipe_rasterizer.c
+++ b/src/gallium/drivers/svga/svga_pipe_rasterizer.c
@@ -368,13 +368,16 @@ static void svga_bind_rasterizer_state( struct 
pipe_context *pipe,
struct svga_context *svga = svga_context(pipe);
struct svga_rasterizer_state *raster = (struct svga_rasterizer_state 
*)state;
 
+   if (!raster ||
+   !svga->curr.rast ||
+   raster->templ.poly_stipple_enable !=
+   svga->curr.rast->templ.poly_stipple_enable) {
+  svga->dirty |= SVGA_NEW_STIPPLE;
+   }
+
svga->curr.rast = raster;
 
svga->dirty |= SVGA_NEW_RAST;
-
-   if (raster && raster->templ.poly_stipple_enable) {
-  svga->dirty |= SVGA_NEW_STIPPLE;
-   }
 }
 
 static void
diff --git a/src/gallium/drivers/svga/svga_state_fs.c 
b/src/gallium/drivers/svga/svga_state_fs.c
index e392778..bac9166 100644
--- a/src/gallium/drivers/svga/svga_state_fs.c
+++ b/src/gallium/drivers/svga/svga_state_fs.c
@@ -452,6 +452,7 @@ struct svga_tracked_state svga_hw_fs =
 SVGA_NEW_TEXTURE_BINDING |
 SVGA_NEW_NEED_SWTNL |
 SVGA_NEW_RAST |
+SVGA_NEW_STIPPLE |
 SVGA_NEW_REDUCED_PRIMITIVE |
 SVGA_NEW_SAMPLER |
 SVGA_NEW_FRAME_BUFFER |
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] i965/wm: use binding size for ubo/ssbo when automatic size is unset

2016-01-04 Thread Ilia Mirkin
Signed-off-by: Ilia Mirkin 
Cc: Samuel Iglesias Gonsálvez 
Cc: Marta Lofstedt 
---

I have not tested whether this fixes your CTS tests (as I don't have access
to them). But it should... and if it doesn't, it'll be a variation of the
logic used here.

 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 14 ++
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 76dc577..5ab2f7f 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -946,12 +946,15 @@ brw_upload_ubo_surfaces(struct brw_context *brw,
   } else {
  struct intel_buffer_object *intel_bo =
 intel_buffer_object(binding->BufferObject);
+ GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
+ if (!binding->AutomaticSize)
+size = MIN2(size, binding->Size);
  drm_intel_bo *bo =
 intel_bufferobj_buffer(brw, intel_bo,
binding->Offset,
-   binding->BufferObject->Size - 
binding->Offset);
+   size);
  brw_create_constant_surface(brw, bo, binding->Offset,
- binding->BufferObject->Size - 
binding->Offset,
+ size,
  _surf_offsets[i]);
   }
}
@@ -968,12 +971,15 @@ brw_upload_ubo_surfaces(struct brw_context *brw,
   } else {
  struct intel_buffer_object *intel_bo =
 intel_buffer_object(binding->BufferObject);
+ GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
+ if (!binding->AutomaticSize)
+size = MIN2(size, binding->Size);
  drm_intel_bo *bo =
 intel_bufferobj_buffer(brw, intel_bo,
binding->Offset,
-   binding->BufferObject->Size - 
binding->Offset);
+   size);
  brw_create_buffer_surface(brw, bo, binding->Offset,
-   binding->BufferObject->Size - 
binding->Offset,
+   size,
_surf_offsets[i]);
   }
}
-- 
2.4.10

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] Revert "i965/wm: use proper API buffer size for the surfaces."

2016-01-04 Thread Ilia Mirkin
This reverts commit 8cf2e892fca20c4776b4a07c39918343cb2d4e0e. It's
entirely bogus to attempt to store anything about the binding in the buffer
object itself, which might be bound any number of times.

Signed-off-by: Ilia Mirkin 
Cc: Samuel Iglesias Gonsálvez 
Cc: Marta Lofstedt 
---
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 12 
 src/mesa/drivers/dri/i965/intel_buffer_objects.c |  1 -
 src/mesa/main/bufferobj.c|  4 +---
 src/mesa/main/mtypes.h   |  1 -
 4 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 7da4a40..76dc577 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -946,14 +946,12 @@ brw_upload_ubo_surfaces(struct brw_context *brw,
   } else {
  struct intel_buffer_object *intel_bo =
 intel_buffer_object(binding->BufferObject);
- GLsizeiptrARB size = MIN2(binding->BufferObject->BufferRangeSize,
-   binding->BufferObject->Size - 
binding->Offset);
  drm_intel_bo *bo =
 intel_bufferobj_buffer(brw, intel_bo,
binding->Offset,
-   size);
+   binding->BufferObject->Size - 
binding->Offset);
  brw_create_constant_surface(brw, bo, binding->Offset,
- size,
+ binding->BufferObject->Size - 
binding->Offset,
  _surf_offsets[i]);
   }
}
@@ -970,14 +968,12 @@ brw_upload_ubo_surfaces(struct brw_context *brw,
   } else {
  struct intel_buffer_object *intel_bo =
 intel_buffer_object(binding->BufferObject);
- GLsizeiptrARB size = MIN2(binding->BufferObject->BufferRangeSize,
-   binding->BufferObject->Size - 
binding->Offset);
  drm_intel_bo *bo =
 intel_bufferobj_buffer(brw, intel_bo,
binding->Offset,
-   size);
+   binding->BufferObject->Size - 
binding->Offset);
  brw_create_buffer_surface(brw, bo, binding->Offset,
-   size,
+   binding->BufferObject->Size - 
binding->Offset,
_surf_offsets[i]);
   }
}
diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c 
b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
index b26c939..7a5b3fc 100644
--- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
@@ -198,7 +198,6 @@ brw_buffer_data(struct gl_context *ctx,
(void) target;
 
intel_obj->Base.Size = size;
-   intel_obj->Base.BufferRangeSize = size;
intel_obj->Base.Usage = usage;
intel_obj->Base.StorageFlags = storageFlags;
 
diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index 774d1d5..fef84e0 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -43,7 +43,7 @@
 #include "glformats.h"
 #include "texstore.h"
 #include "transformfeedback.h"
-#include "macros.h"
+
 
 /* Debug flags */
 /*#define VBO_DEBUG*/
@@ -2855,7 +2855,6 @@ bind_buffer_range_uniform_buffer(struct gl_context *ctx,
 
_mesa_reference_buffer_object(ctx, &ctx->UniformBuffer, bufObj);
bind_uniform_buffer(ctx, index, bufObj, offset, size, GL_FALSE);
-   bufObj->BufferRangeSize = MIN2(size, bufObj->Size);
 }
 
 /**
@@ -2891,7 +2890,6 @@ bind_buffer_range_shader_storage_buffer(struct gl_context 
*ctx,
 
_mesa_reference_buffer_object(ctx, &ctx->ShaderStorageBuffer, bufObj);
bind_shader_storage_buffer(ctx, index, bufObj, offset, size, GL_FALSE);
-   bufObj->BufferRangeSize = MIN2(size, bufObj->Size);
 }
 
 /**
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index a03a2c1..dd52368 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1268,7 +1268,6 @@ struct gl_buffer_object
GLenum Usage;/**< GL_STREAM_DRAW_ARB, GL_STREAM_READ_ARB, etc. */
GLbitfield StorageFlags; /**< GL_MAP_PERSISTENT_BIT, etc. */
GLsizeiptrARB Size;  /**< Size of buffer storage in bytes */
-   GLsizeiptrARB BufferRangeSize;  /**< Amount of data that can be read from 
or written to, in bytes */
GLubyte *Data;   /**< Location of storage either in RAM or VRAM. */
GLboolean DeletePending;   /**< true if buffer object is removed from the 
hash */
GLboolean Written;   /**< Ever written to? (for debugging) */
-- 
2.4.10

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/4] Actual ARB_multi_draw_indirect support

2016-01-04 Thread Ilia Mirkin
On Mon, Jan 4, 2016 at 10:07 PM, Ian Romanick  wrote:
> On 12/31/2015 11:55 AM, Ilia Mirkin wrote:
>> The current support in mesa for ARB_multi_draw_indirect is pretty
>> shameful... it just does N separate draws. Instead make it possible
>> for drivers to handle this in a more efficient manner. Also create the
>> backend interfaces necessary to support ARB_indirect_parameters,
>> although the (trivial) frontend bits are not part of this series.
>>
>> Of course right now st/mesa will still break up the multidraw into N
>> draws since no backends actually support this. And there are no
>> piglits (other than one in images) which actually make use of
>> glMultiDraw*Indirect... oh well.
>
> Are there any known applications that use this feature?

Maybe? My motivation to fix this was that the existing thing was
dirty, and I also wanted to do ARB_indirect_parameters (not that any
applications use *that* either).

Supertuxkart will use it if you have a bunch of stuff which mesa doesn't have:

https://github.com/supertuxkart/stk-code/blob/9e6cf8ee9180fb3cfd4524bef0990a8675b1cd2f/src/graphics/render_geometry.cpp#L1165

The condition for this getting triggered, from what I can tell, is
"isAZDOEnabled", which only is enabled if you have bindless textures
(and multi draw obviously).

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/4] vbo: create a new draw function interface for indirect draws

2016-01-04 Thread Ian Romanick
On 12/31/2015 11:55 AM, Ilia Mirkin wrote:
> This is optional for now in the transition period, but optimally all
> backends that support indirect draws would switch over to it and we can
> remove the support for indirect in the "regular" draw function.
> 
> This should allow a backend to properly support ARB_multi_draw_indirect
> and ARB_indirect_parameters.
> 
> Signed-off-by: Ilia Mirkin 
> ---
>  src/mesa/vbo/vbo.h|  15 
>  src/mesa/vbo/vbo_context.c|   7 ++
>  src/mesa/vbo/vbo_context.h|   6 ++
>  src/mesa/vbo/vbo_exec_array.c | 166 
> +-
>  4 files changed, 127 insertions(+), 67 deletions(-)
> 
> diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h
> index cef3b8c..0c63bf3 100644
> --- a/src/mesa/vbo/vbo.h
> +++ b/src/mesa/vbo/vbo.h
> @@ -110,6 +110,18 @@ typedef void (*vbo_draw_func)( struct gl_context *ctx,
>  struct gl_buffer_object *indirect);
>  
>  
> +typedef void (*vbo_indirect_draw_func)(
> +   struct gl_context *ctx,
> +   GLuint mode,
> +   struct gl_buffer_object *indirect_data,
> +   GLsizeiptr indirect_offset,
> +   unsigned draw_count,
> +   unsigned stride,
> +   struct gl_buffer_object *indirect_params,
> +   GLsizeiptr indirect_params_offset,
> +   const struct _mesa_index_buffer *ib);
> +
> +
>  
>  
>  /* Utility function to cope with various constraints on tnl modules or
> @@ -179,6 +191,9 @@ void vbo_always_unmap_buffers(struct gl_context *ctx);
>  
>  void vbo_set_draw_func(struct gl_context *ctx, vbo_draw_func func);
>  
> +void vbo_set_indirect_draw_func(struct gl_context *ctx,
> +vbo_indirect_draw_func func);
> +
>  void vbo_check_buffers_are_unmapped(struct gl_context *ctx);
>  
>  void vbo_bind_arrays(struct gl_context *ctx);
> diff --git a/src/mesa/vbo/vbo_context.c b/src/mesa/vbo/vbo_context.c
> index 5e1a760..8a196e7 100644
> --- a/src/mesa/vbo/vbo_context.c
> +++ b/src/mesa/vbo/vbo_context.c
> @@ -223,3 +223,10 @@ void vbo_set_draw_func(struct gl_context *ctx, 
> vbo_draw_func func)
> vbo->draw_prims = func;
>  }
>  
> +
> +void vbo_set_indirect_draw_func(struct gl_context *ctx,
> +vbo_indirect_draw_func func)
> +{
> +   struct vbo_context *vbo = vbo_context(ctx);
> +   vbo->draw_indirect_prims = func;
> +}
> diff --git a/src/mesa/vbo/vbo_context.h b/src/mesa/vbo/vbo_context.h
> index 6293a8b..11f9b17 100644
> --- a/src/mesa/vbo/vbo_context.h
> +++ b/src/mesa/vbo/vbo_context.h
> @@ -76,6 +76,12 @@ struct vbo_context {
>  * is responsible for initiating any fallback actions required:
>  */
> vbo_draw_func draw_prims;
> +
> +   /* Optional callback for indirect draws. This allows multidraws to not be
> +* broken up, as well as for the actual count to be passed in as a 
> separate
> +* indirect parameter.
> +*/
> +   vbo_indirect_draw_func draw_indirect_prims;
>  };
>  
>  
> diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
> index 502b288..3da2d19 100644
> --- a/src/mesa/vbo/vbo_exec_array.c
> +++ b/src/mesa/vbo/vbo_exec_array.c
> @@ -1550,23 +1550,30 @@ vbo_validated_drawarraysindirect(struct gl_context 
> *ctx,
>  
> vbo_bind_arrays(ctx);
>  
> -   memset(prim, 0, sizeof(prim));
> -   prim[0].begin = 1;
> -   prim[0].end = 1;
> -   prim[0].mode = mode;
> -   prim[0].is_indirect = 1;
> -   prim[0].indirect_offset = (GLsizeiptr)indirect;
> -
> -   /* NOTE: We do NOT want to handle primitive restart here, nor perform any
> -* other checks that require knowledge of the values in the command 
> buffer.
> -* That would defeat the whole purpose of this function.
> -*/
> +   if (vbo->draw_indirect_prims) {

My expectation is that we'll want to transition quickly to whatever new
interface we decide upon.  Would it be possible to make the existing
loops over vbo->draw_prims be a separate function that is the default
implementation of vbo->draw_indirect_prims?  At the very least, that
would make the callers a bit cleaner.  We also wouldn't punish "proper"
implementations with the extra test, and the indirection ought to
provide more incentive to "improper" implementations to get on it. :)

I haven't looked very closely at the actual interface yet.  Most of my
day back at work was going through e-mail, and going to buy a new
keyboard with a "D" key that isn't all worn out. :)

> +  vbo->draw_indirect_prims(ctx, mode,
> +   ctx->DrawIndirectBuffer, (GLsizeiptr)indirect,
> +   1 /* primcount */, 0 /* stride */,
> +   NULL, 0, NULL);
> +   } else {
> +  memset(prim, 0, sizeof(prim));
> +  prim[0].begin = 1;
> +  prim[0].end = 1;
> +  prim[0].mode = mode;
> +  prim[0].is_indirect = 1;
> +  prim[0].indirect_offset = (GLsizeiptr)indirect;
> +
> +  /* NOTE: We do NOT want to handle primitive restart here, nor perform
> +   * 

Re: [Mesa-dev] [PATCH 1/6] gallium: document PK2H/UP2H

2016-01-04 Thread Ilia Mirkin
On Mon, Jan 4, 2016 at 8:32 PM, Ian Romanick  wrote:
> On 01/03/2016 10:02 AM, Ilia Mirkin wrote:
>> On Sun, Jan 3, 2016 at 12:33 PM, Roland Scheidegger  
>> wrote:
>>> Am 03.01.2016 um 01:37 schrieb Ilia Mirkin:
>>>> Signed-off-by: Ilia Mirkin 
>>>> ---
>>>>  src/gallium/docs/source/tgsi.rst | 10 --
>>>>  1 file changed, 8 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/src/gallium/docs/source/tgsi.rst 
>>>> b/src/gallium/docs/source/tgsi.rst
>>>> index 955ece8..f69998f 100644
>>>> --- a/src/gallium/docs/source/tgsi.rst
>>>> +++ b/src/gallium/docs/source/tgsi.rst
>>>> @@ -458,7 +458,9 @@ while DDY is allowed to be the same for the entire 2x2 
>>>> quad.
>>>>
>>>>  .. opcode:: PK2H - Pack Two 16-bit Floats
>>>>
>>>> -  TBD
>>>> +.. math::
>>>> +
>>>> +  dst.x = f32\_to\_f16(src.x) | f32\_to\_f16(src.y) << 16
>>> This doesn't quite match the tgsi info description (which says that the
>>> result is
>>> replicated). If you don't want channel replication probably should make
>>> that CHAN
>>> there instead.
>>
>> I'll add the replication to the docs. Looks like NV_fragment_program
>> also wanted this:
>>
>>   tmp0 = VectorLoad(op0);
>>   /* result obtained by combining raw bits of tmp0.x, tmp0.y */
>>   result.x = RawBits(tmp0.x) | (RawBits(tmp0.y) << 16);
>>   result.y = RawBits(tmp0.x) | (RawBits(tmp0.y) << 16);
>>   result.z = RawBits(tmp0.x) | (RawBits(tmp0.y) << 16);
>>   result.w = RawBits(tmp0.x) | (RawBits(tmp0.y) << 16);
>>
>> But looks like it's just packing, not actually converting. And it's
>> unclear whether UP2H is converting or not... let's assume that they do
>> the conversions or else this is going to be useless.
>
> I thought that NV_fragment_program had fp16 as a native type.  You did
> something like suffix the instruction with H.  In that case, the pack
> instruction wouldn't need to convert.  Right?

Ah that explains why the spec docs don't talk about conversions.
Anyways, we're not going to be adding half-width temp registers to
TGSI until there's a more compelling use-case, until then I think the
current docs that I pushed yesterday can probably stand :)

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] vbo: create a new draw function interface for indirect draws

2016-01-04 Thread Ilia Mirkin
All indirect draws are passed to the new draw function. By default
there's a fallback implementation which pipes it right back to
draw_prims, but eventually both the fallback and draw_prim's support for
indirect drawing should be removed.

This should allow a backend to properly support ARB_multi_draw_indirect
and ARB_indirect_parameters.

Signed-off-by: Ilia Mirkin 
Acked-by: Marek Olšák 
---

v1 -> v2: Per Ian's suggestion, add a fallback implementation so that the
  main paths are nice and clean. The fallback implementation of
  draw_indirect_prims just creates a separate _mesa_prim for each indirect
  draw as before and calls draw_prims.

 src/mesa/vbo/vbo.h| 15 +++
 src/mesa/vbo/vbo_context.c| 50 +++
 src/mesa/vbo/vbo_context.h|  6 +++
 src/mesa/vbo/vbo_exec_array.c | 93 +--
 4 files changed, 89 insertions(+), 75 deletions(-)

diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h
index cef3b8c..0c63bf3 100644
--- a/src/mesa/vbo/vbo.h
+++ b/src/mesa/vbo/vbo.h
@@ -110,6 +110,18 @@ typedef void (*vbo_draw_func)( struct gl_context *ctx,
   struct gl_buffer_object *indirect);
 
 
+typedef void (*vbo_indirect_draw_func)(
+   struct gl_context *ctx,
+   GLuint mode,
+   struct gl_buffer_object *indirect_data,
+   GLsizeiptr indirect_offset,
+   unsigned draw_count,
+   unsigned stride,
+   struct gl_buffer_object *indirect_params,
+   GLsizeiptr indirect_params_offset,
+   const struct _mesa_index_buffer *ib);
+
+
 
 
 /* Utility function to cope with various constraints on tnl modules or
@@ -179,6 +191,9 @@ void vbo_always_unmap_buffers(struct gl_context *ctx);
 
 void vbo_set_draw_func(struct gl_context *ctx, vbo_draw_func func);
 
+void vbo_set_indirect_draw_func(struct gl_context *ctx,
+vbo_indirect_draw_func func);
+
 void vbo_check_buffers_are_unmapped(struct gl_context *ctx);
 
 void vbo_bind_arrays(struct gl_context *ctx);
diff --git a/src/mesa/vbo/vbo_context.c b/src/mesa/vbo/vbo_context.c
index 5e1a760..c29f72b 100644
--- a/src/mesa/vbo/vbo_context.c
+++ b/src/mesa/vbo/vbo_context.c
@@ -135,6 +135,48 @@ static void init_mat_currval(struct gl_context *ctx)
}
 }
 
+static void
+vbo_draw_indirect_prims(struct gl_context *ctx,
+GLuint mode,
+struct gl_buffer_object *indirect_data,
+GLsizeiptr indirect_offset,
+unsigned draw_count,
+unsigned stride,
+struct gl_buffer_object *indirect_params,
+GLsizeiptr indirect_params_offset,
+const struct _mesa_index_buffer *ib)
+{
+   struct vbo_context *vbo = vbo_context(ctx);
+   struct _mesa_prim *prim;
+   GLsizei i;
+
+   prim = calloc(draw_count, sizeof(*prim));
+   if (prim == NULL) {
+  _mesa_error(ctx, GL_OUT_OF_MEMORY, "gl%sDraw%sIndirect%s",
+  (draw_count > 1) ? "Multi" : "",
+  ib ? "Elements" : "Arrays",
+  indirect_params ? "CountARB" : "");
+  return;
+   }
+
+   prim[0].begin = 1;
+   prim[draw_count - 1].end = 1;
+   for (i = 0; i < draw_count; ++i, indirect_offset += stride) {
+  prim[i].mode = mode;
+  prim[i].indexed = !!ib;
+  prim[i].indirect_offset = indirect_offset;
+  prim[i].is_indirect = 1;
+  prim[i].draw_id = i;
+   }
+
+   vbo->draw_prims(ctx, prim, draw_count,
+   ib, GL_TRUE, 0, ~0,
+   NULL, 0,
+   ctx->DrawIndirectBuffer);
+
+   free(prim);
+}
+
 
 GLboolean _vbo_CreateContext( struct gl_context *ctx )
 {
@@ -152,6 +194,7 @@ GLboolean _vbo_CreateContext( struct gl_context *ctx )
init_legacy_currval( ctx );
init_generic_currval( ctx );
init_mat_currval( ctx );
+   vbo_set_indirect_draw_func(ctx, vbo_draw_indirect_prims);
 
/* Build mappings from VERT_ATTRIB -> VBO_ATTRIB depending on type
 * of vertex program active.
@@ -223,3 +266,10 @@ void vbo_set_draw_func(struct gl_context *ctx, 
vbo_draw_func func)
vbo->draw_prims = func;
 }
 
+
+void vbo_set_indirect_draw_func(struct gl_context *ctx,
+vbo_indirect_draw_func func)
+{
+   struct vbo_context *vbo = vbo_context(ctx);
+   vbo->draw_indirect_prims = func;
+}
diff --git a/src/mesa/vbo/vbo_context.h b/src/mesa/vbo/vbo_context.h
index 6293a8b..11f9b17 100644
--- a/src/mesa/vbo/vbo_context.h
+++ b/src/mesa/vbo/vbo_context.h
@@ -76,6 +76,12 @@ struct vbo_context {
 * is responsible for initiating any fallback actions required:
 */
vbo_draw_func draw_prims;
+
+   /* Optional callback for indirect draws. This allows multidraws to not be
+* broken up, as well as for the actual count to be passed in as a separate
+* indirect parameter.
+*/
+   vbo_indirect_draw_func draw_indirect_prims;
 };
 
 
diff --git 

[Mesa-dev] [PATCH 2/8] svga: avoid emitting redundant SetVertexBuffers() commands

2016-01-04 Thread Brian Paul
---
 src/gallium/drivers/svga/svga_context.h |  5 +
 src/gallium/drivers/svga/svga_draw.c| 26 +-
 2 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/svga/svga_context.h 
b/src/gallium/drivers/svga/svga_context.h
index c282932..d21b071 100644
--- a/src/gallium/drivers/svga/svga_context.h
+++ b/src/gallium/drivers/svga/svga_context.h
@@ -345,6 +345,11 @@ struct svga_hw_draw_state
SVGA3dElementLayoutId layout_id;
SVGA3dPrimitiveType topology;
 
+   /** Vertex buffer state */
+   SVGA3dVertexBuffer vbuffers[PIPE_MAX_ATTRIBS];
+   struct svga_winsys_surface *vbuffer_handles[PIPE_MAX_ATTRIBS];
+   unsigned num_vbuffers;
+
struct svga_winsys_surface *ib;  /**< index buffer for drawing */
SVGA3dSurfaceFormat ib_format;
unsigned ib_offset;
diff --git a/src/gallium/drivers/svga/svga_draw.c 
b/src/gallium/drivers/svga/svga_draw.c
index 2d3631d..d4c9914 100644
--- a/src/gallium/drivers/svga/svga_draw.c
+++ b/src/gallium/drivers/svga/svga_draw.c
@@ -517,11 +517,27 @@ draw_vgpu10(struct svga_hwtnl *hwtnl,
  buffers[i].offset = hwtnl->cmd.vbufs[i].buffer_offset;
   }
   if (vbuf_count > 0) {
- ret = SVGA3D_vgpu10_SetVertexBuffers(svga->swc, vbuf_count,
-  0,/* startBuffer */
-  buffers, vb_handle);
- if (ret != PIPE_OK)
-return ret;
+ /* If we haven't yet emitted a drawing command or if any
+  * vertex buffer state is changing, issue that state now.
+  */
+ if (((hwtnl->cmd.swc->hints & SVGA_HINT_FLAG_DRAW_EMITTED) == 0) ||
+ vbuf_count != svga->state.hw_draw.num_vbuffers ||
+ memcmp(buffers, svga->state.hw_draw.vbuffers,
+vbuf_count * sizeof(buffers[0])) ||
+ memcmp(vb_handle, svga->state.hw_draw.vbuffer_handles,
+vbuf_count * sizeof(vb_handle[0]))) {
+ret = SVGA3D_vgpu10_SetVertexBuffers(svga->swc, vbuf_count,
+ 0,/* startBuffer */
+ buffers, vb_handle);
+if (ret != PIPE_OK)
+   return ret;
+
+svga->state.hw_draw.num_vbuffers = vbuf_count;
+memcpy(svga->state.hw_draw.vbuffers, buffers,
+   vbuf_count * sizeof(buffers[0]));
+memcpy(svga->state.hw_draw.vbuffer_handles, vb_handle,
+   vbuf_count * sizeof(vb_handle[0]));
+ }
   }
}
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/8] svga: check for no-ops in svga_bind_sampler_states()

2016-01-04 Thread Brian Paul
and svga_set_sampler_views().  If there's no change, return early
and don't set a SVGA_NEW_x dirty state flag.
---
 src/gallium/drivers/svga/svga_pipe_sampler.c | 16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/svga/svga_pipe_sampler.c 
b/src/gallium/drivers/svga/svga_pipe_sampler.c
index 9524117..3e778f0 100644
--- a/src/gallium/drivers/svga/svga_pipe_sampler.c
+++ b/src/gallium/drivers/svga/svga_pipe_sampler.c
@@ -287,6 +287,7 @@ svga_bind_sampler_states(struct pipe_context *pipe,
 {
struct svga_context *svga = svga_context(pipe);
unsigned i;
+   boolean any_change = FALSE;
 
assert(shader < PIPE_SHADER_TYPES);
assert(start + num <= PIPE_MAX_SAMPLERS);
@@ -295,8 +296,15 @@ svga_bind_sampler_states(struct pipe_context *pipe,
if (!svga_have_vgpu10(svga) && shader != PIPE_SHADER_FRAGMENT)
   return;
 
-   for (i = 0; i < num; i++)
+   for (i = 0; i < num; i++) {
+  if (svga->curr.sampler[shader][start + i] != samplers[i])
+ any_change = TRUE;
   svga->curr.sampler[shader][start + i] = samplers[i];
+   }
+
+   if (!any_change) {
+  return;
+   }
 
/* find highest non-null sampler[] entry */
{
@@ -405,6 +413,7 @@ svga_set_sampler_views(struct pipe_context *pipe,
unsigned flag_1d = 0;
unsigned flag_srgb = 0;
uint i;
+   boolean any_change = FALSE;
 
assert(shader < PIPE_SHADER_TYPES);
assert(start + num <= Elements(svga->curr.sampler_views[shader]));
@@ -422,6 +431,7 @@ svga_set_sampler_views(struct pipe_context *pipe,
  pipe_sampler_view_release(pipe, 
&svga->curr.sampler_views[shader][start + i]);
  pipe_sampler_view_reference(&svga->curr.sampler_views[shader][start + 
i],
  views[i]);
+ any_change = TRUE;
   }
 
   if (!views[i])
@@ -434,6 +444,10 @@ svga_set_sampler_views(struct pipe_context *pipe,
  flag_1d |= 1 << (start + i);
}
 
+   if (!any_change) {
+  return;
+   }
+
/* find highest non-null sampler_views[] entry */
{
   unsigned j = MAX2(svga->curr.num_sampler_views[shader], start + num);
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/8] svga: add some comments in svga_state_vs.c

2016-01-04 Thread Brian Paul
---
 src/gallium/drivers/svga/svga_state_vs.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/gallium/drivers/svga/svga_state_vs.c 
b/src/gallium/drivers/svga/svga_state_vs.c
index 24574c1..a103dab 100644
--- a/src/gallium/drivers/svga/svga_state_vs.c
+++ b/src/gallium/drivers/svga/svga_state_vs.c
@@ -173,8 +173,11 @@ make_vs_key(struct svga_context *svga, struct 
svga_compile_key *key)
   return;
}
 
+   /* SVGA_NEW_PRESCALE */
key->vs.need_prescale = svga->state.hw_clear.prescale.enabled &&
(svga->curr.gs == NULL);
+
+   /* SVGA_NEW_RAST */
key->vs.allow_psiz = svga->curr.rast->templ.point_size_per_vertex;
 
/* SVGA_NEW_FS */
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH shader-db] si-report: Track max waves per CU

2016-01-04 Thread Tom Stellard
---
 si-report.py | 56 ++--
 1 file changed, 54 insertions(+), 2 deletions(-)

diff --git a/si-report.py b/si-report.py
index ec88112..e717af0 100755
--- a/si-report.py
+++ b/si-report.py
@@ -65,6 +65,12 @@ def get_scratch_str(value, suffixes = True):
 suffix = 'bytes per wave'
 return get_value_str(value, 'Scratch', suffix)
 
+def get_waves_per_cu_str(value, suffixes = True):
+suffix = ''
+if suffixes:
+suffix = 'waves'
+return get_value_str(value, 'Max Waves / CU', suffix)
+
 def calculate_percent_change(b, a):
 if b == 0:
 return 0
@@ -89,15 +95,17 @@ class si_stats:
 self.code_size = 0
 self.lds = 0
 self.scratch = 0
+self.max_waves_per_cu = 0
 
 
 def to_string(self, suffixes = True):
-return "{}{}{}{}{}".format(
+return "{}{}{}{}{}{}".format(
 get_sgpr_str(self.sgprs, suffixes),
 get_vgpr_str(self.vgprs, suffixes),
 get_code_size_str(self.code_size, suffixes),
 get_lds_str(self.lds, suffixes),
-get_scratch_str(self.scratch, suffixes))
+get_scratch_str(self.scratch, suffixes),
+get_waves_per_cu_str(self.max_waves_per_cu, suffixes))
 
 
 def __str__(self):
@@ -109,6 +117,7 @@ class si_stats:
 self.code_size += other.code_size
 self.lds += other.lds
 self.scratch += other.scratch
+self.max_waves_per_cu += other.max_waves_per_cu
 
 def update(self, comp, cmp_fn):
 for name in self.__dict__.keys():
@@ -153,6 +162,48 @@ class si_stats:
 return False
 return True
 
+#TODO: Handle VI+ and take LDS into account.
+def compute_max_waves_per_cu(sgprs, vgprs):
+sgpr_waves = 10
+if sgprs <= 48:
+sgpr_waves = 10
+elif sgprs <= 56:
+sgpr_waves = 9
+elif sgprs <= 64:
+sgpr_waves = 8
+elif sgprs <= 72:
+sgpr_waves = 7
+elif sgprs <= 80:
+sgpr_waves = 6
+elif sgprs <= 96:
+sgpr_waves = 5
+else:
+sgpr_waves = 4
+
+vgpr_waves = 10
+if vgprs <= 24:
+vgpr_waves = 10
+elif vgprs <= 28:
+vgpr_waves = 9
+elif vgprs <= 32:
+vgpr_waves = 8
+elif vgprs <= 36:
+vgpr_waves = 7
+elif vgprs <= 40:
+vgpr_waves = 6
+elif vgprs <= 48:
+vgpr_waves = 5
+elif vgprs <= 64:
+vgpr_waves = 4
+elif vgprs <= 84:
+vgpr_waves = 3
+elif vgprs <= 128:
+vgpr_waves = 2
+else:
+vgpr_waves = 1
+
+return min(sgpr_waves, vgpr_waves)
+
 def get_results(filename):
 file = open(filename, "r")
 lines = file.read().split('\n')
@@ -199,6 +250,7 @@ def get_results(filename):
 current_stats.scratch = int(match.groups()[0])
 continue
 
+current_stats.max_waves_per_cu = 
compute_max_waves_per_cu(current_stats.sgprs, current_stats.vgprs)
 match = re.search(re_end, line)
 if match:
 results.append(current_stats)
-- 
2.0.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/4] vbo: create a new draw function interface for indirect draws

2016-01-04 Thread Ilia Mirkin
On Mon, Jan 4, 2016 at 10:10 PM, Ian Romanick  wrote:
> On 12/31/2015 11:55 AM, Ilia Mirkin wrote:
>> This is optional for now in the transition period, but optimally all
>> backends that support indirect draws would switch over to it and we can
>> remove the support for indirect in the "regular" draw function.
>>
>> This should allow a backend to properly support ARB_multi_draw_indirect
>> and ARB_indirect_parameters.
>>
>> Signed-off-by: Ilia Mirkin 
>> ---
>>  src/mesa/vbo/vbo.h|  15 
>>  src/mesa/vbo/vbo_context.c|   7 ++
>>  src/mesa/vbo/vbo_context.h|   6 ++
>>  src/mesa/vbo/vbo_exec_array.c | 166 
>> +-
>>  4 files changed, 127 insertions(+), 67 deletions(-)
>>
>> diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h
>> index cef3b8c..0c63bf3 100644
>> --- a/src/mesa/vbo/vbo.h
>> +++ b/src/mesa/vbo/vbo.h
>> @@ -110,6 +110,18 @@ typedef void (*vbo_draw_func)( struct gl_context *ctx,
>>  struct gl_buffer_object *indirect);
>>
>>
>> +typedef void (*vbo_indirect_draw_func)(
>> +   struct gl_context *ctx,
>> +   GLuint mode,
>> +   struct gl_buffer_object *indirect_data,
>> +   GLsizeiptr indirect_offset,
>> +   unsigned draw_count,
>> +   unsigned stride,
>> +   struct gl_buffer_object *indirect_params,
>> +   GLsizeiptr indirect_params_offset,
>> +   const struct _mesa_index_buffer *ib);
>> +
>> +
>>
>>
>>  /* Utility function to cope with various constraints on tnl modules or
>> @@ -179,6 +191,9 @@ void vbo_always_unmap_buffers(struct gl_context *ctx);
>>
>>  void vbo_set_draw_func(struct gl_context *ctx, vbo_draw_func func);
>>
>> +void vbo_set_indirect_draw_func(struct gl_context *ctx,
>> +vbo_indirect_draw_func func);
>> +
>>  void vbo_check_buffers_are_unmapped(struct gl_context *ctx);
>>
>>  void vbo_bind_arrays(struct gl_context *ctx);
>> diff --git a/src/mesa/vbo/vbo_context.c b/src/mesa/vbo/vbo_context.c
>> index 5e1a760..8a196e7 100644
>> --- a/src/mesa/vbo/vbo_context.c
>> +++ b/src/mesa/vbo/vbo_context.c
>> @@ -223,3 +223,10 @@ void vbo_set_draw_func(struct gl_context *ctx, 
>> vbo_draw_func func)
>> vbo->draw_prims = func;
>>  }
>>
>> +
>> +void vbo_set_indirect_draw_func(struct gl_context *ctx,
>> +vbo_indirect_draw_func func)
>> +{
>> +   struct vbo_context *vbo = vbo_context(ctx);
>> +   vbo->draw_indirect_prims = func;
>> +}
>> diff --git a/src/mesa/vbo/vbo_context.h b/src/mesa/vbo/vbo_context.h
>> index 6293a8b..11f9b17 100644
>> --- a/src/mesa/vbo/vbo_context.h
>> +++ b/src/mesa/vbo/vbo_context.h
>> @@ -76,6 +76,12 @@ struct vbo_context {
>>  * is responsible for initiating any fallback actions required:
>>  */
>> vbo_draw_func draw_prims;
>> +
>> +   /* Optional callback for indirect draws. This allows multidraws to not be
>> +* broken up, as well as for the actual count to be passed in as a 
>> separate
>> +* indirect parameter.
>> +*/
>> +   vbo_indirect_draw_func draw_indirect_prims;
>>  };
>>
>>
>> diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
>> index 502b288..3da2d19 100644
>> --- a/src/mesa/vbo/vbo_exec_array.c
>> +++ b/src/mesa/vbo/vbo_exec_array.c
>> @@ -1550,23 +1550,30 @@ vbo_validated_drawarraysindirect(struct gl_context 
>> *ctx,
>>
>> vbo_bind_arrays(ctx);
>>
>> -   memset(prim, 0, sizeof(prim));
>> -   prim[0].begin = 1;
>> -   prim[0].end = 1;
>> -   prim[0].mode = mode;
>> -   prim[0].is_indirect = 1;
>> -   prim[0].indirect_offset = (GLsizeiptr)indirect;
>> -
>> -   /* NOTE: We do NOT want to handle primitive restart here, nor perform any
>> -* other checks that require knowledge of the values in the command 
>> buffer.
>> -* That would defeat the whole purpose of this function.
>> -*/
>> +   if (vbo->draw_indirect_prims) {
>
> My expectation is that we'll want to transition quickly to whatever new
> interface we decide upon.  Would it be possible to make the existing

Yes, I just didn't want to be on the hook for converting/testing i965.
I think that st/mesa and i965 are going to be the only two users of
this interface.

> loops over vbo->draw_prims be a separate function that is the default
> implementation of vbo->draw_indirect_prims?  At the very least, that

That's... a very good idea :) I'll go and do that shortly and will
resend this patch and that one.

> would make the callers a bit cleaner.  We also wouldn't punish "proper"
> implementations with the extra test, and the indirection ought to
> provide more incentive to "improper" implementations to get on it. :)
>
> I haven't looked very closely at the actual interface yet.  Most of my
> day back at work was going through e-mail, and going to buy a new
> keyboard with a "D" key that isn't all worn out. :)

Sure, no problem. Do you think you'll have time to look at it in the
next day or two though?

Allow me to put in a good word for the 

Re: [Mesa-dev] [PATCH 1/4] vbo: create a new draw function interface for indirect draws

2016-01-04 Thread Ilia Mirkin
[adding Ian and Ken, who may or may not be interested]

I'd like to push this out in the next couple of days (along with my
ARB_indirect_parameters series), but I'd really like to get an ack
from someone on the Intel team that such an interface would be
acceptable to them. You can see the full series at
https://github.com/imirkin/mesa/commits/tmp4 (there have been a few
very minor updates).

Thanks,

  -ilia

On Fri, Jan 1, 2016 at 1:20 PM, Ilia Mirkin  wrote:
> Thanks! I'd also like to get an ack from someone on the Intel crew as
> well to make sure this interface will work for them as well.
>
> On Fri, Jan 1, 2016 at 6:49 AM, Marek Olšák  wrote:
>> Acked-by: Marek Olšák 
>>
>> On Thu, Dec 31, 2015 at 8:55 PM, Ilia Mirkin  wrote:
>>> This is optional for now in the transition period, but optimally all
>>> backends that support indirect draws would switch over to it and we can
>>> remove the support for indirect in the "regular" draw function.
>>>
>>> This should allow a backend to properly support ARB_multi_draw_indirect
>>> and ARB_indirect_parameters.
>>>
>>> Signed-off-by: Ilia Mirkin 
>>> ---
>>>  src/mesa/vbo/vbo.h|  15 
>>>  src/mesa/vbo/vbo_context.c|   7 ++
>>>  src/mesa/vbo/vbo_context.h|   6 ++
>>>  src/mesa/vbo/vbo_exec_array.c | 166 
>>> +-
>>>  4 files changed, 127 insertions(+), 67 deletions(-)
>>>
>>> diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h
>>> index cef3b8c..0c63bf3 100644
>>> --- a/src/mesa/vbo/vbo.h
>>> +++ b/src/mesa/vbo/vbo.h
>>> @@ -110,6 +110,18 @@ typedef void (*vbo_draw_func)( struct gl_context *ctx,
>>>struct gl_buffer_object *indirect);
>>>
>>>
>>> +typedef void (*vbo_indirect_draw_func)(
>>> +   struct gl_context *ctx,
>>> +   GLuint mode,
>>> +   struct gl_buffer_object *indirect_data,
>>> +   GLsizeiptr indirect_offset,
>>> +   unsigned draw_count,
>>> +   unsigned stride,
>>> +   struct gl_buffer_object *indirect_params,
>>> +   GLsizeiptr indirect_params_offset,
>>> +   const struct _mesa_index_buffer *ib);
>>> +
>>> +
>>>
>>>
>>>  /* Utility function to cope with various constraints on tnl modules or
>>> @@ -179,6 +191,9 @@ void vbo_always_unmap_buffers(struct gl_context *ctx);
>>>
>>>  void vbo_set_draw_func(struct gl_context *ctx, vbo_draw_func func);
>>>
>>> +void vbo_set_indirect_draw_func(struct gl_context *ctx,
>>> +vbo_indirect_draw_func func);
>>> +
>>>  void vbo_check_buffers_are_unmapped(struct gl_context *ctx);
>>>
>>>  void vbo_bind_arrays(struct gl_context *ctx);
>>> diff --git a/src/mesa/vbo/vbo_context.c b/src/mesa/vbo/vbo_context.c
>>> index 5e1a760..8a196e7 100644
>>> --- a/src/mesa/vbo/vbo_context.c
>>> +++ b/src/mesa/vbo/vbo_context.c
>>> @@ -223,3 +223,10 @@ void vbo_set_draw_func(struct gl_context *ctx, 
>>> vbo_draw_func func)
>>> vbo->draw_prims = func;
>>>  }
>>>
>>> +
>>> +void vbo_set_indirect_draw_func(struct gl_context *ctx,
>>> +vbo_indirect_draw_func func)
>>> +{
>>> +   struct vbo_context *vbo = vbo_context(ctx);
>>> +   vbo->draw_indirect_prims = func;
>>> +}
>>> diff --git a/src/mesa/vbo/vbo_context.h b/src/mesa/vbo/vbo_context.h
>>> index 6293a8b..11f9b17 100644
>>> --- a/src/mesa/vbo/vbo_context.h
>>> +++ b/src/mesa/vbo/vbo_context.h
>>> @@ -76,6 +76,12 @@ struct vbo_context {
>>>  * is responsible for initiating any fallback actions required:
>>>  */
>>> vbo_draw_func draw_prims;
>>> +
>>> +   /* Optional callback for indirect draws. This allows multidraws to not 
>>> be
>>> +* broken up, as well as for the actual count to be passed in as a 
>>> separate
>>> +* indirect parameter.
>>> +*/
>>> +   vbo_indirect_draw_func draw_indirect_prims;
>>>  };
>>>
>>>
>>> diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
>>> index 502b288..3da2d19 100644
>>> --- a/src/mesa/vbo/vbo_exec_array.c
>>> +++ b/src/mesa/vbo/vbo_exec_array.c
>>> @@ -1550,23 +1550,30 @@ vbo_validated_drawarraysindirect(struct gl_context 
>>> *ctx,
>>>
>>> vbo_bind_arrays(ctx);
>>>
>>> -   memset(prim, 0, sizeof(prim));
>>> -   prim[0].begin = 1;
>>> -   prim[0].end = 1;
>>> -   prim[0].mode = mode;
>>> -   prim[0].is_indirect = 1;
>>> -   prim[0].indirect_offset = (GLsizeiptr)indirect;
>>> -
>>> -   /* NOTE: We do NOT want to handle primitive restart here, nor perform 
>>> any
>>> -* other checks that require knowledge of the values in the command 
>>> buffer.
>>> -* That would defeat the whole purpose of this function.
>>> -*/
>>> +   if (vbo->draw_indirect_prims) {
>>> +  vbo->draw_indirect_prims(ctx, mode,
>>> +   ctx->DrawIndirectBuffer, 
>>> (GLsizeiptr)indirect,
>>> +   1 /* primcount */, 0 /* stride */,
>>> +   NULL, 0, NULL);
>>> +   } else {

Re: [Mesa-dev] [PATCH 1/6] gallium: document PK2H/UP2H

2016-01-04 Thread Ian Romanick
On 01/03/2016 10:02 AM, Ilia Mirkin wrote:
> On Sun, Jan 3, 2016 at 12:33 PM, Roland Scheidegger  
> wrote:
>> Am 03.01.2016 um 01:37 schrieb Ilia Mirkin:
>>> Signed-off-by: Ilia Mirkin 
>>> ---
>>>  src/gallium/docs/source/tgsi.rst | 10 --
>>>  1 file changed, 8 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/src/gallium/docs/source/tgsi.rst 
>>> b/src/gallium/docs/source/tgsi.rst
>>> index 955ece8..f69998f 100644
>>> --- a/src/gallium/docs/source/tgsi.rst
>>> +++ b/src/gallium/docs/source/tgsi.rst
>>> @@ -458,7 +458,9 @@ while DDY is allowed to be the same for the entire 2x2 
>>> quad.
>>>
>>>  .. opcode:: PK2H - Pack Two 16-bit Floats
>>>
>>> -  TBD
>>> +.. math::
>>> +
>>> +  dst.x = f32\_to\_f16(src.x) | f32\_to\_f16(src.y) << 16
>> This doesn't quite match the tgsi info description (which says that the
>> result is
>> replicated). If you don't want channel replication probably should make
>> that CHAN
>> there instead.
> 
> I'll add the replication to the docs. Looks like NV_fragment_program
> also wanted this:
> 
>   tmp0 = VectorLoad(op0);
>   /* result obtained by combining raw bits of tmp0.x, tmp0.y */
>   result.x = RawBits(tmp0.x) | (RawBits(tmp0.y) << 16);
>   result.y = RawBits(tmp0.x) | (RawBits(tmp0.y) << 16);
>   result.z = RawBits(tmp0.x) | (RawBits(tmp0.y) << 16);
>   result.w = RawBits(tmp0.x) | (RawBits(tmp0.y) << 16);
> 
> But looks like it's just packing, not actually converting. And it's
> unclear whether UP2H is converting or not... let's assume that they do
> the conversions or else this is going to be useless.

I thought that NV_fragment_program had fp16 as a native type.  You did
something like suffix the instruction with H.  In that case, the pack
instruction wouldn't need to convert.  Right?

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [shader-db PATCH 1/5] run: create debug contexts

2016-01-04 Thread Tom Stellard
On Wed, Dec 30, 2015 at 09:32:38PM -0500, Nicolai Hähnle wrote:
> For Gallium-based drivers, this is required for receiving shader information
> via debug messages.

Patches 2-5 are

Acked-by: Tom Stellard 

> ---
>  run.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/run.c b/run.c
> index 82d8c91..685f830 100644
> --- a/run.c
> +++ b/run.c
> @@ -435,6 +435,7 @@ main(int argc, char **argv)
>  EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT_KHR,
>  EGL_CONTEXT_MAJOR_VERSION_KHR, 3,
>  EGL_CONTEXT_MINOR_VERSION_KHR, 2,
> +EGL_CONTEXT_FLAGS_KHR, EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR,
>  EGL_NONE
>  };
>  EGLContext core_ctx = eglCreateContext(egl_dpy, cfg, EGL_NO_CONTEXT,
> -- 
> 2.5.0
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [shader-db PATCH 1/5] run: create debug contexts

2016-01-04 Thread Ian Romanick
On 12/31/2015 09:36 AM, Matt Turner wrote:
> On Thu, Dec 31, 2015 at 12:25 PM, Ilia Mirkin  wrote:
>> On Thu, Dec 31, 2015 at 12:17 PM, Matt Turner  wrote:
>>> On Thu, Dec 31, 2015 at 9:45 AM, Ilia Mirkin  wrote:
>>>> On Dec 31, 2015 9:32 AM, "Matt Turner"  wrote:
>>>>>
>>>>> On Wed, Dec 30, 2015 at 9:32 PM, Nicolai Hähnle 
>>>>> wrote:
>>>>>> For Gallium-based drivers, this is required for receiving shader
>>>>>> information
>>>>>> via debug messages.
>>>>>> ---
>>>>>
>>>>> But that's a Gallium bug, right?
>>>>>
>>>>> Ilia sent a similar patch, and in the discussion I think we decided
>>>>> that what run.c is doing is perfectly fine.
>>>>
>>>> It's not a bug at all. Behavioural difference from i965 though. I was happy
>>>> to let the matter drop as I was the sole user of it. Sounds like there are
>>>> going to be more now, so this has to be addressed one way or the other.
>>>
>>> Could you summarize what that difference is?
>>
>> Sure. i965 always generates messages to be sent to KHR_debug
>> (potentially restricted by some env vars as well). st/mesa only passes
>> the callback info through for GL debug contexts. The drivers, in turn,
>> can use the presence/absence of the callback as an indicator as to
>> whether to perform extra work. Mildly relatedly, currently GL context
>> flags are not made available in pipe contexts.
> 
> Okay, I checked the spec again:
> 
> Debug output functionality is controlled with the DEBUG_OUTPUT enable
> state. If the context is created with the CONTEXT_FLAG_DEBUG_BIT set then
> the initial state of DEBUG_OUTPUT is TRUE, otherwise the initial state of
> DEBUG_OUTPUT is FALSE. In a debug context, if DEBUG_OUTPUT is disabled the
> GL will not generate any debug output logs or callbacks. Enabling
> DEBUG_OUTPUT again will enable full debug output functionality. If the
> context was created without the CONTEXT_FLAG_DEBUG_BIT and the
> DEBUG_OUTPUT is later enabled, the level of debug output logging is
> defined by the GL implementation, which may have zero debug output. To
> guarantee the full debug output support of the GL implementation the
> context should be created with CONTEXT_FLAG_DEBUG_BIT context flag bit
> set.
> 
> I was somewhat surprised to see that it is implementation-defined
> whether *any* debugging information gets out if a debug context is not
> created.
> 
> I'd rather that the "implementation" be "Mesa" and not "i965" or "Gallium".
> 
> Ian, Timothy, others: Are other people okay with changing core-Mesa to
> not emit any debug messages if not in a debug context?
> 
> This would mean that, even if the program has called
> glEnable(GL_DEBUG_OUTPUT), if not in a debug context there are no
> debug messages. This is apparently the route Gallium is taking.

Are there no messages, or are there just no messages of certain types?
Does _mesa_error still generate a message?  Unless there's a compelling
performance case somewhere, I don't see much benefit in not generating the
same (or nearly same) messages always.  It sure makes it easier to debug
application problems. :)

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965: quieten compiler warning about out-of-bounds access

2016-01-04 Thread Ilia Mirkin
gcc 4.9.3 shows the following warning:

brw_vue_map.c:260:20: warning: array subscript is above array bounds
[-Warray-bounds]
return brw_names[slot - VARYING_SLOT_MAX];

This is because BRW_VARYING_SLOT_COUNT is a valid value for the enum
type. Adding an assert will generate no additional code but will teach
the compiler to not complain.

Signed-off-by: Ilia Mirkin 
---
 src/mesa/drivers/dri/i965/brw_vue_map.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vue_map.c 
b/src/mesa/drivers/dri/i965/brw_vue_map.c
index 09eadbc..fea2436 100644
--- a/src/mesa/drivers/dri/i965/brw_vue_map.c
+++ b/src/mesa/drivers/dri/i965/brw_vue_map.c
@@ -257,6 +257,7 @@ varying_name(brw_varying_slot slot)
   [BRW_VARYING_SLOT_PNTC - VARYING_SLOT_MAX] = "BRW_VARYING_SLOT_PNTC",
};
 
+   assert(slot < BRW_VARYING_SLOT_COUNT);
return brw_names[slot - VARYING_SLOT_MAX];
 }
 
-- 
2.4.10

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] ipers performance regression (Was: u_upload_mgr changes)

2016-01-04 Thread Jose Fonseca

On 21/12/15 22:35, Marek Olšák wrote:

Hi,

This patch series adds more flexibility to u_upload_mgr. First, it adds the 
ability to specify the alignment per suballocation. The idea is that several 
users can use the same upload buffer, but each may need a different alignment. 
Finally, it allows specifying PIPE_USAGE, which usually affects memory 
placement.

Please review.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev



Hi Marek,

This patch series, or the commit


http://cgit.freedesktop.org/mesa/mesa/commit/?id=36c93a6fae275614b6004ec5ab085774d527e1bc

which was pushed around the same time, caused ipers framerate to drop on 
llvmpipe from 12.8 to 6.8 FPS as seen in attached screenshot.



llvmpipe tends to be more rasterization bound than most hw drivers, 
which makes this even stranger.


I think there is a bug here somewhere -- maybe these changes are causing 
more flushes than before.


Jose
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/8] tgsi: add ureg support for image decls

2016-01-04 Thread Marek Olšák
For the series:

Reviewed-by: Marek Olšák 

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] ipers performance regression (Was: u_upload_mgr changes)

2016-01-04 Thread Marek Olšák
On Mon, Jan 4, 2016 at 2:09 PM, Jose Fonseca  wrote:
> On 04/01/16 12:25, Jose Fonseca wrote:
>>
>> On 21/12/15 22:35, Marek Olšák wrote:
>>>
>>> Hi,
>>>
>>> This patch series adds more flexibility to u_upload_mgr. First, it
>>> adds the ability to specify the alignment per suballocation. The idea
>>> is that several users can use the same upload buffer, but each may
>>> need a different alignment. Finally, it allows specifying PIPE_USAGE,
>>> which usually affects memory placement.
>>>
>>> Please review.
>>>
>>> Marek
>>> ___
>>> mesa-dev mailing list
>>> mesa-dev@lists.freedesktop.org
>>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>>
>>
>> Hi Marek,
>>
>> This patch series, or the commit
>>
>>
>>
>> http://cgit.freedesktop.org/mesa/mesa/commit/?id=36c93a6fae275614b6004ec5ab085774d527e1bc
>
>
> I bisected and it turns out that 36c93a6fae275614b6004ec5ab085774d527e1bc is
> the bad commit, and not the u_upload_mgr series.
>
> In particular the issue stems from the hunk:
> +
> +   /* We uploaded modified constants, need to invalidate them. */
> +   st->dirty.mesa |= _NEW_PROGRAM_CONSTANTS;
>
> I suspect that drawing the text via glBitmap suddenly became a huge hotspot
> because of this.  If one disables the help text (pressing h) there's no
> performance regression (frame rate is the same).
>
>
>
> The need for the constants is explained above:
>
>   /* As an optimization, Mesa's fragment programs will sometimes get the
>   * primary color from a statevar/constant rather than a varying
> variable.
>   * when that's the case, we need to ensure that we use the 'color'
>   * parameter and not the current attribute color (which may have
> changed
>   * through glRasterPos and state validation.
>   * So, we force the proper color here.  Not elegant, but it works.
>   */
>  {
> GLfloat colorSave[4];
> COPY_4V(colorSave, ctx->Current.Attrib[VERT_ATTRIB_COLOR0]);
> COPY_4V(ctx->Current.Attrib[VERT_ATTRIB_COLOR0], color);
> st_upload_constants(st, st->fp->Base.Base.Parameters,
> PIPE_SHADER_FRAGMENT);
> COPY_4V(ctx->Current.Attrib[VERT_ATTRIB_COLOR0], colorSave);
>  }
>
> I wonder if putting colors in constants as opposed to vertex buffer with
> zero stride or an INT_MAX instancing divisor is really a good idea.

It's a good idea from the GPU perspective.

Using a vertex buffer is:
- one load instruction per thread in vertex shaders (64 per thread
group, though the cache should hide 63 of them in theory)
- the vertex shader output must be written (64 per thread group), so
on-chip memory must be allocated, which reduces parallelism just like
register usage reduces parallelism
- the pixel shader must load and interpolate the input (interpolation
= 3 on-chip vec4 loads per thread, 2 MAD instructions per thread, all
multiplied by 64)

Using a constant buffer is:
- 1 load instruction per entire pixel shader thread group on GCN,
because the constant load executes on a separate scalar unit and not
on SIMD units. This is best for performance and low power.


> Anyway, if this only affects llvmpipe, no biggie, but if this change makes
> text rendering via glBitmap much slower for all HW drivers, then we should
> probably revisit this.

IIRC, ipers is CPU-bound, so any GPU improvement won't be seen here,
but any additional overhead in st/mesa will hurt. Also, do we
know of any real apps using glBitmap?

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/8] tgsi: provide a way to encode memory qualifiers for SSBO

2016-01-04 Thread Marek Olšák
On Sun, Jan 3, 2016 at 5:37 AM, Ilia Mirkin  wrote:
> Each load/store on most hardware can specify what caching to do. Since
> SSBO allows individual variables to also have separate caching modes,
> allow loads/stores to have the qualifiers instead of attempting to
> encode them in declarations.
>
> Signed-off-by: Ilia Mirkin 
> ---
>  src/gallium/auxiliary/tgsi/tgsi_build.c| 50 +++-
>  src/gallium/auxiliary/tgsi/tgsi_dump.c | 10 ++
>  src/gallium/auxiliary/tgsi/tgsi_parse.c|  4 +++
>  src/gallium/auxiliary/tgsi/tgsi_parse.h|  1 +
>  src/gallium/auxiliary/tgsi/tgsi_strings.c  |  7 
>  src/gallium/auxiliary/tgsi/tgsi_strings.h  |  2 ++
>  src/gallium/auxiliary/tgsi/tgsi_text.c | 27 +++
>  src/gallium/auxiliary/tgsi/tgsi_ureg.c | 53 
> ++
>  src/gallium/auxiliary/tgsi/tgsi_ureg.h | 13 
>  src/gallium/include/pipe/p_shader_tokens.h | 16 -
>  10 files changed, 181 insertions(+), 2 deletions(-)
>
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c 
> b/src/gallium/auxiliary/tgsi/tgsi_build.c
> index bb9d0cb..ea20746 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_build.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
> @@ -620,7 +620,8 @@ tgsi_default_instruction( void )
> instruction.NumSrcRegs = 1;
> instruction.Label = 0;
> instruction.Texture = 0;
> -   instruction.Padding  = 0;
> +   instruction.Memory = 0;
> +   instruction.Padding = 0;
>
> return instruction;
>  }
> @@ -766,6 +767,34 @@ tgsi_build_instruction_texture(
> return instruction_texture;
>  }
>
> +static struct tgsi_instruction_memory
> +tgsi_default_instruction_memory( void )
> +{
> +   struct tgsi_instruction_memory instruction_memory;
> +
> +   instruction_memory.Qualifier = 0;
> +   instruction_memory.Padding = 0;
> +
> +   return instruction_memory;
> +}
> +
> +static struct tgsi_instruction_memory
> +tgsi_build_instruction_memory(
> +   unsigned qualifier,
> +   struct tgsi_token *prev_token,
> +   struct tgsi_instruction *instruction,
> +   struct tgsi_header *header )
> +{
> +   struct tgsi_instruction_memory instruction_memory;
> +
> +   instruction_memory.Qualifier = qualifier;
> +   instruction_memory.Padding = 0;
> +   instruction->Memory = 1;
> +
> +   instruction_grow( instruction, header );
> +
> +   return instruction_memory;
> +}
>
>  static struct tgsi_texture_offset
>  tgsi_default_texture_offset( void )
> @@ -1012,6 +1041,7 @@ tgsi_default_full_instruction( void )
> full_instruction.Predicate = tgsi_default_instruction_predicate();
> full_instruction.Label = tgsi_default_instruction_label();
> full_instruction.Texture = tgsi_default_instruction_texture();
> +   full_instruction.Memory = tgsi_default_instruction_memory();
> for( i = 0;  i < TGSI_FULL_MAX_TEX_OFFSETS; i++ ) {
>full_instruction.TexOffsets[i] = tgsi_default_texture_offset();
> }
> @@ -1123,6 +1153,24 @@ tgsi_build_full_instruction(
>   prev_token = (struct tgsi_token *) texture_offset;
>}
> }
> +
> +   if (full_inst->Instruction.Memory) {
> +  struct tgsi_instruction_memory *instruction_memory;
> +
> +  if( maxsize <= size )
> + return 0;
> +  instruction_memory =
> + (struct  tgsi_instruction_memory *) &tokens[size];
> +  size++;
> +
> +  *instruction_memory = tgsi_build_instruction_memory(
> + full_inst->Memory.Qualifier,
> + prev_token,
> + instruction,
> + header );
> +  prev_token = (struct tgsi_token  *) instruction_memory;
> +   }
> +
> for( i = 0;  i <   full_inst->Instruction.NumDstRegs; i++ ) {
>const struct tgsi_full_dst_register *reg = &full_inst->Dst[i];
>struct tgsi_dst_register *dst_register;
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c 
> b/src/gallium/auxiliary/tgsi/tgsi_dump.c
> index de3aae5..2ad29b9 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
> @@ -624,6 +624,16 @@ iter_instruction(
>}
> }
>
> +   if (inst->Instruction.Memory) {
> +  uint32_t qualifier = inst->Memory.Qualifier;
> +  while (qualifier) {
> + int bit = ffs(qualifier) - 1;
> + qualifier &= ~(1U << bit);
> + TXT(", ");
> + ENM(bit, tgsi_memory_names);
> +  }
> +   }
> +
> switch (inst->Instruction.Opcode) {
> case TGSI_OPCODE_IF:
> case TGSI_OPCODE_UIF:
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c 
> b/src/gallium/auxiliary/tgsi/tgsi_parse.c
> index 9a52bbb..ae95ebd 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
> @@ -195,6 +195,10 @@ tgsi_parse_token(
>   }
>}
>
> +  if (inst->Instruction.Memory) {
> + next_token(ctx, &inst->Memory);
> +  }
> +
>assert( inst->Instruction.NumDstRegs <= TGSI_FULL_MAX_DST_REGISTERS );
>
>for (i = 0; i < 

[Mesa-dev] [GSoC] RFC / Suggestions for project

2016-01-04 Thread Thomas Helland
Hi everyone, and Happy New Year!

As most of you are probably aware I participated in GSoC last summer.
(Loop analysis and range propagation for NIR).
This was a very valuable and fun experience,
and something I'd like to have another shot at.

So why am I writing in this early? Last year I was a bit late to the party,
but this year I want to be on top of things and well prepared.
I was hoping that I could spend some time alongside my masters thesis
identifying good candidates for tasks for GSoC, and doing
some work on confirming that the task will provide a decent win.

My code for last years GSoC has not yet landed upstream.
Partly because I have not had the time to pick it up yet,
partly because the range propagation pass was mind-bending
and hard to review, and partly because there was no major wins/benefits.
That's why I want to do some initial investigation, to ensure the
work will be fruitful and beneficial, and not end up bitrotting.
I'm hoping to find time to clean up last year's code, adapt to the cursor stuff,
and resend as it seems it might be helpful at least for Eric.
(Possibly also others that use NIR and have hardware without free saturates)

So what are my ideas for GSoC?
While I haven't got any specific ideas, I've got some thoughts
on what I want a potential GSoC project to result in.
If possible, I'd like to work on code that benefits all backends.
Maybe implementing an extension or something?
Lowering GLSL overhead or memory consumption?
Unifying some TGSI and mesa-main stuff?
(I recall some discussion about unifying format handling?)

I could of course continue where I left off last year,
however I'm not sure if this is time well spent?
Last years implementation indicates that
the "return on investment" might not be adequate?
My induction variable detection couldn't do better than the one in GLSL,
and the range propagation didn't really give much advantage.
Loops in shaders are simple and far apart it seems, so loop unrolling
in GLSL might be good enough?

I had a look at last years discussion on the mailing list about
possible ideas for GSoC, and it seems a lot of it is under way.
FP64 emulation / softfloat, and SPIR-V for NIR are the main two.
With the current pace I also wouldn't be surprised if most of
the extensions are in progress by May.
Does anyone have any suggestions for things I could investigate?
I expect X.org will be applying for GSoC this year also?

This summer I'll also have access to more hardware;
an optimus laptop (ivy bridge + nvidia), and a desktop with a HD6870,
so both nouveau VC0, i965, and R600 are possible targets.
I might also invest in a new AMD gpu this spring, so radeonsi
might also be a possible target for my work.

I'll be working on computer vision for my masters thesis,
specifically comparing a part to a 3D model to detect
damages, dents, chipped corners, etc (quality control).
C++ will be the language of choice, so my C++ skills
should be greatly improved by the summer,
making me better qualified this year than last year.
(Hopefully my lack of C experience last year didn't scare you off)

Regards
Thomas Helland
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] ipers performance regression (Was: u_upload_mgr changes)

2016-01-04 Thread Jose Fonseca

On 04/01/16 14:03, Marek Olšák wrote:

On Mon, Jan 4, 2016 at 2:09 PM, Jose Fonseca  wrote:

On 04/01/16 12:25, Jose Fonseca wrote:


On 21/12/15 22:35, Marek Olšák wrote:


Hi,

This patch series adds more flexibility to u_upload_mgr. First, it
adds the ability to specify the alignment per suballocation. The idea
is that several users can use the same upload buffer, but each may
need a different alignment. Finally, it allows specifying PIPE_USAGE,
which usually affects memory placement.

Please review.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev



Hi Marek,

This patch series, or the commit



http://cgit.freedesktop.org/mesa/mesa/commit/?id=36c93a6fae275614b6004ec5ab085774d527e1bc



I bisected and it turns out that 36c93a6fae275614b6004ec5ab085774d527e1bc is
the bad commit, and not the u_upload_mgr series.

In particular the issue stems from the hunk:
+
+   /* We uploaded modified constants, need to invalidate them. */
+   st->dirty.mesa |= _NEW_PROGRAM_CONSTANTS;

I suspect that drawing the text via glBitmap suddenly became a huge hotspot
because of this.  If one disables the help text (pressing h) there's no
performance regression (frame rate is the same).



The need for the constants is explained above:

   /* As an optimization, Mesa's fragment programs will sometimes get the
   * primary color from a statevar/constant rather than a varying
variable.
   * when that's the case, we need to ensure that we use the 'color'
   * parameter and not the current attribute color (which may have
changed
   * through glRasterPos and state validation.
   * So, we force the proper color here.  Not elegant, but it works.
   */
  {
 GLfloat colorSave[4];
 COPY_4V(colorSave, ctx->Current.Attrib[VERT_ATTRIB_COLOR0]);
 COPY_4V(ctx->Current.Attrib[VERT_ATTRIB_COLOR0], color);
 st_upload_constants(st, st->fp->Base.Base.Parameters,
 PIPE_SHADER_FRAGMENT);
 COPY_4V(ctx->Current.Attrib[VERT_ATTRIB_COLOR0], colorSave);
  }

I wonder if putting colors in constants as opposed to vertex buffer with
zero stride or an INT_MAX instancing divisor is really a good idea.


It's a good idea from the GPU perspective.

Using a vertex buffer is:
- one load instruction per thread in vertex shaders (64 per thread
group, though the cache should hide 63 of them in theory)
- the vertex shader output must be written (64 per thread group), so
on-chip memory must be allocated, which reduces parallelism just like
register usage reduces parallelism
- the pixel shader must load and interpolate the input (interpolation
= 3 on-chip vec4 loads per thread, 2 MAD instructions per thread, all
multiplied by 64)

Using a constant buffer is:
- 1 load instruction per entire pixel shader thread group on GCN,
because the constant load executes on a separate scalar unit and not
on SIMD units. This is best for performance and low power.


Ok. So maybe the solution is to avoid touching the constants unless 
absolutely necessary (ie, they actually changed and/or will be actually 
needed.)





Anyway, if this only affects llvmpipe, no biggie, but if this change makes
text rendering via glBitmap much slower for all HW drivers, then we should
probably revisit this.


IIRC, ipers is CPU-bound, so any GPU improvement won't be seen here,
but any additional overhead in st/mesa will hurt.


Note the issue seems to be isolated to drawing text.  So maybe if we 
disable all but the text it might better allow to zoom in on this issue.


(That is, I think I noticed this only on ipers because it is one of the 
few demos I bench continuously that has a help dialog enabled by default.)


> Also, do we have

know any real apps using glBitmap?


Yes, Brian Paul came across real world apps that use glBitmap for text 
drawing or thin wrappers around them like wglUseFontBitmaps/glXUseXFont, 
and IIRC performance did matter.


Jose
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/6] i965/gen6-7: Implement stall and flushes required prior to switching pipelines.

2016-01-04 Thread Kenneth Graunke
On Sunday, January 3, 2016 3:41:08 PM PST Francisco Jerez wrote:
> Kenneth Graunke  writes:
> 
> > On Saturday, January 2, 2016 10:48:02 PM PST Francisco Jerez wrote:
> >> Switching the current pipeline while it's not completely idle or the
> >> read and write caches aren't flushed can lead to corruption.  Fixes
> >> misrendering of at least the following Khronos CTS test:
> >> 
> >>  ES31-CTS.shader_image_load_store.basic-allTargets-store-fs
> >> 
> >> The stall and flushes are no longer required on Gen8+.
> >> 
> >> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93323
> >> ---
> >>  src/mesa/drivers/dri/i965/brw_misc_state.c | 28 
+++
> > +
> >>  1 file changed, 28 insertions(+)
> >> 
> >> diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/
drivers/
> > dri/i965/brw_misc_state.c
> >> index 7d53d18..75540c1 100644
> >> --- a/src/mesa/drivers/dri/i965/brw_misc_state.c
> >> +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
> >> @@ -886,6 +886,34 @@ brw_emit_select_pipeline(struct brw_context *brw, 
enum 
> > brw_pipeline pipeline)
> >>  
> >>   brw->ctx.NewDriverState |= BRW_NEW_CC_STATE;
> >>}
> >> +
> >> +   } else if (brw->gen >= 6) {
> >> +  /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
> >> +   * PIPELINE_SELECT [DevBWR+]":
> >
> > Can we cite the public docs?
> >
> 
> The public docs for PIPELINE_SELECT seemed rather inaccurate.  The IVB
> version I have in front of me right now is missing this one workaround,
> and the BDW version mentions it incorrectly.  Sigh...
> 
> >> +   *
> >> +   *   Project: DEVSNB+
> >> +   *
> >> +   *   Software must ensure all the write caches are flushed through 
a
> >> +   *   stalling PIPE_CONTROL command followed by another 
PIPE_CONTROL
> >> +   *   command to invalidate read only caches prior to programming
> >> +   *   MI_PIPELINE_SELECT command to change the Pipeline Select 
Mode.
> >> +   */
> >> +  const unsigned dc_flush =
> >> + brw->gen >= 7 ? PIPE_CONTROL_DATA_CACHE_INVALIDATE : 0;
> >
> > I was going to suggest doing a brw_emit_post_sync_nonzero_flush first
> > on Sandybridge, but I forgot that we now just emit that at the start
> > of every state upload.  Fairly moot anyway since we don't do GPGPU on
> > Sandybridge anyway.
> >
> Hmm, that sounds very sensible to me, it would be rather fragile for
> this function to rely on a flush with post-sync op having been done
> previously, even if at this point this will only be called once at
> context creation on SNB -- Although for the same reason it seems rather
> fragile for brw_emit_pipe_control_flush() to assume that the workaround
> has been applied already.  I'd be inclined to change
> brw_emit_pipe_control_flush() to emit the post-sync op when needed on
> SNB just like we do for other PIPE_CONTROL workarounds on Gen7 and Gen8.

Yeah, that's probably a better idea - these PIPE_CONTROL helpers didn't
exist back in the day.  I originally moved the SNB workaround to the
start of every draw operation because we were forgetting it in various
places, and it was really fragile.   Putting it in the pipe control
emitter functions would also solve the "I forgot it" problem.

That said, what we have seems to work...so...*shrug*... 

> 
> >> +
> >> +  brw_emit_pipe_control_flush(brw,
> >> +  PIPE_CONTROL_RENDER_TARGET_FLUSH |
> >> +  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
> >> +  dc_flush |
> >> +  PIPE_CONTROL_NO_WRITE |
> >> +  PIPE_CONTROL_CS_STALL);
> >
> > Why RENDER_TARGET_FLUSH, DEPTH_CACHE_FLUSH, DATA_CACHE_INVALIDATE,
> > and NO_WRITE?  The cited workaround explains a CS Stall and the RO
> > invalidations below, but I'm not seeing why the others are needed.
> >
> It also says that "software must ensure all the write caches are
> flushed".

Oh, somehow I misread that as basically "Software must ensure that
 by doing a stalling PIPE_CONTROL."  But that alone
definitely doesn't flush write caches.  Thanks, that makes total sense.

Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/5] st/mesa: expose ARB_indirect_parameters when the backend driver allows

2016-01-04 Thread Marek Olšák
Patches 1-4:

Reviewed-by: Marek Olšák 

Marek

On Sat, Jan 2, 2016 at 9:38 PM, Ilia Mirkin  wrote:
> Signed-off-by: Ilia Mirkin 
> ---
>  src/mesa/state_tracker/st_cb_bufferobjects.c | 1 +
>  src/mesa/state_tracker/st_extensions.c   | 1 +
>  2 files changed, 2 insertions(+)
>
> diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c 
> b/src/mesa/state_tracker/st_cb_bufferobjects.c
> index 5d20b26..e775453 100644
> --- a/src/mesa/state_tracker/st_cb_bufferobjects.c
> +++ b/src/mesa/state_tracker/st_cb_bufferobjects.c
> @@ -230,6 +230,7 @@ st_bufferobj_data(struct gl_context *ctx,
>bind = PIPE_BIND_CONSTANT_BUFFER;
>break;
> case GL_DRAW_INDIRECT_BUFFER:
> +   case GL_PARAMETER_BUFFER_ARB:
>bind = PIPE_BIND_COMMAND_ARGS_BUFFER;
>break;
> default:
> diff --git a/src/mesa/state_tracker/st_extensions.c 
> b/src/mesa/state_tracker/st_extensions.c
> index 90eb677..3c198ec 100644
> --- a/src/mesa/state_tracker/st_extensions.c
> +++ b/src/mesa/state_tracker/st_extensions.c
> @@ -452,6 +452,7 @@ void st_init_extensions(struct pipe_screen *screen,
>{ o(ARB_draw_instanced),   PIPE_CAP_TGSI_INSTANCEID
>   },
>{ o(ARB_fragment_program_shadow),  PIPE_CAP_TEXTURE_SHADOW_MAP 
>   },
>{ o(ARB_framebuffer_object),   
> PIPE_CAP_MIXED_FRAMEBUFFER_SIZES  },
> +  { o(ARB_indirect_parameters),  
> PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS   },
>{ o(ARB_instanced_arrays), 
> PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR  },
>{ o(ARB_occlusion_query),  PIPE_CAP_OCCLUSION_QUERY
>   },
>{ o(ARB_occlusion_query2), PIPE_CAP_OCCLUSION_QUERY
>   },
> --
> 2.4.10
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] i965/wm: use proper API buffer size for the surfaces.

2016-01-04 Thread Ilia Mirkin
Buffer range is a property of the binding, not the buffer. What if one
buffer is bound twice with diff ranges to diff bindings?

Also please don't make core changes in commits with a driver prefix... I
nearly missed this one because it said i965.
On Dec 15, 2015 6:52 AM, "Iago Toral Quiroga"  wrote:

> From: Samuel Iglesias Gonsálvez 
>
> Commit 5bb5eeea fixed a bug indicating that the surfaces should have the
> API buffer size. However it picked the wrong value.
>
> This patch adds a new variable, which takes into account
> glBindBufferRange() values. Fixes the following regressions:
>
>
> ES31-CTS.shader_storage_buffer_object.advanced-unsizedArrayLength-cs-std430-vec-bindrangeOffset
>
> ES31-CTS.shader_storage_buffer_object.advanced-unsizedArrayLength-cs-std430-vec-bindrangeSize
>
> Signed-off-by: Samuel Iglesias Gonsálvez 
> ---
>  src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 12 
>  src/mesa/drivers/dri/i965/intel_buffer_objects.c |  1 +
>  src/mesa/main/bufferobj.c|  4 +++-
>  src/mesa/main/mtypes.h   |  1 +
>  4 files changed, 13 insertions(+), 5 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> index c4ebbf3..9072860 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> @@ -949,12 +949,14 @@ brw_upload_ubo_surfaces(struct brw_context *brw,
>} else {
>   struct intel_buffer_object *intel_bo =
>  intel_buffer_object(binding->BufferObject);
> + GLsizeiptrARB size = MIN2(binding->BufferObject->BufferRangeSize,
> +   binding->BufferObject->Size -
> binding->Offset);
>   drm_intel_bo *bo =
>  intel_bufferobj_buffer(brw, intel_bo,
> binding->Offset,
> -   binding->BufferObject->Size -
> binding->Offset);
> +   size);
>   brw_create_constant_surface(brw, bo, binding->Offset,
> - binding->BufferObject->Size -
> binding->Offset,
> + size,
>   _surf_offsets[i]);
>}
> }
> @@ -971,12 +973,14 @@ brw_upload_ubo_surfaces(struct brw_context *brw,
>} else {
>   struct intel_buffer_object *intel_bo =
>  intel_buffer_object(binding->BufferObject);
> + GLsizeiptrARB size = MIN2(binding->BufferObject->BufferRangeSize,
> +   binding->BufferObject->Size -
> binding->Offset);
>   drm_intel_bo *bo =
>  intel_bufferobj_buffer(brw, intel_bo,
> binding->Offset,
> -   binding->BufferObject->Size -
> binding->Offset);
> +   size);
>   brw_create_buffer_surface(brw, bo, binding->Offset,
> -   binding->BufferObject->Size -
> binding->Offset,
> +   size,
> _surf_offsets[i]);
>}
> }
> diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c
> b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
> index 7a5b3fc..b26c939 100644
> --- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c
> +++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
> @@ -198,6 +198,7 @@ brw_buffer_data(struct gl_context *ctx,
> (void) target;
>
> intel_obj->Base.Size = size;
> +   intel_obj->Base.BufferRangeSize = size;
> intel_obj->Base.Usage = usage;
> intel_obj->Base.StorageFlags = storageFlags;
>
> diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
> index e0639c8..7f42673 100644
> --- a/src/mesa/main/bufferobj.c
> +++ b/src/mesa/main/bufferobj.c
> @@ -43,7 +43,7 @@
>  #include "glformats.h"
>  #include "texstore.h"
>  #include "transformfeedback.h"
> -
> +#include "macros.h"
>
>  /* Debug flags */
>  /*#define VBO_DEBUG*/
> @@ -2838,6 +2838,7 @@ bind_buffer_range_uniform_buffer(struct gl_context
> *ctx,
>
> _mesa_reference_buffer_object(ctx, >UniformBuffer, bufObj);
> bind_uniform_buffer(ctx, index, bufObj, offset, size, GL_FALSE);
> +   bufObj->BufferRangeSize = MIN2(size, bufObj->Size);
>  }
>
>  /**
> @@ -2873,6 +2874,7 @@ bind_buffer_range_shader_storage_buffer(struct
> gl_context *ctx,
>
> _mesa_reference_buffer_object(ctx, >ShaderStorageBuffer, bufObj);
> bind_shader_storage_buffer(ctx, index, bufObj, offset, size, GL_FALSE);
> +   bufObj->BufferRangeSize = MIN2(size, bufObj->Size);
>  }
>
>  /**
> diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
> index acacae0..91f9f5e 100644
> --- a/src/mesa/main/mtypes.h
> +++ b/src/mesa/main/mtypes.h
> @@ -1268,6 +1268,7 @@ struct gl_buffer_object
> GLenum 

Re: [Mesa-dev] ipers performance regression (Was: u_upload_mgr changes)

2016-01-04 Thread Jose Fonseca

On 04/01/16 12:25, Jose Fonseca wrote:

On 21/12/15 22:35, Marek Olšák wrote:

Hi,

This patch series adds more flexibility to u_upload_mgr. First, it
adds the ability to specify the alignment per suballocation. The idea
is that several users can use the same upload buffer, but each may
need a different alignment. Finally, it allows specifying PIPE_USAGE,
which usually affects memory placement.

Please review.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev



Hi Marek,

This patch series, or the commit


http://cgit.freedesktop.org/mesa/mesa/commit/?id=36c93a6fae275614b6004ec5ab085774d527e1bc


I bisected and it turns out that 
36c93a6fae275614b6004ec5ab085774d527e1bc is the bad commit, and not the 
u_upload_mgr series.


In particular the issue stems from the hunk:
+
+   /* We uploaded modified constants, need to invalidate them. */
+   st->dirty.mesa |= _NEW_PROGRAM_CONSTANTS;

I suspect that drawing the text via glBitmap suddenly became a huge 
hotspot because of this.  If one disables the help text (pressing h) 
there's no performance regression (frame rate is the same).




The need for the constants is explained above:

  /* As an optimization, Mesa's fragment programs will sometimes get the
  * primary color from a statevar/constant rather than a varying 
variable.

  * when that's the case, we need to ensure that we use the 'color'
  * parameter and not the current attribute color (which may have 
changed

  * through glRasterPos and state validation.
  * So, we force the proper color here.  Not elegant, but it works.
  */
 {
GLfloat colorSave[4];
COPY_4V(colorSave, ctx->Current.Attrib[VERT_ATTRIB_COLOR0]);
COPY_4V(ctx->Current.Attrib[VERT_ATTRIB_COLOR0], color);
st_upload_constants(st, st->fp->Base.Base.Parameters,
PIPE_SHADER_FRAGMENT);
COPY_4V(ctx->Current.Attrib[VERT_ATTRIB_COLOR0], colorSave);
 }

I wonder if putting colors in constants as opposed to vertex buffer with 
zero stride or an INT_MAX instancing divisor is really a good idea.



Anyway, if this only affects llvmpipe, no biggie, but if this change 
makes text rendering via glBitmap much slower for all HW drivers, then 
we should probably revisit this.



Jose
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] i965/wm: use binding size for ubo/ssbo when automatic size is unset

2016-01-04 Thread Samuel Iglesias Gonsálvez
On Mon, 2016-01-04 at 20:01 -0500, Ilia Mirkin wrote:
> Signed-off-by: Ilia Mirkin 
> Cc: Samuel Iglesias Gonsálvez 
> Cc: Marta Lofstedt 
> ---
> 
> I have not tested whether this fixes your CTS tests (as I don't have
> access
> to them). But it should... and if it doesn't, it'll be a variation of
> the
> logic used here.
> 

It fixes both CTS tests:

ES31-CTS.shader_storage_buffer_object.advanced-unsizedArrayLength-cs-
std430-vec-bindrangeOffset

ES31-CTS.shader_storage_buffer_object.advanced-unsizedArrayLength-cs-
std430-vec-bindrangeSize

This series is:

Reviewed-by: Samuel Iglesias Gonsálvez 

Thanks a lot for fixing this,

Sam

>  src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 14 ++
>  1 file changed, 10 insertions(+), 4 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> index 76dc577..5ab2f7f 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> @@ -946,12 +946,15 @@ brw_upload_ubo_surfaces(struct brw_context
> *brw,
>    } else {
>   struct intel_buffer_object *intel_bo =
>  intel_buffer_object(binding->BufferObject);
> + GLsizeiptr size = binding->BufferObject->Size - binding-
> >Offset;
> + if (!binding->AutomaticSize)
> +size = MIN2(size, binding->Size);
>   drm_intel_bo *bo =
>  intel_bufferobj_buffer(brw, intel_bo,
> binding->Offset,
> -   binding->BufferObject->Size -
> binding->Offset);
> +   size);
>   brw_create_constant_surface(brw, bo, binding->Offset,
> - binding->BufferObject->Size -
> binding->Offset,
> + size,
>   _surf_offsets[i]);
>    }
> }
> @@ -968,12 +971,15 @@ brw_upload_ubo_surfaces(struct brw_context
> *brw,
>    } else {
>   struct intel_buffer_object *intel_bo =
>  intel_buffer_object(binding->BufferObject);
> + GLsizeiptr size = binding->BufferObject->Size - binding-
> >Offset;
> + if (!binding->AutomaticSize)
> +size = MIN2(size, binding->Size);
>   drm_intel_bo *bo =
>  intel_bufferobj_buffer(brw, intel_bo,
> binding->Offset,
> -   binding->BufferObject->Size -
> binding->Offset);
> +   size);
>   brw_create_buffer_surface(brw, bo, binding->Offset,
> -   binding->BufferObject->Size -
> binding->Offset,
> +   size,
> _surf_offsets[i]);
>    }
> }
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 27512] Illegal instruction _mesa_x86_64_transform_points4_general

2016-01-04 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=27512

--- Comment #6 from mhard...@gmail.com ---
I am experiencing a crash with a SIGILL, Illegal instruction in Debian when
using Kodi.

[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
Core was generated by `/usr/lib/x86_64-linux-gnu/kodi/kodi.bin --standalone'.
Program terminated with signal SIGILL, Illegal instruction.
#0  _mesa_x86_64_transform_points4_general () at x86-64/xform4.S:72
72  prefetch 16(%rdx)
[Current thread is 1 (Thread 0x7f9054aeb9c0 (LWP 791))]


Thread 1 (Thread 0x7f9054aeb9c0 (LWP 791)):
#0  _mesa_x86_64_transform_points4_general () at x86-64/xform4.S:72
#1  0x7f902577102d in run_vertex_stage (ctx=0x1ae3248, stage=<optimized out>) at
 tnl/t_vb_vertex.c:160
#2  0x7f902575fc62 in _tnl_run_pipeline (ctx=ctx@entry=0x1ae3248) at
tnl/t_pipeline.c:241
#3  0x7f90258f856f in intelRunPipeline (ctx=0x1ae3248) at intel_tris.c:1086
#4  0x7f902575f27c in _tnl_draw_prims (ctx=0x1ae3248, prim=0x1b53938,
nr_prims=1, ib=0x0, index_bounds_valid=<optimized out>, min_index=0, max_index=7,
tfb_vertcount=0x0, stream=0, indirect=0x0) at tnl/t_draw.c:521
#5  0x7f9025745504 in vbo_exec_vtx_flush (exec=0x1b53158,
keepUnmapped=keepUnmapped@entry=0 '\000') at vbo/vbo_exec_draw.c:422
#6  0x7f902572732f in vbo_exec_wrap_buffers (exec=exec@entry=0x1b53158) at
vbo/vbo_exec_api.c:104
#7  0x7f90257278e3 in vbo_exec_wrap_upgrade_vertex (exec=0x1b53158,
attr=attr@entry=3, newSize=newSize@entry=4) at vbo/vbo_exec_api.c:280
#8  0x7f9025727e73 in vbo_exec_fixup_vertex (ctx=ctx@entry=0x1ae3248,
attr=attr@entry=3, newSize=newSize@entry=4, newType=newType@entry=5126) at
vbo/vbo_exec_api.c:406
#9  0x7f902572fe6e in vbo_Color4f (x=<optimized out>, y=<optimized out>,
z=<optimized out>, w=<optimized out>) at vbo/vbo_attrib_tmp.h:402
#10 0x009a7535 in CLinuxRendererGL::RenderUpdate(bool, unsigned int,
unsigned int) ()
#11 0x0099ff84 in CXBMCRenderManager::PresentSingle(bool, unsigned int,
unsigned int) ()
#12 0x009a02f2 in CXBMCRenderManager::Render(bool, unsigned int,
unsigned int, bool) ()
#13 0x00eb63a8 in CGUIWindowFullScreen::Render() ()
#14 0x0081f239 in CGUIControl::DoRender() ()
#15 0x008008a4 in CGUIWindow::DoRender() ()
#16 0x0080661e in CGUIWindowManager::RenderPass() const ()
#17 0x00806853 in CGUIWindowManager::Render() ()
#18 0x00d09d33 in CApplication::RenderNoPresent() ()
#19 0x00d0df31 in CApplication::Render() ()
#20 0x00dae551 in CXBApplicationEx::Run() ()
#21 0x00db3dfb in XBMC_Run ()
#22 0x006cb2e8 in main ()

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] ipers performance regression (Was: u_upload_mgr changes)

2016-01-04 Thread Brian Paul

On 01/04/2016 07:24 AM, Jose Fonseca wrote:

On 04/01/16 14:03, Marek Olšák wrote:



 > Also, do we know of any real apps using glBitmap?


Yes, Brian Paul came across real world apps that use glBitmap for text
drawing or thin wrappers around them like wglUseFontBitmaps/glXUseXFont,
and IIRC performance did matter.


Yes, glBitmap performance is important for us in several apps.

-Brian

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 93570] the image of llvmpipe has a low quality on arm (with too many points on it)

2016-01-04 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=93570

--- Comment #10 from Icenowy Zheng  ---
I've finally built a git version of mesa with a svn version of llvm.

Now the image is fixed, but the performance is 1/4 of the original.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/8] tgsi: add ureg support for image decls

2016-01-04 Thread Rob Clark
On Sat, Jan 2, 2016 at 11:37 PM, Ilia Mirkin  wrote:
> Signed-off-by: Ilia Mirkin 
> ---
>  src/gallium/auxiliary/tgsi/tgsi_build.c| 62 +
>  src/gallium/auxiliary/tgsi/tgsi_dump.c | 10 +--
>  src/gallium/auxiliary/tgsi/tgsi_parse.c|  4 +-
>  src/gallium/auxiliary/tgsi/tgsi_parse.h|  2 +-
>  src/gallium/auxiliary/tgsi/tgsi_strings.c  |  4 +-
>  src/gallium/auxiliary/tgsi/tgsi_text.c | 10 +--
>  src/gallium/auxiliary/tgsi/tgsi_ureg.c | 77 
> ++
>  src/gallium/auxiliary/tgsi/tgsi_ureg.h |  7 ++
>  src/gallium/drivers/ilo/shader/toy_tgsi.c  |  8 +--
>  .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  | 12 +++-
>  src/gallium/drivers/svga/svga_tgsi_vgpu10.c|  2 +
>  src/gallium/include/pipe/p_shader_tokens.h |  7 +-

missing src/gallium/docs/source/tgsi.rst ?

BR,
-R

>  12 files changed, 153 insertions(+), 52 deletions(-)
>
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c 
> b/src/gallium/auxiliary/tgsi/tgsi_build.c
> index fdb7feb..bb9d0cb 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_build.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
> @@ -259,36 +259,39 @@ tgsi_build_declaration_semantic(
> return ds;
>  }
>
> -static struct tgsi_declaration_resource
> -tgsi_default_declaration_resource(void)
> +static struct tgsi_declaration_image
> +tgsi_default_declaration_image(void)
>  {
> -   struct tgsi_declaration_resource dr;
> +   struct tgsi_declaration_image di;
>
> -   dr.Resource = TGSI_TEXTURE_BUFFER;
> -   dr.Raw = 0;
> -   dr.Writable = 0;
> -   dr.Padding = 0;
> +   di.Resource = TGSI_TEXTURE_BUFFER;
> +   di.Raw = 0;
> +   di.Writable = 0;
> +   di.Format = 0;
> +   di.Padding = 0;
>
> -   return dr;
> +   return di;
>  }
>
> -static struct tgsi_declaration_resource
> -tgsi_build_declaration_resource(unsigned texture,
> -unsigned raw,
> -unsigned writable,
> -struct tgsi_declaration *declaration,
> -struct tgsi_header *header)
> +static struct tgsi_declaration_image
> +tgsi_build_declaration_image(unsigned texture,
> + unsigned format,
> + unsigned raw,
> + unsigned writable,
> + struct tgsi_declaration *declaration,
> + struct tgsi_header *header)
>  {
> -   struct tgsi_declaration_resource dr;
> +   struct tgsi_declaration_image di;
>
> -   dr = tgsi_default_declaration_resource();
> -   dr.Resource = texture;
> -   dr.Raw = raw;
> -   dr.Writable = writable;
> +   di = tgsi_default_declaration_image();
> +   di.Resource = texture;
> +   di.Format = format;
> +   di.Raw = raw;
> +   di.Writable = writable;
>
> declaration_grow(declaration, header);
>
> -   return dr;
> +   return di;
>  }
>
>  static struct tgsi_declaration_sampler_view
> @@ -364,7 +367,7 @@ tgsi_default_full_declaration( void )
> full_declaration.Range = tgsi_default_declaration_range();
> full_declaration.Semantic = tgsi_default_declaration_semantic();
> full_declaration.Interp = tgsi_default_declaration_interp();
> -   full_declaration.Resource = tgsi_default_declaration_resource();
> +   full_declaration.Image = tgsi_default_declaration_image();
> full_declaration.SamplerView = tgsi_default_declaration_sampler_view();
> full_declaration.Array = tgsi_default_declaration_array();
>
> @@ -454,20 +457,21 @@ tgsi_build_full_declaration(
>   header );
> }
>
> -   if (full_decl->Declaration.File == TGSI_FILE_RESOURCE) {
> -  struct tgsi_declaration_resource *dr;
> +   if (full_decl->Declaration.File == TGSI_FILE_IMAGE) {
> +  struct tgsi_declaration_image *di;
>
>if (maxsize <= size) {
>   return  0;
>}
> -  dr = (struct tgsi_declaration_resource *)[size];
> +  di = (struct tgsi_declaration_image *)[size];
>size++;
>
> -  *dr = tgsi_build_declaration_resource(full_decl->Resource.Resource,
> -full_decl->Resource.Raw,
> -full_decl->Resource.Writable,
> -declaration,
> -header);
> +  *di = tgsi_build_declaration_image(full_decl->Image.Resource,
> + full_decl->Image.Format,
> + full_decl->Image.Raw,
> + full_decl->Image.Writable,
> + declaration,
> + header);
> }
>
> if (full_decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c 
> 

Re: [Mesa-dev] [PATCH 2/2] glsl: Handle bits=32 case in bitfieldInsert/bitfieldExtract.

2016-01-04 Thread Matt Turner
On Wed, Dec 30, 2015 at 4:26 PM, Ilia Mirkin  wrote:
> On Wed, Dec 30, 2015 at 3:26 PM, Matt Turner  wrote:
>> The OpenGL specifications for these functions say:
>>
>>The result will be undefined if <offset> or <bits> is negative, or if
>>the sum of <offset> and <bits> is greater than the number of bits
>>used to store the operand.
>>
>> Therefore passing bits=32, offset=0 is legal and defined in GLSL.
>>
>> But the earlier DX11/SM5 bfi/ibfe/ubfe opcodes are specified to accept a
>> bitfield width ranging from 0-31. As such, Intel and AMD instructions
>> read only the low 5 bits of the width operand, making them not compliant
>> with the GLSL spec, so we have to special case the bits=32 case.
>>
>> Checking that offset=0 is not necessary, since for any other value,
>> <offset> + <bits> will be greater than 32, which is specified as
>> generating an undefined result.
>>
>> Fixes:
>>ES31-CTS.shader_bitfield_operation.bitfieldInsert.uint_2
>>ES31-CTS.shader_bitfield_operation.bitfieldInsert.uvec4_3
>>ES31-CTS.shader_bitfield_operation.bitfieldExtract.uvec3_0
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92595
>> ---
>> Yuck. Suggestions welcome.
>
> Can you make a piglit test? Want to see if nvidia has the same
> problem. According to
> http://docs.nvidia.com/cuda/parallel-thread-execution/#integer-arithmetic-instructions-bfe,
> offset/bits can actually be up to 255 (although I can't fully imagine
> why one might want that). However perhaps the HW differs.

I just sent: [PATCH] arb_gpu_shader5: Test corner cases of
bitfieldInsert/bitfieldExtract.

It's not totally tested (as in, I haven't fixed i965 to make it pass
because I found out that the bfi2 instruction is also broken...) but I
am curious to see what the proprietary NVIDIA driver does.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] glsl: Handle bits=32 case in bitfieldInsert/bitfieldExtract.

2016-01-04 Thread Matt Turner
On Wed, Dec 30, 2015 at 8:55 PM, Roland Scheidegger  wrote:
> Am 30.12.2015 um 21:26 schrieb Matt Turner:
>> The OpenGL specifications for these functions say:
>>
>>The result will be undefined if <offset> or <bits> is negative, or if
>>the sum of <offset> and <bits> is greater than the number of bits
>>used to store the operand.
>>
>> Therefore passing bits=32, offset=0 is legal and defined in GLSL.
>>
>> But the earlier DX11/SM5 bfi/ibfe/ubfe opcodes are specified to accept a
>> bitfield width ranging from 0-31. As such, Intel and AMD instructions
>> read only the low 5 bits of the width operand, making them not compliant
>> with the GLSL spec, so we have to special case the bits=32 case.
>>
>> Checking that offset=0 is not necessary, since for any other value,
>> <offset> + <bits> will be greater than 32, which is specified as
>> generating an undefined result.
> What about offset=32, bits=0, will that work?
>
>>
>> Fixes:
>>ES31-CTS.shader_bitfield_operation.bitfieldInsert.uint_2
>>ES31-CTS.shader_bitfield_operation.bitfieldInsert.uvec4_3
>>ES31-CTS.shader_bitfield_operation.bitfieldExtract.uvec3_0
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92595
>> ---
>> Yuck. Suggestions welcome.
> Do blob drivers do that correctly? Otherwise make it a spec bug?
> (Albeit I can see from a theoretical point of view why those 0/32 cases
> make sense so it probably really is intentional.)

Intentional might be an overstatement (given that hardware from major
vendors couldn't implement it with the intended instructions...), but
I don't think it's a spec bug and I really don't think it's fixable
there since it's tested in the ES 3.1 conformance suite.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] llvmpipe: don't store eo as 64bit int

2016-01-04 Thread Brian Paul

Looks OK to me.

For the series, Reviewed-by: Brian Paul 


On 01/02/2016 01:39 PM, srol...@vmware.com wrote:

From: Roland Scheidegger 

eo, just like dcdx and dcdy, cannot overflow 32bit.
Store it as unsigned though just in case (it cannot be negative, but
in theory twice as big as dcdx or dcdy so this gives it one more bit).
This doesn't really change anything, albeit it might help minimally on
32bit archs.
---
  src/gallium/drivers/llvmpipe/lp_rast.h |  2 +-
  src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h |  4 ++--
  src/gallium/drivers/llvmpipe/lp_setup.c|  5 +
  src/gallium/drivers/llvmpipe/lp_setup_tri.c| 16 
  4 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h 
b/src/gallium/drivers/llvmpipe/lp_rast.h
index c19f931..db45cbb 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast.h
@@ -115,7 +115,7 @@ struct lp_rast_plane {
 int32_t dcdy;

 /* one-pixel sized trivial reject offsets for each plane */
-   int64_t eo;
+   uint32_t eo;
  };

  /**
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h 
b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
index 52f6e99..e0aea94 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
@@ -82,7 +82,7 @@ TAG(do_block_16)(struct lp_rasterizer_task *task,
const int64_t dcdx = -IMUL64(plane[j].dcdx, 4);
const int64_t dcdy = IMUL64(plane[j].dcdy, 4);
const int64_t cox = IMUL64(plane[j].eo, 4);
-  const int64_t ei = plane[j].dcdy - plane[j].dcdx - plane[j].eo;
+  const int64_t ei = plane[j].dcdy - plane[j].dcdx - (int64_t)plane[j].eo;
const int64_t cio = IMUL64(ei, 4) - 1;

BUILD_MASKS(c[j] + cox,
@@ -182,7 +182,7 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
   const int64_t dcdx = -IMUL64(plane[j].dcdx, 16);
   const int64_t dcdy = IMUL64(plane[j].dcdy, 16);
   const int64_t cox = IMUL64(plane[j].eo, 16);
- const int64_t ei = plane[j].dcdy - plane[j].dcdx - plane[j].eo;
+ const int64_t ei = plane[j].dcdy - plane[j].dcdx - 
(int64_t)plane[j].eo;
   const int64_t cio = IMUL64(ei, 16) - 1;

   BUILD_MASKS(c[j] + cox,
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c 
b/src/gallium/drivers/llvmpipe/lp_setup.c
index ddbb88e..bd85051 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -486,6 +486,11 @@ lp_setup_try_clear_zs(struct lp_setup_context *setup,
 depth,
 stencil);

+   /*
+* XXX: should make a full mask here for things like D24X8,
+* otherwise we'll do a read-modify-write clear later which
+* should be unnecessary.
+*/
 zsmask = util_pack64_mask_z_stencil(setup->fb.zsbuf->format,
 zmask32,
 smask8);
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c 
b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index 0c40fb3..98973de 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -521,19 +521,19 @@ do_triangle_ccw(struct lp_setup_context *setup,
 }

 if (0) {
-  debug_printf("p0: %"PRIx64"/%08x/%08x/%"PRIx64"\n",
+  debug_printf("p0: %"PRIx64"/%08x/%08x/%08x\n",
 plane[0].c,
 plane[0].dcdx,
 plane[0].dcdy,
 plane[0].eo);
-
-  debug_printf("p1: %"PRIx64"/%08x/%08x/%"PRIx64"\n",
+
+  debug_printf("p1: %"PRIx64"/%08x/%08x/%08x\n",
 plane[1].c,
 plane[1].dcdx,
 plane[1].dcdy,
 plane[1].eo);
-
-  debug_printf("p2: %"PRIx64"/%08x/%08x/%"PRIx64"\n",
+
+  debug_printf("p2: %"PRIx64"/%08x/%08x/%08x\n",
 plane[2].c,
 plane[2].dcdx,
 plane[2].dcdy,
@@ -594,7 +594,7 @@ do_triangle_ccw(struct lp_setup_context *setup,
  static inline uint32_t
  floor_pot(uint32_t n)
  {
-#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86)
+#if defined(PIPE_CC_GCC) && (defined(PIPE_ARCH_X86) || 
defined(PIPE_ARCH_X86_64))
 if (n == 0)
return 0;

@@ -742,9 +742,9 @@ lp_setup_bin_triangle( struct lp_setup_context *setup,

   ei[i] = (plane[i].dcdy -
plane[i].dcdx -
-  plane[i].eo) << TILE_ORDER;
+  (int64_t)plane[i].eo) << TILE_ORDER;

- eo[i] = plane[i].eo << TILE_ORDER;
+ eo[i] = (int64_t)plane[i].eo << TILE_ORDER;
   xstep[i] = -(((int64_t)plane[i].dcdx) << TILE_ORDER);
   ystep[i] = ((int64_t)plane[i].dcdy) << TILE_ORDER;
}



___
mesa-dev mailing list

Re: [Mesa-dev] [PATCH 1/8] draw: rework handling of non-existing outputs in emit code

2016-01-04 Thread Brian Paul

I didn't do a detailed review, but these changes look like good clean-ups.

For the series, Reviewed-by: Brian Paul 


On 12/21/2015 08:00 PM, srol...@vmware.com wrote:

From: Roland Scheidegger 

Previously the code would just redirect requests for attributes which
don't exist to use output 0. Rework this to output all zeros instead which
seems more useful - in particular some extensions like
ARB_fragment_layer_viewport require 0 in the fs even if it wasn't output by
previous stages. That way, drivers don't have to special case this depending
if the vs/gs outputs some attribute or not.
---
  src/gallium/auxiliary/draw/draw_pipe_vbuf.c | 52 +
  src/gallium/auxiliary/draw/draw_pt_emit.c   | 12 +++
  src/gallium/auxiliary/draw/draw_vertex.h|  4 +--
  3 files changed, 45 insertions(+), 23 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c 
b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
index f36706c..81c4fed 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
@@ -74,9 +74,10 @@ struct vbuf_stage {
 unsigned max_indices;
 unsigned nr_indices;

-   /* Cache point size somewhere it's address won't change:
+   /* Cache point size somewhere its address won't change:
  */
 float point_size;
+   float zero4[4];

 struct translate_cache *cache;
  };
@@ -205,6 +206,7 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim )
 struct translate_key hw_key;
 unsigned dst_offset;
 unsigned i;
+   const struct vertex_info *vinfo;

 vbuf->render->set_primitive(vbuf->render, prim);

@@ -215,27 +217,33 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim )
  * state change.
  */
 vbuf->vinfo = vbuf->render->get_vertex_info(vbuf->render);
-   vbuf->vertex_size = vbuf->vinfo->size * sizeof(float);
+   vinfo = vbuf->vinfo;
+   vbuf->vertex_size = vinfo->size * sizeof(float);

 /* Translate from pipeline vertices to hw vertices.
  */
 dst_offset = 0;

-   for (i = 0; i < vbuf->vinfo->num_attribs; i++) {
+   for (i = 0; i < vinfo->num_attribs; i++) {
unsigned emit_sz = 0;
unsigned src_buffer = 0;
enum pipe_format output_format;
-  unsigned src_offset = (vbuf->vinfo->attrib[i].src_index * 4 * 
sizeof(float) );
+  unsigned src_offset = (vinfo->attrib[i].src_index * 4 * sizeof(float) );

-  output_format = draw_translate_vinfo_format(vbuf->vinfo->attrib[i].emit);
-  emit_sz = draw_translate_vinfo_size(vbuf->vinfo->attrib[i].emit);
+  output_format = draw_translate_vinfo_format(vinfo->attrib[i].emit);
+  emit_sz = draw_translate_vinfo_size(vinfo->attrib[i].emit);

/* doesn't handle EMIT_OMIT */
assert(emit_sz != 0);

-  if (vbuf->vinfo->attrib[i].emit == EMIT_1F_PSIZE) {
-src_buffer = 1;
-src_offset = 0;
+  if (vinfo->attrib[i].emit == EMIT_1F_PSIZE) {
+ src_buffer = 1;
+ src_offset = 0;
+  }
+  else if (vinfo->attrib[i].src_index == 255) {
+ /* elements which don't exist will get assigned zeros */
+ src_buffer = 2;
+ src_offset = 0;
}

hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL;
@@ -249,7 +257,7 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim )
dst_offset += emit_sz;
 }

-   hw_key.nr_elements = vbuf->vinfo->num_attribs;
+   hw_key.nr_elements = vinfo->num_attribs;
 hw_key.output_stride = vbuf->vertex_size;

 /* Don't bother with caching at this stage:
@@ -261,6 +269,7 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim )
vbuf->translate = translate_cache_find(vbuf->cache, _key);

vbuf->translate->set_buffer(vbuf->translate, 1, >point_size, 0, 
~0);
+  vbuf->translate->set_buffer(vbuf->translate, 2, >zero4[0], 0, ~0);
 }

 vbuf->point_size = vbuf->stage.draw->rasterizer->point_size;
@@ -428,7 +437,7 @@ struct draw_stage *draw_vbuf_stage( struct draw_context 
*draw,
 struct vbuf_stage *vbuf = CALLOC_STRUCT(vbuf_stage);
 if (!vbuf)
goto fail;
-
+
 vbuf->stage.draw = draw;
 vbuf->stage.name = "vbuf";
 vbuf->stage.point = vbuf_first_point;
@@ -437,29 +446,30 @@ struct draw_stage *draw_vbuf_stage( struct draw_context 
*draw,
 vbuf->stage.flush = vbuf_flush;
 vbuf->stage.reset_stipple_counter = vbuf_reset_stipple_counter;
 vbuf->stage.destroy = vbuf_destroy;
-
+
 vbuf->render = render;
 vbuf->max_indices = MIN2(render->max_indices, UNDEFINED_VERTEX_ID-1);

-   vbuf->indices = (ushort *) align_malloc( vbuf->max_indices *
-   sizeof(vbuf->indices[0]),
-   16 );
+   vbuf->indices = (ushort *) align_malloc(vbuf->max_indices *
+sizeof(vbuf->indices[0]),
+16);
 if (!vbuf->indices)
goto fail;

 vbuf->cache = translate_cache_create();
-   if 

Re: [Mesa-dev] [PATCH 2/2] glsl: Handle bits=32 case in bitfieldInsert/bitfieldExtract.

2016-01-04 Thread Ilia Mirkin
On Mon, Jan 4, 2016 at 12:44 PM, Matt Turner  wrote:
> On Wed, Dec 30, 2015 at 4:26 PM, Ilia Mirkin  wrote:
>> On Wed, Dec 30, 2015 at 3:26 PM, Matt Turner  wrote:
>>> The OpenGL specifications for these functions say:
>>>
>>>The result will be undefined if <offset> or <bits> is negative, or if
>>>the sum of <offset> and <bits> is greater than the number of bits
>>>used to store the operand.
>>>
>>> Therefore passing bits=32, offset=0 is legal and defined in GLSL.
>>>
>>> But the earlier DX11/SM5 bfi/ibfe/ubfe opcodes are specified to accept a
>>> bitfield width ranging from 0-31. As such, Intel and AMD instructions
>>> read only the low 5 bits of the width operand, making them not compliant
>>> with the GLSL spec, so we have to special case the bits=32 case.
>>>
>>> Checking that offset=0 is not necessary, since for any other value,
>>> <offset> + <bits> will be greater than 32, which is specified as
>>> generating an undefined result.
>>>
>>> Fixes:
>>>ES31-CTS.shader_bitfield_operation.bitfieldInsert.uint_2
>>>ES31-CTS.shader_bitfield_operation.bitfieldInsert.uvec4_3
>>>ES31-CTS.shader_bitfield_operation.bitfieldExtract.uvec3_0
>>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92595
>>> ---
>>> Yuck. Suggestions welcome.
>>
>> Can you make a piglit test? Want to see if nvidia has the same
>> problem. According to
>> http://docs.nvidia.com/cuda/parallel-thread-execution/#integer-arithmetic-instructions-bfe,
>> offset/bits can actually be up to 255 (although I can't fully imagine
>> why one might want that). However perhaps the HW differs.
>
> I just sent: [PATCH] arb_gpu_shader5: Test corner cases of
> bitfieldInsert/bitfieldExtract.
>
> It's not totally tested (as in, I haven't fixed i965 to make it pass
> because I found out that the bfi2 instruction is also broken...) but I
> am curious to see what the proprietary NVIDIA driver does.

I'm curious too. On nvc0 the new bitfieldExtract tests still pass, but
bitfieldInsert now fails.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 93570] the image of llvmpipe has a low quality on arm (with too many points on it)

2016-01-04 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=93570

Rob Clark  changed:

   What|Removed |Added

 CC||robcl...@freedesktop.org

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/8] tgsi: add ureg support for image decls

2016-01-04 Thread Rob Clark
On Mon, Jan 4, 2016 at 11:03 AM, Ilia Mirkin  wrote:
> On Mon, Jan 4, 2016 at 10:30 AM, Rob Clark  wrote:
>> On Sat, Jan 2, 2016 at 11:37 PM, Ilia Mirkin  wrote:
>>> Signed-off-by: Ilia Mirkin 
>>> ---
>>>  src/gallium/auxiliary/tgsi/tgsi_build.c| 62 +
>>>  src/gallium/auxiliary/tgsi/tgsi_dump.c | 10 +--
>>>  src/gallium/auxiliary/tgsi/tgsi_parse.c|  4 +-
>>>  src/gallium/auxiliary/tgsi/tgsi_parse.h|  2 +-
>>>  src/gallium/auxiliary/tgsi/tgsi_strings.c  |  4 +-
>>>  src/gallium/auxiliary/tgsi/tgsi_text.c | 10 +--
>>>  src/gallium/auxiliary/tgsi/tgsi_ureg.c | 77 
>>> ++
>>>  src/gallium/auxiliary/tgsi/tgsi_ureg.h |  7 ++
>>>  src/gallium/drivers/ilo/shader/toy_tgsi.c  |  8 +--
>>>  .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  | 12 +++-
>>>  src/gallium/drivers/svga/svga_tgsi_vgpu10.c|  2 +
>>>  src/gallium/include/pipe/p_shader_tokens.h |  7 +-
>>
>> missing src/gallium/docs/source/tgsi.rst ?
>
> What sort of change were you hoping to see in there? FWIW I have a
> later patch which updates the LOAD/etc ops to talk about buffer/image
> things.

I suspect at a minimum s/RESOURCE/IMAGE/g.  Although if more docs
coming in later patch that is even better.

BR,
-R
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 93570] the image of llvmpipe has a low quality on arm (with too many points on it)

2016-01-04 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=93570

--- Comment #12 from Icenowy Zheng  ---
Yes. And it's still faster than softpipe.

It seems that loading a scene costs too much time: once the animation has
started, it is smooth (but the resulting fps number is low).

 bugzilla-dae...@freedesktop.org编写 

https://bugs.freedesktop.org/show_bug.cgi?id=93570

--- Comment #11 from Rob Clark  ---
(In reply to Icenowy Zheng from comment #10)
> I've finally built a git version of mesa with a svn version of llvm.
>
> Now the image is fixed, but the performance is 1/4 of the original.

just a sanity check, but you still see "llvmpipe" in the GL_RENDERER string
print out from glmark2?  Ie. something like:

  GL_RENDERER:   Gallium 0.4 on llvmpipe (LLVM 3.7, 128 bits)

just to double check to make sure you aren't falling back to swrast..

--
You are receiving this mail because:
You reported the bug.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 93570] the image of llvmpipe has a low quality on arm (with too many points on it)

2016-01-04 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=93570

Peter  changed:

   What|Removed |Added

 CC||pbrobin...@gmail.com

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/8] tgsi: add ureg support for image decls

2016-01-04 Thread Ilia Mirkin
On Mon, Jan 4, 2016 at 10:30 AM, Rob Clark  wrote:
> On Sat, Jan 2, 2016 at 11:37 PM, Ilia Mirkin  wrote:
>> Signed-off-by: Ilia Mirkin 
>> ---
>>  src/gallium/auxiliary/tgsi/tgsi_build.c| 62 +
>>  src/gallium/auxiliary/tgsi/tgsi_dump.c | 10 +--
>>  src/gallium/auxiliary/tgsi/tgsi_parse.c|  4 +-
>>  src/gallium/auxiliary/tgsi/tgsi_parse.h|  2 +-
>>  src/gallium/auxiliary/tgsi/tgsi_strings.c  |  4 +-
>>  src/gallium/auxiliary/tgsi/tgsi_text.c | 10 +--
>>  src/gallium/auxiliary/tgsi/tgsi_ureg.c | 77 
>> ++
>>  src/gallium/auxiliary/tgsi/tgsi_ureg.h |  7 ++
>>  src/gallium/drivers/ilo/shader/toy_tgsi.c  |  8 +--
>>  .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  | 12 +++-
>>  src/gallium/drivers/svga/svga_tgsi_vgpu10.c|  2 +
>>  src/gallium/include/pipe/p_shader_tokens.h |  7 +-
>
> missing src/gallium/docs/source/tgsi.rst ?

What sort of change were you hoping to see in there? FWIW I have a
later patch which updates the LOAD/etc ops to talk about buffer/image
things.

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 93570] the image of llvmpipe has a low quality on arm (with too many points on it)

2016-01-04 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=93570

--- Comment #11 from Rob Clark  ---
(In reply to Icenowy Zheng from comment #10)
> I've finally built a git version of mesa with a svn version of llvm.
> 
> Now the image is fixed, but the performance is 1/4 of the original.

just a sanity check, but you still see "llvmpipe" in the GL_RENDERER string
print out from glmark2?  Ie. something like:

  GL_RENDERER:   Gallium 0.4 on llvmpipe (LLVM 3.7, 128 bits)

just to double check to make sure you aren't falling back to swrast..

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/8] tgsi: add ureg support for image decls

2016-01-04 Thread Ilia Mirkin
On Mon, Jan 4, 2016 at 11:14 AM, Rob Clark  wrote:
> On Mon, Jan 4, 2016 at 11:03 AM, Ilia Mirkin  wrote:
>> On Mon, Jan 4, 2016 at 10:30 AM, Rob Clark  wrote:
>>> On Sat, Jan 2, 2016 at 11:37 PM, Ilia Mirkin  wrote:
>>>> Signed-off-by: Ilia Mirkin 
>>>> ---
>>>>  src/gallium/auxiliary/tgsi/tgsi_build.c| 62 +
>>>>  src/gallium/auxiliary/tgsi/tgsi_dump.c | 10 +--
>>>>  src/gallium/auxiliary/tgsi/tgsi_parse.c|  4 +-
>>>>  src/gallium/auxiliary/tgsi/tgsi_parse.h|  2 +-
>>>>  src/gallium/auxiliary/tgsi/tgsi_strings.c  |  4 +-
>>>>  src/gallium/auxiliary/tgsi/tgsi_text.c | 10 +--
>>>>  src/gallium/auxiliary/tgsi/tgsi_ureg.c | 77 
>>>> ++
>>>>  src/gallium/auxiliary/tgsi/tgsi_ureg.h |  7 ++
>>>>  src/gallium/drivers/ilo/shader/toy_tgsi.c  |  8 +--
>>>>  .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  | 12 +++-
>>>>  src/gallium/drivers/svga/svga_tgsi_vgpu10.c|  2 +
>>>>  src/gallium/include/pipe/p_shader_tokens.h |  7 +-
>>>
>>> missing src/gallium/docs/source/tgsi.rst ?
>>
>> What sort of change were you hoping to see in there? FWIW I have a
>> later patch which updates the LOAD/etc ops to talk about buffer/image
>> things.
>
> I suspect at a minimum s/RESOURCE/IMAGE/g.  Although if more docs
> coming in later patch that is even better.

Yeah, I believe I update the relevant stuff. I do keep instances of
the word "resource" though, sometimes to refer to either an IMAGE or a
BUFFER (defined in the next patch). Ops like LOAD behave slightly
differently depending on the resource type they are given. I think of
both images and buffers as "resources". However there should be no
(uncommented) references to RESOURCE left after these patches.

Cheers,

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] glsl: Handle bits=32 case in bitfieldInsert/bitfieldExtract.

2016-01-04 Thread Ilia Mirkin
On Mon, Jan 4, 2016 at 12:52 PM, Ilia Mirkin  wrote:
> On Mon, Jan 4, 2016 at 12:44 PM, Matt Turner  wrote:
>> On Wed, Dec 30, 2015 at 4:26 PM, Ilia Mirkin  wrote:
>>> On Wed, Dec 30, 2015 at 3:26 PM, Matt Turner  wrote:
>>>> The OpenGL specifications for these functions say:
>>>>
>>>>    The result will be undefined if <offset> or <bits> is negative, or if
>>>>    the sum of <offset> and <bits> is greater than the number of bits
>>>>    used to store the operand.
>>>>
>>>> Therefore passing bits=32, offset=0 is legal and defined in GLSL.
>>>>
>>>> But the earlier DX11/SM5 bfi/ibfe/ubfe opcodes are specified to accept a
>>>> bitfield width ranging from 0-31. As such, Intel and AMD instructions
>>>> read only the low 5 bits of the width operand, making them not compliant
>>>> with the GLSL spec, so we have to special case the bits=32 case.
>>>>
>>>> Checking that offset=0 is not necessary, since for any other value,
>>>> <offset> + <bits> will be greater than 32, which is specified as
>>>> generating an undefined result.
>>>>
>>>> Fixes:
>>>>    ES31-CTS.shader_bitfield_operation.bitfieldInsert.uint_2
>>>>    ES31-CTS.shader_bitfield_operation.bitfieldInsert.uvec4_3
>>>>    ES31-CTS.shader_bitfield_operation.bitfieldExtract.uvec3_0
>>>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92595
>>>> ---
>>>> Yuck. Suggestions welcome.
>>>
>>> Can you make a piglit test? Want to see if nvidia has the same
>>> problem. According to
>>> http://docs.nvidia.com/cuda/parallel-thread-execution/#integer-arithmetic-instructions-bfe,
>>> offset/bits can actually be up to 255 (although I can't fully imagine
>>> why one might want that). However perhaps the HW differs.
>>
>> I just sent: [PATCH] arb_gpu_shader5: Test corner cases of
>> bitfieldInsert/bitfieldExtract.
>>
>> It's not totally tested (as in, I haven't fixed i965 to make it pass
>> because I found out that the bfi2 instruction is also broken...) but I
>> am curious to see what the proprietary NVIDIA driver does.
>
> I'm curious too. On nvc0 the new bitfieldExtract tests still pass, but
> bitfieldInsert now fails.

And on softpipe (which uses tgsi_exec), BFE fails while BFI passes. Great :)

http://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/auxiliary/tgsi/tgsi_exec.c#n4073

These definitions might have come from the DX11 op pseudocode... but
the question is why does BFI pass? Shouldn't it also fail? Do I need
to & 31 something on nvc0 to make it all work?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] glsl: Handle bits=32 case in bitfieldInsert/bitfieldExtract.

2016-01-04 Thread Matt Turner
On Mon, Jan 4, 2016 at 12:52 PM, Ilia Mirkin  wrote:
> On Mon, Jan 4, 2016 at 12:44 PM, Matt Turner  wrote:
>> On Wed, Dec 30, 2015 at 4:26 PM, Ilia Mirkin  wrote:
>>> On Wed, Dec 30, 2015 at 3:26 PM, Matt Turner  wrote:
 The OpenGL specifications for these functions say:

The result will be undefined if offset or bits is negative, or if
the sum of offset and bits is greater than the number of bits
used to store the operand.

 Therefore passing bits=32, offset=0 is legal and defined in GLSL.

 But the earlier DX11/SM5 bfi/ibfe/ubfe opcodes are specified to accept a
 bitfield width ranging from 0-31. As such, Intel and AMD instructions
 read only the low 5 bits of the width operand, making them not compliant
 with the GLSL spec, so we have to special case the bits=32 case.

 Checking that offset=0 is not necessary, since for any other value,
 offset + bits will be greater than 32, which is specified as
 generating an undefined result.

 Fixes:
ES31-CTS.shader_bitfield_operation.bitfieldInsert.uint_2
ES31-CTS.shader_bitfield_operation.bitfieldInsert.uvec4_3
ES31-CTS.shader_bitfield_operation.bitfieldExtract.uvec3_0
 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92595
 ---
 Yuck. Suggestions welcome.
>>>
>>> Can you make a piglit test? Want to see if nvidia has the same
>>> problem. According to
>>> http://docs.nvidia.com/cuda/parallel-thread-execution/#integer-arithmetic-instructions-bfe,
>>> offset/bits can actually be up to 255 (although I can't fully imagine
>>> why one might want that). However perhaps the HW differs.
>>
>> I just sent: [PATCH] arb_gpu_shader5: Test corner cases of
>> bitfieldInsert/bitfieldExtract.
>>
>> It's not totally tested (as in, I haven't fixed i965 to make it pass
>> because I found out that the bfi2 instruction is also broken...) but I
>> am curious to see what the proprietary NVIDIA driver does.
>
> I'm curious too. On nvc0 the new bitfieldExtract tests still pass, but
> bitfieldInsert now fails.

FWIW, that's the same behavior I see with this patch on i965. I traced
it to the bfi2 instruction returning all zeros unexpectedly for the
bits=32, offset=0 case.

I would have expected the hardware to implement that operation as

bfi dst, mask, insert, base === dst := (insert & mask) | (base & ~mask)

but the PRM says it actually calculates offset from the bitmask by "UD
offset = LZD(reverse(src0.chan[n]))-1;" where LZD is "leading zero
detect", and then uses that value as a shift argument in a later
computation. I could see LZD producing a bad result when there are no
zeros in the mask.

I'm trying to get i965 to pass the test at the moment.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nir: few missing struct names

2016-01-04 Thread Rob Clark
From: Rob Clark 

nir.h is a bit inconsistent about 'typedef struct {} nir_foo' vs
'typedef struct nir_foo {} nir_foo'.  But missing struct name tags is
inconvenient when you need a fwd declaration without pulling in all
of nir.

So add missing struct name tag for nir_variable, and a couple other
spots where it would likely be useful.

Signed-off-by: Rob Clark 
---
 src/glsl/nir/nir.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 4286738..bedcc0d 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -139,7 +139,7 @@ typedef enum {
  * ir_variable - it should be easy to translate between the two.
  */
 
-typedef struct {
+typedef struct nir_variable {
struct exec_node node;
 
/**
@@ -349,7 +349,7 @@ typedef struct {
 #define nir_foreach_variable(var, var_list) \
foreach_list_typed(nir_variable, var, node, var_list)
 
-typedef struct {
+typedef struct nir_register {
struct exec_node node;
 
unsigned num_components; /** < number of vector components */
@@ -443,7 +443,7 @@ nir_instr_is_last(nir_instr *instr)
return exec_node_is_tail_sentinel(exec_node_get_next(&instr->node));
 }
 
-typedef struct {
+typedef struct nir_ssa_def {
/** for debugging only, can be NULL */
const char* name;
 
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev