Mesa (master): llvmpipe: improve rasterization discard logic

2018-05-22 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 7b89fcec416ed7e6ddadec2438aab63609d825f8
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=7b89fcec416ed7e6ddadec2438aab63609d825f8

Author: Roland Scheidegger 
Date:   Tue May 22 02:12:38 2018 +0200

llvmpipe: improve rasterization discard logic

This unifies the explicit rasterization discard as well as the implicit
rasterization disabled logic (which we need for another state tracker),
which really should do the exact same thing.
We'll now toss out the prims early on in setup with (implicit or
explicit) discard, rather than do setup and binning with them, which
was entirely pointless.
(We should eventually get rid of implicit discard, which should also
enable us to discard stuff already in draw, hence draw would be
able to skip the pointless clip and fallback stages in this case.)
We still need separate logic for only null ps - this is not the same
as rasterization discard. But simplify the logic there and don't count
primitives simply when there's an empty fs, regardless of depth/stencil
tests, which seems perfectly acceptable by d3d10.
While here, also fix statistics for primitives if face culling is
enabled.
No piglit changes.

Reviewed-by: Brian Paul 
Reviewed-by: Jose Fonseca 

---

 src/gallium/drivers/llvmpipe/lp_context.h   |  1 -
 src/gallium/drivers/llvmpipe/lp_jit.c   |  1 +
 src/gallium/drivers/llvmpipe/lp_jit.h   |  5 +++
 src/gallium/drivers/llvmpipe/lp_rast.c  | 12 +++-
 src/gallium/drivers/llvmpipe/lp_rast_priv.h |  6 
 src/gallium/drivers/llvmpipe/lp_scene.c |  5 ++-
 src/gallium/drivers/llvmpipe/lp_scene.h | 10 +++---
 src/gallium/drivers/llvmpipe/lp_setup.c | 18 ++-
 src/gallium/drivers/llvmpipe/lp_setup_line.c| 28 +++--
 src/gallium/drivers/llvmpipe/lp_setup_point.c   | 22 -
 src/gallium/drivers/llvmpipe/lp_setup_tri.c | 29 -
 src/gallium/drivers/llvmpipe/lp_setup_vbuf.c|  2 +-
 src/gallium/drivers/llvmpipe/lp_state_derived.c | 22 ++---
 src/gallium/drivers/llvmpipe/lp_state_fs.c  | 41 -
 src/gallium/drivers/llvmpipe/lp_state_fs.h  |  5 ---
 15 files changed, 118 insertions(+), 89 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_context.h 
b/src/gallium/drivers/llvmpipe/lp_context.h
index 54d98fdbf7..7a2f253984 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_context.h
@@ -136,7 +136,6 @@ struct llvmpipe_context {
struct blitter_context *blitter;
 
unsigned tex_timestamp;
-   boolean no_rast;
 
/** List of all fragment shader variants */
struct lp_fs_variant_list_item fs_variants_list;
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c 
b/src/gallium/drivers/llvmpipe/lp_jit.c
index a2762f39a0..e2309f4715 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.c
+++ b/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -212,6 +212,7 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp)
   elem_types[LP_JIT_THREAD_DATA_CACHE] =
 LLVMPointerType(lp_build_format_cache_type(gallivm), 0);
   elem_types[LP_JIT_THREAD_DATA_COUNTER] = LLVMInt64TypeInContext(lc);
+  elem_types[LP_JIT_THREAD_DATA_INVOCATIONS] = LLVMInt64TypeInContext(lc);
   elem_types[LP_JIT_THREAD_DATA_RASTER_STATE_VIEWPORT_INDEX] =
 LLVMInt32TypeInContext(lc);
 
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h 
b/src/gallium/drivers/llvmpipe/lp_jit.h
index 9db26f2cba..312d1a1281 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.h
+++ b/src/gallium/drivers/llvmpipe/lp_jit.h
@@ -192,6 +192,7 @@ struct lp_jit_thread_data
 {
struct lp_build_format_cache *cache;
uint64_t vis_counter;
+   uint64_t ps_invocations;
 
/*
 * Non-interpolated rasterizer state passed through to the fragment shader.
@@ -205,6 +206,7 @@ struct lp_jit_thread_data
 enum {
LP_JIT_THREAD_DATA_CACHE = 0,
LP_JIT_THREAD_DATA_COUNTER,
+   LP_JIT_THREAD_DATA_INVOCATIONS,
LP_JIT_THREAD_DATA_RASTER_STATE_VIEWPORT_INDEX,
LP_JIT_THREAD_DATA_COUNT
 };
@@ -216,6 +218,9 @@ enum {
 #define lp_jit_thread_data_counter(_gallivm, _ptr) \
lp_build_struct_get_ptr(_gallivm, _ptr, LP_JIT_THREAD_DATA_COUNTER, 
"counter")
 
+#define lp_jit_thread_data_invocations(_gallivm, _ptr) \
+   lp_build_struct_get_ptr(_gallivm, _ptr, LP_JIT_THREAD_DATA_INVOCATIONS, 
"invocs")
+
 #define lp_jit_thread_data_raster_state_viewport_index(_gallivm, _ptr) \
lp_build_struct_get(_gallivm, _ptr, \
LP_JIT_THREAD_DATA_RASTER_STATE_VIEWPORT_INDEX, \
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c 
b/src/gallium/drivers/llvmpipe/lp_rast.c
index 939944aa79..9d4f9f8d02 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -107,7 +107,7 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task,
 task->scene->fb.heigh

Mesa (master): draw: get rid of special logic to not emit null tris

2018-05-18 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 6f558fb0f79d88eb1749740e8bddb7e8b313fdf4
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=6f558fb0f79d88eb1749740e8bddb7e8b313fdf4

Author: Roland Scheidegger 
Date:   Thu May 17 03:45:02 2018 +0200

draw: get rid of special logic to not emit null tris

I've confirmed after 77554d220d6d74b4d913dc37ea3a874e9dc550e4 we no
longer need this to pass some tests from another api (as we no longer
generate the bogus extra null tris in the first place).

Reviewed-by: Jose Fonseca 

---

 src/gallium/auxiliary/draw/draw_pipe_clip.c | 38 -
 1 file changed, 38 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c 
b/src/gallium/auxiliary/draw/draw_pipe_clip.c
index 46118b6e67..2a9c944dc1 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_clip.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c
@@ -210,30 +210,6 @@ static void interp(const struct clip_stage *clip,
 }
 
 /**
- * Checks whether the specified triangle is empty and if it is returns
- * true, otherwise returns false.
- * Triangle is considered null/empty if its area is equal to zero.
- */
-static inline boolean
-is_tri_null(const struct clip_stage *clip, const struct prim_header *header)
-{
-   const unsigned pos_attr = clip->pos_attr;
-   float x1 = header->v[1]->data[pos_attr][0] - 
header->v[0]->data[pos_attr][0];
-   float y1 = header->v[1]->data[pos_attr][1] - 
header->v[0]->data[pos_attr][1];
-   float z1 = header->v[1]->data[pos_attr][2] - 
header->v[0]->data[pos_attr][2];
-
-   float x2 = header->v[2]->data[pos_attr][0] - 
header->v[0]->data[pos_attr][0];
-   float y2 = header->v[2]->data[pos_attr][1] - 
header->v[0]->data[pos_attr][1];
-   float z2 = header->v[2]->data[pos_attr][2] - 
header->v[0]->data[pos_attr][2];
-
-   float vx = y1 * z2 - z1 * y2;
-   float vy = x1 * z2 - z1 * x2;
-   float vz = x1 * y2 - y1 * x2;
-
-   return (vx*vx  + vy*vy + vz*vz) == 0.f;
-}
-
-/**
  * Emit a post-clip polygon to the next pipeline stage.  The polygon
  * will be convex and the provoking vertex will always be vertex[0].
  */
@@ -247,7 +223,6 @@ static void emit_poly(struct draw_stage *stage,
struct prim_header header;
unsigned i;
ushort edge_first, edge_middle, edge_last;
-   boolean tri_emitted = FALSE;
 
if (stage->draw->rasterizer->flatshade_first) {
   edge_first  = DRAW_PIPE_EDGE_FLAG_0;
@@ -269,7 +244,6 @@ static void emit_poly(struct draw_stage *stage,
header.pad = 0;
 
for (i = 2; i < n; i++, header.flags = edge_middle) {
-  boolean tri_null;
   /* order the triangle verts to respect the provoking vertex mode */
   if (stage->draw->rasterizer->flatshade_first) {
  header.v[0] = inlist[0];  /* the provoking vertex */
@@ -282,18 +256,6 @@ static void emit_poly(struct draw_stage *stage,
  header.v[2] = inlist[0];  /* the provoking vertex */
   }
 
-  tri_null = is_tri_null(clipper, &header);
-  /*
-   * If we ever generated a tri (regardless if it had area or not),
-   * skip all subsequent null tris.
-   * FIXME: I think this logic was hiding bugs elsewhere. It should
-   * be possible now to always emit all tris.
-   */
-  if (tri_null && tri_emitted) {
- continue;
-  }
-  tri_emitted = TRUE;
-
   if (!edgeflags[i-1]) {
  header.flags &= ~edge_middle;
   }

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): gallivm: Use alloca_undef with array type instead of alloca_array

2018-05-15 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: e01af38d6faf5dfd0f4ac6548ae03c27cca1dede
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=e01af38d6faf5dfd0f4ac6548ae03c27cca1dede

Author: Roland Scheidegger 
Date:   Tue May 15 04:35:50 2018 +0200

gallivm: Use alloca_undef with array type instead of alloca_array

Use a single allocation of array type instead of the old-style array
allocation for the temp and immediate arrays.
Probably only makes a difference if they aren't used indirectly (so,
if we used them solely because there's too many temps or immediates).
In this case the sroa and early-cse passes can sometimes do some
optimizations which they otherwise cannot.
(As a side note, for the temp reg array, we actually really should
use one allocation per array id, not just one for everything.)
Note that the instcombine pass would actually promote such
allocations to single alloc of array type as well, but it's too late
for some artificial shaders we've seen to help (we don't want to run
instcombine at the beginning due to its cost, hence would need
another sroa/cse pass after instcombine). sroa/early-cse help there
because they can actually eliminate all of the huge shader, reducing
it to a single const output (don't ask...).
(Interestingly, instcombine also removes all the bitcasts we do on that
allocation for single-value gathering, and in the end directly indexes
into the single vector elements, which according to spec is only
semi-valid, but this happens regardless. Another thing instcombine also
does is use inbound GEPs, which is probably something we should do
manually as well - for indirectly indexed reg files llvm may not be
able to figure it out on its own, but we should be able to guarantee
all pointers are always inbound. In any case, by the looks of it
using single allocation with array type seems to be the right thing
to do even for ordinary shaders.)
No piglit change.

Reviewed-by: Jose Fonseca 

---

 src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 61 +
 1 file changed, 33 insertions(+), 28 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index e411f906c7..83d7dbea9a 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -741,7 +741,8 @@ static void lp_exec_mask_store(struct lp_exec_mask *mask,
 
assert(lp_check_value(bld_store->type, val));
assert(LLVMGetTypeKind(LLVMTypeOf(dst_ptr)) == LLVMPointerTypeKind);
-   assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val));
+   assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val) ||
+  LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(dst_ptr))) == 
LLVMArrayTypeKind);
 
if (exec_mask) {
   LLVMValueRef res, dst;
@@ -852,7 +853,14 @@ get_file_ptr(struct lp_build_tgsi_soa_context *bld,
 
if (bld->indirect_files & (1 << file)) {
   LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, 
index * 4 + chan);
-  return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
+  if (LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(var_of_array))) == 
LLVMArrayTypeKind) {
+ LLVMValueRef gep[2];
+ gep[0] = lp_build_const_int32(bld->bld_base.base.gallivm, 0);
+ gep[1] = lindex;
+ return LLVMBuildGEP(builder, var_of_array, gep, 2, "");
+  } else {
+ return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
+  }
}
else {
   assert(index <= bld->bld_base.info->file_max[file]);
@@ -1352,21 +1360,20 @@ emit_fetch_immediate(
  /* Gather values from the immediate register array */
  res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
   } else {
- LLVMValueRef lindex = lp_build_const_int32(gallivm,
-reg->Register.Index * 4 + swizzle);
- LLVMValueRef imms_ptr =  LLVMBuildGEP(builder,
-bld->imms_array, &lindex, 1, 
"");
+ LLVMValueRef gep[2];
+ gep[0] = lp_build_const_int32(gallivm, 0);
+ gep[1] = lp_build_const_int32(gallivm, reg->Register.Index * 4 + 
swizzle);
+ LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
+  bld->imms_array, gep, 2, "");
  res = LLVMBuildLoad(builder, imms_ptr, "");
 
  if (tgsi_type_is_64bit(stype)) {
-LLVMValueRef lindex1;
 LLVMValueRef imms_ptr2;
 LLVMValueRef res2;
-
-lindex1 = lp_build_const_int32(gallivm,
-   reg->Register.Index * 4 + swizzle + 
1);
+gep[1] = lp_build_const_int32(gallivm,
+  reg->

Mesa (master): llvmpipe: Fix random number generation for unit tests

2018-05-13 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: cf3fb42fb5eb6130693a4be0a7b5ea06b184ce2d
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=cf3fb42fb5eb6130693a4be0a7b5ea06b184ce2d

Author: Roland Scheidegger 
Date:   Mon May  7 21:04:27 2018 +0200

llvmpipe: Fix random number generation for unit tests

We were never producing negative numbers for signed types.
Also fix only producing half the valid range for uint32, and
properly clamp signed values.

Because this now also properly tests snorm with actually negative
values, need to increase eps for such conversions. I believe these
cannot actually be hit in ordinary operation (e.g. if a snorm texture
is sampled and output to snorm RT, it will still go through snorm->float
and float->snorm conversion), so don't bother to do anything to fix
the bad accuracy (might be quite complex).
Basically, the issue is for something like snorm16->snorm8 that in the
end this will just use a 8 bit arithmetic right shift.
But the math behind it says we should actually do a division by 32767 / 127,
which is ~258, not 256. So the result can be one bit off (values have too large
magnitude), and furthermore, the shift has incorrect rounding (always rounds
down). For positive numbers, these errors have different directions, but
for negative ones they have the same direction, hence for some values the
error will be 2 bits in the end.

Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=106232

---

 src/gallium/drivers/llvmpipe/lp_test_conv.c |  8 
 src/gallium/drivers/llvmpipe/lp_test_main.c | 13 +++--
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c 
b/src/gallium/drivers/llvmpipe/lp_test_conv.c
index 6e58a03151..a4f313a0bb 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_conv.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c
@@ -211,6 +211,14 @@ test_one(unsigned verbose,
assert(src_type.length * num_srcs == dst_type.length * num_dsts);
 
eps = MAX2(lp_const_eps(src_type), lp_const_eps(dst_type));
+   if (dst_type.norm && dst_type.sign && src_type.sign && !src_type.floating) {
+  /*
+   * This is quite inaccurate due to shift being used.
+   * I don't think it's possible to hit such conversions with
+   * llvmpipe though.
+   */
+  eps *= 2;
+   }
 
context = LLVMContextCreate();
gallivm = gallivm_create("test_module", context);
diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c 
b/src/gallium/drivers/llvmpipe/lp_test_main.c
index 518ca274e7..5ec0dd347b 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_main.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_main.c
@@ -147,6 +147,7 @@ write_elem(struct lp_type type, void *dst, unsigned index, 
double value)
   if(type.sign) {
  long long lvalue = (long long)value;
  lvalue = MIN2(lvalue, ((long long)1 << (type.width - 1)) - 1);
+ lvalue = MAX2(lvalue, -((long long)1 << (type.width - 1)));
  switch(type.width) {
  case 8:
 *((int8_t *)dst + index) = (int8_t)lvalue;
@@ -200,16 +201,24 @@ random_elem(struct lp_type type, void *dst, unsigned 
index)
   }
   else {
  unsigned long long mask;
-if (type.fixed)
+ if (type.fixed)
 mask = ((unsigned long long)1 << (type.width / 2)) - 1;
  else if (type.sign)
 mask = ((unsigned long long)1 << (type.width - 1)) - 1;
  else
 mask = ((unsigned long long)1 << type.width) - 1;
  value += (double)(mask & rand());
+ if (!type.fixed && !type.sign && type.width == 32) {
+/*
+ * rand only returns half the possible range
+ * XXX 64bit values...
+ */
+if(rand() & 1)
+               value += (double)0x80000000;
+ }
   }
}
-   if(!type.sign)
+   if(type.sign)
   if(rand() & 1)
  value = -value;
write_elem(type, dst, index, value);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): draw: simplify clip null tri logic

2018-04-24 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 98578df27bbf682f254d59a3a7d63b5f1b98f838
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=98578df27bbf682f254d59a3a7d63b5f1b98f838

Author: Roland Scheidegger 
Date:   Tue Apr 24 18:12:34 2018 +0200

draw: simplify clip null tri logic

Simplifies the logic when to emit null tris (albeit the reasons why we
have to do this remain unclear).
This is strictly just logic simplification, the behavior doesn't change
at all.

Reviewed-by: Jose Fonseca 

---

 src/gallium/auxiliary/draw/draw_pipe_clip.c | 20 +---
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c 
b/src/gallium/auxiliary/draw/draw_pipe_clip.c
index 4cfa54b2e1..ff80363a51 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_clip.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c
@@ -252,8 +252,7 @@ static void emit_poly(struct draw_stage *stage,
struct prim_header header;
unsigned i;
ushort edge_first, edge_middle, edge_last;
-   boolean last_tri_was_null = FALSE;
-   boolean tri_was_not_null = FALSE;
+   boolean tri_emitted = FALSE;
 
if (stage->draw->rasterizer->flatshade_first) {
   edge_first  = DRAW_PIPE_EDGE_FLAG_0;
@@ -289,17 +288,16 @@ static void emit_poly(struct draw_stage *stage,
   }
 
   tri_null = is_tri_null(clipper, &header);
-  /* If we generated a triangle with an area, aka. non-null triangle,
-   * or if the previous triangle was also null then skip all subsequent
-   * null triangles */
-  if ((tri_was_not_null && tri_null) || (last_tri_was_null && tri_null)) {
- last_tri_was_null = tri_null;
+  /*
+   * If we ever generated a tri (regardless if it had area or not),
+   * skip all subsequent null tris.
+   * FIXME: it is unclear why we always have to emit at least one
+   * tri. Maybe this is hiding bugs elsewhere.
+   */
+  if (tri_null && tri_emitted) {
  continue;
   }
-  last_tri_was_null = tri_null;
-  if (!tri_null) {
- tri_was_not_null = TRUE;
-  }
+  tri_emitted = TRUE;
 
   if (!edgeflags[i-1]) {
  header.flags &= ~edge_middle;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): draw: fix different sign logic when clipping

2018-04-24 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 77554d220d6d74b4d913dc37ea3a874e9dc550e4
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=77554d220d6d74b4d913dc37ea3a874e9dc550e4

Author: Roland Scheidegger 
Date:   Tue Apr 24 18:25:55 2018 +0200

draw: fix different sign logic when clipping

The logic was flawed, since mul(x,y) will be <= 0 (exactly 0) when
the sign is the same but both numbers are sufficiently small
(if the product is smaller than 2^-128).
This could apparently lead to emitting a sufficient amount of
additional bogus vertices to overflow the allocated array for them,
hitting an assertion (still safe with release builds since we just
aborted clipping after the assertion in this case - I'm however unsure
if this is now really no longer possible, so that code stays).
Not sure if the additional vertices could cause other grief, I didn't
see anything wrong even when hitting the assertion.

Essentially, both +-0 are treated as positive (the vertex is considered
to be inside the clip volume for this plane), so integrate the logic
determining different sign into the branch there.

Reviewed-by: Jose Fonseca 

---

 src/gallium/auxiliary/draw/draw_pipe_clip.c | 17 -
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c 
b/src/gallium/auxiliary/draw/draw_pipe_clip.c
index ff80363a51..46118b6e67 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_clip.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c
@@ -47,11 +47,6 @@
 /** Set to 1 to enable printing of coords before/after clipping */
 #define DEBUG_CLIP 0
 
-
-#ifndef DIFFERENT_SIGNS
-#define DIFFERENT_SIGNS(x, y) ((x) * (y) <= 0.0F && (x) - (y) != 0.0F)
-#endif
-
 #define MAX_CLIPPED_VERTICES ((2 * (6 + PIPE_MAX_CLIP_PLANES))+1)
 
 
@@ -291,8 +286,8 @@ static void emit_poly(struct draw_stage *stage,
   /*
* If we ever generated a tri (regardless if it had area or not),
* skip all subsequent null tris.
-   * FIXME: it is unclear why we always have to emit at least one
-   * tri. Maybe this is hiding bugs elsewhere.
+   * FIXME: I think this logic was hiding bugs elsewhere. It should
+   * be possible now to always emit all tris.
*/
   if (tri_null && tri_emitted) {
  continue;
@@ -478,6 +473,7 @@ do_clip_tri(struct draw_stage *stage,
   for (i = 1; i <= n; i++) {
  struct vertex_header *vert = inlist[i];
  boolean *edge = &inEdges[i];
+ boolean different_sign;
 
  float dp = getclipdist(clipper, vert, plane_idx);
 
@@ -490,9 +486,12 @@ do_clip_tri(struct draw_stage *stage,
return;
 outEdges[outcount] = *edge_prev;
 outlist[outcount++] = vert_prev;
+different_sign = dp < 0.0f;
+ } else {
+different_sign = !(dp < 0.0f);
  }
 
- if (DIFFERENT_SIGNS(dp, dp_prev)) {
+ if (different_sign) {
 struct vertex_header *new_vert;
 boolean *new_edge;
 
@@ -510,7 +509,7 @@ do_clip_tri(struct draw_stage *stage,
 
 if (dp < 0.0f) {
/* Going out of bounds.  Avoid division by zero as we
-* know dp != dp_prev from DIFFERENT_SIGNS, above.
+* know dp != dp_prev from different_sign, above.
 */
float t = dp / (dp - dp_prev);
interp( clipper, new_vert, t, vert, vert_prev, viewport_index );

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): gallivm: remove LICM pass

2018-04-23 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 45b8f620a545bcdb8a4942bafd505c9418f6d9f2
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=45b8f620a545bcdb8a4942bafd505c9418f6d9f2

Author: Roland Scheidegger 
Date:   Mon Apr 23 04:39:00 2018 +0200

gallivm: remove LICM pass

LICM is simply too expensive, even though it presumably can help quite
a bit in some cases.
It was definitely cheaper in llvm 3.3, though as far as I can tell with
llvm 3.3 it failed to do anything in most cases. early-cse also actually
seems to cause licm to be able to move things when it previously couldn't,
which causes noticeable compile time increases.
There are more loop passes in llvm, but I'm not sure which ones are helpful,
and I couldn't find anything which would roughly do what the old licm in
llvm 3.3 did, so ditch it.

Reviewed-by: Jose Fonseca 

---

 src/gallium/auxiliary/gallivm/lp_bld_init.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c 
b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index ca36b467ca..a9968649c0 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -144,7 +144,15 @@ create_pass_manager(struct gallivm_state *gallivm)
   LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
   LLVMAddEarlyCSEPass(gallivm->passmgr);
   LLVMAddCFGSimplificationPass(gallivm->passmgr);
-  LLVMAddLICMPass(gallivm->passmgr);
+  /*
+   * FIXME: LICM is potentially quite useful. However, for some
+   * rather crazy shaders the compile time can reach _hours_ per shader,
+   * due to licm implying lcssa (since llvm 3.5), which can take forever.
+   * Even for sane shaders, the cost of licm is rather high (and not just
+   * due to lcssa, licm itself too), though mostly only in cases when it
+   * can actually move things, so having to disable it is a pity.
+   * LLVMAddLICMPass(gallivm->passmgr);
+   */
   LLVMAddReassociatePass(gallivm->passmgr);
   LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);
   LLVMAddConstantPropagationPass(gallivm->passmgr);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): gallivm: (trivial) do division by 1000 with int64

2018-04-23 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: e89cf59c27e4a66379f21ee0b574deaa078d0975
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=e89cf59c27e4a66379f21ee0b574deaa078d0975

Author: Roland Scheidegger 
Date:   Mon Apr 23 04:52:48 2018 +0200

gallivm: (trivial) do division by 1000 with int64

Conversion to int can otherwise overflow if compile times are over
~71min. (Yes this can happen...)

Reviewed-by: Jose Fonseca 

---

 src/gallium/auxiliary/gallivm/lp_bld_init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c 
b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index a9968649c0..800b2616c0 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -604,7 +604,7 @@ gallivm_compile_module(struct gallivm_state *gallivm)
 
if (gallivm_debug & GALLIVM_DEBUG_PERF) {
   int64_t time_end = os_time_get();
-  int time_msec = (int)(time_end - time_begin) / 1000;
+  int time_msec = (int)((time_end - time_begin) / 1000);
   assert(gallivm->module_name);
   debug_printf("optimizing module %s took %d msec\n",
gallivm->module_name, time_msec);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): gallivm: add early cse pass

2018-04-23 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 8b9ab674b982289b1c18c93598139ec1e4bf829a
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=8b9ab674b982289b1c18c93598139ec1e4bf829a

Author: Roland Scheidegger 
Date:   Mon Apr 23 04:32:56 2018 +0200

gallivm: add early cse pass

This pass is quite cheap, and can simplify the IR quite a bit for our
generated IR.
In particular on a variety of shaders I've found the time saved by
other passes due to the simplified IR more than makes up for the cost
of this pass, and on top of that the end result is actually better.
The only downside I've found is this enables the LICM pass to move some
things out of the main shader loop (in the case I've seen, instanced
vertex fetch (which is constant within the jit shader) plus the derived
instructions in the shader) which it couldn't do before for some reason.
This would actually be desirable but can increase compile time
considerably (licm seems to have considerable cost when it actually can
move things out of loops, due to alias analysis). But blaming early cse
for this seems inappropriate. (Note that the first two sroa / earlycse
passes are similar to what a standard llvm opt -O1/-O2 pipeline would
do, albeit this has some more passes even before but I don't think
they'd do much for us.)
It also in particular helps some crazy shader used for driver
verification (don't ask...) a lot (about factor of 6 faster in compile
time) (due to simplifying the IR before LICM is run).
While here, also move licm behind simplifycfg. For some shaders there
seems to be very significant compile time gains (we've seen a factor
of 10000 albeit that was a really crazy shader you'd certainly never
see in a real app), because LICM is quite expensive and there are cases
where running simplifycfg (along with sroa and early-cse) before licm
reduces IR complexity significantly. (I'm not entirely sure if it would
make sense to also run it afterwards.)

Reviewed-by: Jose Fonseca 

---

 src/gallium/auxiliary/gallivm/lp_bld_init.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c 
b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index dae9d01552..ca36b467ca 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -137,13 +137,14 @@ create_pass_manager(struct gallivm_state *gallivm)
}
 
if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) {
-  /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
-   * but there are more on SVN.
-   * TODO: Add more passes.
+  /*
+   * TODO: Evaluate passes some more - keeping in mind
+   * both quality of generated code and compile times.
*/
   LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
-  LLVMAddLICMPass(gallivm->passmgr);
+  LLVMAddEarlyCSEPass(gallivm->passmgr);
   LLVMAddCFGSimplificationPass(gallivm->passmgr);
+  LLVMAddLICMPass(gallivm->passmgr);
   LLVMAddReassociatePass(gallivm->passmgr);
   LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);
   LLVMAddConstantPropagationPass(gallivm->passmgr);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): gallivm: dump bitcode before optimization

2018-04-23 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: e8e1d287a3c3030f1a94dcf67381dfd7ae3eba14
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=e8e1d287a3c3030f1a94dcf67381dfd7ae3eba14

Author: Roland Scheidegger 
Date:   Mon Apr 23 06:22:45 2018 +0200

gallivm: dump bitcode before optimization

If we dump the bitcode for off-line debug purposes, we really want the
pre-optimized bitcode, otherwise it's useless in identifying problems
with IR optimization (if you have a shader which takes an hour to do
IR optimization, it's also nice you don't have to wait that hour...).
Also, print out the function passes for opt which correspond to what
was used for jit compilation (and also the opt level for codegen).
Using opt/llc this way should then pretty much mimic what was done
for jit. (When specifying something like -time-passes
-debug-pass=[Structure|Arguments] (for either opt or llc) that also
gives very useful information in which passes all the time was spent,
and which passes are really run along with the order - llvm will add
passes due to dependencies on its own, and of course -O2 for llc
comes with a ~100 pass list.)

Reviewed-by: Jose Fonseca 

---

 src/gallium/auxiliary/gallivm/lp_bld_init.c | 33 +
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c 
b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index 800b2616c0..1f0a01cde6 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -141,6 +141,10 @@ create_pass_manager(struct gallivm_state *gallivm)
* TODO: Evaluate passes some more - keeping in mind
* both quality of generated code and compile times.
*/
+  /*
+   * NOTE: if you change this, don't forget to change the output
+   * with GALLIVM_DEBUG_DUMP_BC in gallivm_compile_module.
+   */
   LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
   LLVMAddEarlyCSEPass(gallivm->passmgr);
   LLVMAddCFGSimplificationPass(gallivm->passmgr);
@@ -577,6 +581,22 @@ gallivm_compile_module(struct gallivm_state *gallivm)
   gallivm->builder = NULL;
}
 
+   /* Dump bitcode to a file */
+   if (gallivm_debug & GALLIVM_DEBUG_DUMP_BC) {
+  char filename[256];
+  assert(gallivm->module_name);
+  util_snprintf(filename, sizeof(filename), "ir_%s.bc", 
gallivm->module_name);
+  LLVMWriteBitcodeToFile(gallivm->module, filename);
+  debug_printf("%s written\n", filename);
+  debug_printf("Invoke as \"opt %s %s | llc -O%d %s%s\"\n",
+   gallivm_debug & GALLIVM_DEBUG_NO_OPT ? "-mem2reg" :
+   "-sroa -early-cse -simplifycfg -reassociate "
+   "-mem2reg -constprop -instcombine -gvn",
+   filename, gallivm_debug & GALLIVM_DEBUG_NO_OPT ? 0 : 2,
+   (HAVE_LLVM >= 0x0305) ? "[-mcpu=<-mcpu option>] " : "",
+   "[-mattr=<-mattr option(s)>]");
+   }
+
if (gallivm_debug & GALLIVM_DEBUG_PERF)
   time_begin = os_time_get();
 
@@ -610,19 +630,6 @@ gallivm_compile_module(struct gallivm_state *gallivm)
gallivm->module_name, time_msec);
}
 
-   /* Dump byte code to a file */
-   if (gallivm_debug & GALLIVM_DEBUG_DUMP_BC) {
-  char filename[256];
-  assert(gallivm->module_name);
-  util_snprintf(filename, sizeof(filename), "ir_%s.bc", 
gallivm->module_name);
-  LLVMWriteBitcodeToFile(gallivm->module, filename);
-  debug_printf("%s written\n", filename);
-  debug_printf("Invoke as \"llc %s%s -o - %s\"\n",
-   (HAVE_LLVM >= 0x0305) ? "[-mcpu=<-mcpu option>] " : "",
-   "[-mattr=<-mattr option(s)>]",
-   filename);
-   }
-
if (use_mcjit) {
   /* Setting the module's DataLayout to an empty string will cause the
* ExecutionEngine to copy to the DataLayout string from its target

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): scons: need to split CC or things might fail

2018-03-19 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: e10dc12f6f2f7513d96bbea87b93b8e338222188
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=e10dc12f6f2f7513d96bbea87b93b8e338222188

Author: Jose Fonseca 
Date:   Mon Mar 19 16:41:57 2018 +0100

scons: need to split CC or things might fail

We've seen this fail internally.

Reviewed-by: Roland Scheidegger 

---

 scons/gallium.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/scons/gallium.py b/scons/gallium.py
index ef3b2ee81a..75200b89c1 100755
--- a/scons/gallium.py
+++ b/scons/gallium.py
@@ -134,7 +134,9 @@ def check_cc(env, cc, expr, cpp_opt = '-E'):
 source.write('#if !(%s)\n#error\n#endif\n' % expr)
 source.close()
 
-pipe = SCons.Action._subproc(env, [env['CC'], cpp_opt, source.name],
+# sys.stderr.write('%r %s %s\n' % (env['CC'], cpp_opt, source.name));
+
+pipe = SCons.Action._subproc(env, env.Split(env['CC']) + [cpp_opt, 
source.name],
  stdin = 'devnull',
  stderr = 'devnull',
  stdout = 'devnull')

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): r600: fix abs for op3 sources

2018-03-13 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 274f8bf05ef526d65f01614313dda65bc7ec7a87
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=274f8bf05ef526d65f01614313dda65bc7ec7a87

Author: Roland Scheidegger 
Date:   Tue Mar 13 03:40:42 2018 +0100

r600: fix abs for op3 sources

If a src was referencing the same temp as the dst, the per-component
copy code didn't work.
e.g.
  cndge r0.xy, r0.xx, |r2|, r3
got expanded into
  mov  r12.x, |r2|
  cndge r0.x, r0.x, r12, r3
  mov  r12.y, |r2|
  cndge r0.y, r0.x, r12, r3
hence for the second cndge r0.x was mistakenly the previous cndge result.
Fix this by doing all the movs first, so there's no bogus alu.last in between.

Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=102905

Tested-by: 
Reviewed-by: Dave Airlie 

---

 src/gallium/drivers/r600/r600_shader.c | 110 +
 1 file changed, 56 insertions(+), 54 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 6b5c42f86d..bd511c76ac 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -7076,33 +7076,42 @@ static int tgsi_helper_copy(struct r600_shader_ctx 
*ctx, struct tgsi_full_instru
 }
 
 static int tgsi_make_src_for_op3(struct r600_shader_ctx *ctx,
- unsigned temp, int chan,
+ unsigned writemask,
  struct r600_bytecode_alu_src *bc_src,
  const struct r600_shader_src *shader_src)
 {
struct r600_bytecode_alu alu;
-   int r;
+   int i, r;
+   int lasti = tgsi_last_instruction(writemask);
+   int temp_reg = 0;
 
-   r600_bytecode_src(bc_src, shader_src, chan);
+   r600_bytecode_src(&bc_src[0], shader_src, 0);
+   r600_bytecode_src(&bc_src[1], shader_src, 1);
+   r600_bytecode_src(&bc_src[2], shader_src, 2);
+   r600_bytecode_src(&bc_src[3], shader_src, 3);
 
-   /* op3 operands don't support abs modifier */
if (bc_src->abs) {
-   assert(temp!=0);  /* we actually need the extra register, 
make sure it is allocated. */
-   memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-   alu.op = ALU_OP1_MOV;
-   alu.dst.sel = temp;
-   alu.dst.chan = chan;
-   alu.dst.write = 1;
+   temp_reg = r600_get_temp(ctx);
 
-   alu.src[0] = *bc_src;
-   alu.last = true; // sufficient?
-   r = r600_bytecode_add_alu(ctx->bc, &alu);
-   if (r)
-   return r;
-
-   memset(bc_src, 0, sizeof(*bc_src));
-   bc_src->sel = temp;
-   bc_src->chan = chan;
+   for (i = 0; i < lasti + 1; i++) {
+   if (!(writemask & (1 << i)))
+   continue;
+   memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+   alu.op = ALU_OP1_MOV;
+   alu.dst.sel = temp_reg;
+   alu.dst.chan = i;
+   alu.dst.write = 1;
+   alu.src[0] = bc_src[i];
+   if (i == lasti) {
+   alu.last = 1;
+   }
+   r = r600_bytecode_add_alu(ctx->bc, &alu);
+   if (r)
+   return r;
+   memset(&bc_src[i], 0, sizeof(*bc_src));
+   bc_src[i].sel = temp_reg;
+   bc_src[i].chan = i;
+   }
}
return 0;
 }
@@ -7111,9 +7120,9 @@ static int tgsi_op3_dst(struct r600_shader_ctx *ctx, int 
dst)
 {
struct tgsi_full_instruction *inst = 
&ctx->parse.FullToken.FullInstruction;
struct r600_bytecode_alu alu;
+   struct r600_bytecode_alu_src srcs[4][4];
int i, j, r;
int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
-   int temp_regs[4];
unsigned op = ctx->inst_info->op;
 
if (op == ALU_OP3_MULADD_IEEE &&
@@ -7121,10 +7130,12 @@ static int tgsi_op3_dst(struct r600_shader_ctx *ctx, 
int dst)
op = ALU_OP3_MULADD;
 
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
-   temp_regs[j] = 0;
-   if (ctx->src[j].abs)
-   temp_regs[j] = r600_get_temp(ctx);
+   r = tgsi_make_src_for_op3(ctx, inst->Dst[0].Register.WriteMask,
+ srcs[j], &ctx->src[j]);
+   if (r)
+   return r;
}
+
for (i = 0; i < lasti + 1; i++) {
if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
continue;
@@ -7132,9 +7143,7 @@ static int tgsi_op3

Mesa (master): u_blit: (trivial) u_blit.h needs to include p_defines.h

2018-03-10 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 0f0a6fa21dc240fca99a3a110a6c0dfac6d2a376
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=0f0a6fa21dc240fca99a3a110a6c0dfac6d2a376

Author: Roland Scheidegger 
Date:   Sat Mar 10 02:48:42 2018 +0100

u_blit: (trivial) u_blit.h needs to include p_defines.h

(For the pipe_tex_filter enum)

Reviewed-by: Mathias Fröhlich 
Reviewed-by: Jose Fonseca 

---

 src/gallium/auxiliary/util/u_blit.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/auxiliary/util/u_blit.h 
b/src/gallium/auxiliary/util/u_blit.h
index 085ea63570..004ceaecd8 100644
--- a/src/gallium/auxiliary/util/u_blit.h
+++ b/src/gallium/auxiliary/util/u_blit.h
@@ -31,6 +31,7 @@
 
 
 #include "pipe/p_compiler.h"
+#include "pipe/p_defines.h"
 
 
 #ifdef __cplusplus

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): draw: fix alpha value for very short aa lines

2018-03-09 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: d62f0df3541ab9ee7a4999f0ecedc52f8d1ab8cc
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=d62f0df3541ab9ee7a4999f0ecedc52f8d1ab8cc

Author: Roland Scheidegger 
Date:   Fri Mar  9 05:27:25 2018 +0100

draw: fix alpha value for very short aa lines

The logic would not work correctly for line lengths smaller than 1.0;
even a degenerated line with length 0 would still produce a fragment
with alpha anywhere between 0.0 and 0.5.

Reviewed-by: Brian Paul 

---

 src/gallium/auxiliary/draw/draw_pipe_aaline.c  | 25 -
 src/gallium/auxiliary/draw/draw_pipe_stipple.c |  1 -
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c 
b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index 14a4b2f4b0..66a943aff4 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -370,7 +370,30 @@ aaline_line(struct draw_stage *stage, struct prim_header 
*header)
float t_l, t_w;
uint i;
 
-   half_length = 0.5f * sqrtf(dx * dx + dy * dy) + 0.5f;
+   half_length = 0.5f * sqrtf(dx * dx + dy * dy);
+
+   if (half_length < 0.5f) {
+  /*
+   * The logic we use for "normal" sized segments is incorrect
+   * for very short segments (basically because we only have
+   * one value to interpolate, not a distance to each endpoint).
+   * Therefore, we calculate half_length differently, so that for
+   * original line length (near) 0, we get alpha 0 - otherwise
+   * max alpha would still be 0.5. This also prevents us from
+   * artifacts due to degenerated lines (the endpoints being
+   * identical, which would still receive anywhere from alpha
+   * 0-0.5 otherwise) (at least the pstipple stage may generate
+   * such lines due to float inaccuracies if line length is very
+   * close to a integer).
+   * Might not be fully accurate neither (because the "strength" of
+   * the line is going to be determined by how close to the pixel
+   * center those 1 or 2 fragments are) but it's probably the best
+   * we can do.
+   */
+  half_length = 2.0f * half_length;
+   } else {
+  half_length = half_length + 0.5f;
+   }
 
t_w = half_width;
t_l = 0.5f;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_stipple.c 
b/src/gallium/auxiliary/draw/draw_pipe_stipple.c
index 3a44e96add..d30572cc61 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_stipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c
@@ -150,7 +150,6 @@ stipple_line(struct draw_stage *stage, struct prim_header 
*header)
if (header->flags & DRAW_PIPE_RESET_STIPPLE)
   stipple->counter = 0;
 
-
/* XXX ToDo: instead of iterating pixel-by-pixel, use a look-up table.
 */
for (i = 0; i < length; i++) {

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): draw: fix line stippling with aa lines

2018-03-07 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 8ba3750d3d953a9e6a2a0564e2d3d5efc42316e1
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=8ba3750d3d953a9e6a2a0564e2d3d5efc42316e1

Author: Roland Scheidegger 
Date:   Tue Mar  6 21:33:16 2018 +0100

draw: fix line stippling with aa lines

In contrast to non-aa, where stippling is based on either dx or dy
(depending on if it's a x or y major line), stippling is based on
actual distance with smooth lines, so adjust for this.

(It looks like there are some minor artifacts with mesa demos
line-sample and stippling, it looks like the line endpoints
aren't quite right with aa + stippling - maybe due to the
integer math in the stipple stage, but I can't quite pinpoint it.)

Reviewed-by: Brian Paul 
Reviewed-by: Jose Fonseca 

---

 src/gallium/auxiliary/draw/draw_pipe_stipple.c | 17 +
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_pipe_stipple.c 
b/src/gallium/auxiliary/draw/draw_pipe_stipple.c
index 3a84d6c3ea..3a44e96add 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_stipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c
@@ -50,6 +50,7 @@ struct stipple_stage {
float counter;
uint pattern;
uint factor;
+   bool smooth;
 };
 
 
@@ -133,12 +134,19 @@ stipple_line(struct draw_stage *stage, struct prim_header 
*header)
float y0 = pos0[1];
float y1 = pos1[1];
 
-   float dx = x0 > x1 ? x0 - x1 : x1 - x0;
-   float dy = y0 > y1 ? y0 - y1 : y1 - y0;
-
-   float length = MAX2(dx, dy);
+   float length;
int i;
 
+   if (stipple->smooth) {
+  float dx = x1 - x0;
+  float dy = y1 - y0;
+  length = sqrtf(dx*dx + dy*dy);
+   } else {
+  float dx = x0 > x1 ? x0 - x1 : x1 - x0;
+  float dy = y0 > y1 ? y0 - y1 : y1 - y0;
+  length = MAX2(dx, dy);
+   }
+
if (header->flags & DRAW_PIPE_RESET_STIPPLE)
   stipple->counter = 0;
 
@@ -205,6 +213,7 @@ stipple_first_line(struct draw_stage *stage,
 
stipple->pattern = draw->rasterizer->line_stipple_pattern;
stipple->factor = draw->rasterizer->line_stipple_factor + 1;
+   stipple->smooth = draw->rasterizer->line_smooth;
 
stage->line = stipple_line;
stage->line(stage, header);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): draw: simplify (and correct) aaline fallback (v2)

2018-03-07 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: dbb2cf388b79538ed572ecb3b8b3b5cb1f8fab0e
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=dbb2cf388b79538ed572ecb3b8b3b5cb1f8fab0e

Author: Roland Scheidegger 
Date:   Tue Mar  6 19:16:45 2018 +0100

draw: simplify (and correct) aaline fallback (v2)

The motivation actually was to get rid of the additional tex
instruction, since that requires the draw fallback code to intercept
all sampler / view calls (even if the fallback is never hit).
Basically, the idea is to use coverage of the pixel to calculate
the alpha value, and coverage is simply based on the distance
to the center of the line (in both line direction, which is useful
for wide lines, as well as perpendicular to the line).
This is much closer to what hw supporting this natively actually does.
It also fixes an issue with line width not quite being correct, as
well as endpoints getting stretched too far (in line direction) with
wide lines, which is apparent with mesa demo line-sample.
(For llvmpipe, it would probably make sense to do something like this
directly when drawing lines, since rendering two tris is twice as
expensive as a line, but it would need some changes with state
management.)
Since we're no longer relying on mipmapping to get the alpha value,
we also don't need to draw 3 rects (6 tris), one is sufficient.

There are still issues (as before):
- quite sure it's not correct without half_pixel_center, but can't test
this with GL.
- aaline + line stipple is incorrect (evident with line-sample demo).
Looking at the spec the stipple pattern should actually be based on
distance (not just dx or dy for x/y major lines as without aa).
- outputs (other than pos + the one used for line aa) should be
reinterpolated since we actually increase line length by half a pixel
(but there's no tests which would care).

v2: simplify the math (should be equivalent), don't need immediate
v3: use float versions of atan2,cos,sin, minor cleanups

Reviewed-by: Brian Paul 
Reviewed-by: Jose Fonseca 

---

 src/gallium/auxiliary/draw/draw_pipe_aaline.c | 514 ++
 1 file changed, 105 insertions(+), 409 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c 
b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index a859dbc02b..14a4b2f4b0 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -1,6 +1,6 @@
 /**
  *
- * Copyright 2007 VMware, Inc.
+ * Copyright 2007-2018 VMware, Inc.
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -26,7 +26,7 @@
  **/
 
 /**
- * AA line stage:  AA lines are converted to texture mapped triangles.
+ * AA line stage:  AA lines are converted triangles (with extra generic)
  *
  * Authors:  Brian Paul
  */
@@ -40,7 +40,6 @@
 #include "util/u_format.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
-#include "util/u_sampler.h"
 
 #include "tgsi/tgsi_transform.h"
 #include "tgsi/tgsi_dump.h"
@@ -55,19 +54,6 @@
 
 
 /**
- * Size for the alpha texture used for antialiasing
- */
-#define TEXTURE_SIZE_LOG2  5   /* 32 x 32 */
-
-/**
- * Max texture level for the alpha texture used for antialiasing
- *
- * Don't use the 1x1 and 2x2 mipmap levels.
- */
-#define MAX_TEXTURE_LEVEL  (TEXTURE_SIZE_LOG2 - 2)
-
-
-/**
  * Subclass of pipe_shader_state to carry extra fragment shader info.
  */
 struct aaline_fragment_shader
@@ -75,8 +61,7 @@ struct aaline_fragment_shader
struct pipe_shader_state state;
void *driver_fs;
void *aaline_fs;
-   uint sampler_unit;
-   int generic_attrib;  /**< texcoord/generic used for texture */
+   int generic_attrib;  /**< generic used for distance */
 };
 
 
@@ -89,26 +74,16 @@ struct aaline_stage
 
float half_line_width;
 
-   /** For AA lines, this is the vertex attrib slot for the new texcoords */
-   uint tex_slot;
+   /** For AA lines, this is the vertex attrib slot for new generic */
+   uint coord_slot;
/** position, not necessarily output zero */
uint pos_slot;
 
-   void *sampler_cso;
-   struct pipe_resource *texture;
-   struct pipe_sampler_view *sampler_view;
-   uint num_samplers;
-   uint num_sampler_views;
-
 
/*
 * Currently bound state
 */
struct aaline_fragment_shader *fs;
-   struct {
-  void *sampler[PIPE_MAX_SAMPLERS];
-  struct pipe_sampler_view *sampler_views[PIPE_MAX_SHADER_SAMPLER_VIEWS];
-   } state;
 
/*
 * Driver interface/override functions
@@ -117,15 +92,6 @@ struct aaline_stage
 const struct pipe_shader_state *);
void (*driver_bind_fs_state)(struct pipe_context *, void *);
void (*driver_delete_fs_state)(struct pipe_context *, void *);
-

Mesa (master): tgsi/scan: use wrap-around shift behavior explicitly for file_mask

2018-03-05 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 06e724c7b4ade29868531edb20900859f566a077
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=06e724c7b4ade29868531edb20900859f566a077

Author: Roland Scheidegger 
Date:   Fri Mar  2 03:00:41 2018 +0100

tgsi/scan: use wrap-around shift behavior explicitly for file_mask

The comment said it will only represent the lowest 32 regs. This was
not entirely true in practice, since at least on x86 you'll get
masked shifts (unless the compiler could recognize it already and toss
it out). It turns out this actually works out alright (presumably
no one uses it for temp regs) when increasing max sampler views, so
make that behavior explicit.
Albeit it feels a bit hacky (but in any case, explicit behavior there
is better than undefined behavior).

Reviewed-by: Jose Fonseca 
Reviewed-by: Brian Paul 

---

 src/gallium/auxiliary/tgsi/tgsi_scan.c | 7 +--
 src/gallium/drivers/llvmpipe/lp_state_fs.c | 7 ++-
 src/gallium/drivers/swr/swr_shader.cpp | 2 +-
 3 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c 
b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index c35eff25ba..4a2b354063 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -585,8 +585,11 @@ scan_declaration(struct tgsi_shader_info *info,
   int buffer;
   unsigned index, target, type;
 
-  /* only first 32 regs will appear in this bitfield */
-  info->file_mask[file] |= (1 << reg);
+  /*
+   * only first 32 regs will appear in this bitfield, if larger
+   * bits will wrap around.
+   */
+  info->file_mask[file] |= (1u << (reg & 31));
   info->file_count[file]++;
   info->file_max[file] = MAX2(info->file_max[file], (int)reg);
 
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c 
b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 603fd84f6b..66645b07ac 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -3323,7 +3323,12 @@ make_variant_key(struct llvmpipe_context *lp,
if (shader->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
   key->nr_sampler_views = 
shader->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
   for(i = 0; i < key->nr_sampler_views; ++i) {
- if(shader->info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1 << i)) {
+ /*
+  * Note sview may exceed what's representable by file_mask.
+  * This will still work, the only downside is that not actually
+  * used views may be included in the shader key.
+  */
+ if(shader->info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1u << (i & 
31))) {
 lp_sampler_static_texture_state(&key->state[i].texture_state,
 
lp->sampler_views[PIPE_SHADER_FRAGMENT][i]);
  }
diff --git a/src/gallium/drivers/swr/swr_shader.cpp 
b/src/gallium/drivers/swr/swr_shader.cpp
index e5fb679f8b..477fa7f2db 100644
--- a/src/gallium/drivers/swr/swr_shader.cpp
+++ b/src/gallium/drivers/swr/swr_shader.cpp
@@ -98,7 +98,7 @@ swr_generate_sampler_key(const struct lp_tgsi_info &info,
   key.nr_sampler_views =
  info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
   for (unsigned i = 0; i < key.nr_sampler_views; i++) {
- if (info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1 << i)) {
+ if (info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1u << (i & 31))) {
 const struct pipe_sampler_view *view =
ctx->sampler_views[shader_type][i];
 lp_sampler_static_texture_state(

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): gallium: increase PIPE_MAX_SHADER_SAMPLER_VIEWS to 128

2018-03-05 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: cf4a92fda29ca2ab76179287bdd76f4a6183dd0e
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=cf4a92fda29ca2ab76179287bdd76f4a6183dd0e

Author: Roland Scheidegger 
Date:   Tue Feb 27 03:38:17 2018 +0100

gallium: increase PIPE_MAX_SHADER_SAMPLER_VIEWS to 128

Some state trackers require 128.
(There are no plans to increase PIPE_MAX_SAMPLERS too, since with gl
state tracker it's unlikely more than 32 will be needed, if you need
more use bindless.)

---

 src/gallium/include/pipe/p_state.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/include/pipe/p_state.h 
b/src/gallium/include/pipe/p_state.h
index 640e6ed26d..4dce399f84 100644
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -64,7 +64,7 @@ extern "C" {
 #define PIPE_MAX_SAMPLERS 32
 #define PIPE_MAX_SHADER_INPUTS80 /* 32 GENERIC + 32 PATCH + 16 others */
 #define PIPE_MAX_SHADER_OUTPUTS   80 /* 32 GENERIC + 32 PATCH + 16 others */
-#define PIPE_MAX_SHADER_SAMPLER_VIEWS 32
+#define PIPE_MAX_SHADER_SAMPLER_VIEWS 128
 #define PIPE_MAX_SHADER_BUFFERS   32
 #define PIPE_MAX_SHADER_IMAGES32
 #define PIPE_MAX_TEXTURE_LEVELS   16

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): r600: fix color export mask

2018-03-05 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 434523cf2a6738b0250de2a0e36e93b13f88832a
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=434523cf2a6738b0250de2a0e36e93b13f88832a

Author: Roland Scheidegger 
Date:   Mon Mar  5 20:12:32 2018 +0100

r600: fix color export mask

The r600 code (not the eg one) forgot to copy the ps_color_export_mask
in commit 5b14e06d8b42e2b08ebc52b6c314ef8647d87a1f when updating the
pixel state, leading to misrenderings (probably with MRT).

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105262

Tested-by: LoneVVolf 
Tested-by: Pavel Vinogradov 

---

 src/gallium/drivers/r600/r600_state.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/r600/r600_state.c 
b/src/gallium/drivers/r600/r600_state.c
index 7f6da1a3ed..923817119f 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -2525,6 +2525,7 @@ void r600_update_ps_state(struct pipe_context *ctx, 
struct r600_pipe_shader *sha
}
 
shader->nr_ps_color_outputs = num_cout;
+   shader->ps_color_export_mask = rshader->ps_color_export_mask;
 
spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) |
S_0286CC_PERSP_GRADIENT_ENA(1)|

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): draw: don't needlessly iterate through all sampler view slots

2018-02-28 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 89ae5def8cea9311727ac80d7274f80650279373
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=89ae5def8cea9311727ac80d7274f80650279373

Author: Roland Scheidegger 
Date:   Sun Feb 25 04:26:37 2018 +0100

draw: don't needlessly iterate through all sampler view slots

We already stored the highest (potentially) used number.

Reviewed-by: Jose Fonseca 
Reviewed-by: Brian Paul 

---

 src/gallium/auxiliary/draw/draw_context.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/draw/draw_context.c 
b/src/gallium/auxiliary/draw/draw_context.c
index 9791ec5506..e887272e15 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -973,7 +973,7 @@ draw_set_sampler_views(struct draw_context *draw,
 
for (i = 0; i < num; ++i)
   draw->sampler_views[shader_stage][i] = views[i];
-   for (i = num; i < PIPE_MAX_SHADER_SAMPLER_VIEWS; ++i)
+   for (i = num; i < draw->num_sampler_views[shader_stage]; ++i)
   draw->sampler_views[shader_stage][i] = NULL;
 
draw->num_sampler_views[shader_stage] = num;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): cso: don't cycle through PIPE_MAX_SHADER_SAMPLER_VIEWS on context destroy

2018-02-28 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: b923f21eaadb77ee70e1bf4c5e2f9aee2a5fa205
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=b923f21eaadb77ee70e1bf4c5e2f9aee2a5fa205

Author: Roland Scheidegger 
Date:   Wed Feb 28 03:01:23 2018 +0100

cso: don't cycle through PIPE_MAX_SHADER_SAMPLER_VIEWS on context destroy

There's no point, we know the highest non-null one.

Reviewed-by: Brian Paul 
Reviewed-by: Jose Fonseca 

---

 src/gallium/auxiliary/cso_cache/cso_context.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c 
b/src/gallium/auxiliary/cso_cache/cso_context.c
index 1b5d4b5598..3fa57f16ff 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -407,8 +407,10 @@ void cso_destroy_context( struct cso_context *ctx )
  ctx->pipe->set_stream_output_targets(ctx->pipe, 0, NULL, NULL);
}
 
-   for (i = 0; i < PIPE_MAX_SHADER_SAMPLER_VIEWS; i++) {
+   for (i = 0; i < ctx->nr_fragment_views; i++) {
   pipe_sampler_view_reference(&ctx->fragment_views[i], NULL);
+   }
+   for (i = 0; i < ctx->nr_fragment_views_saved; i++) {
   pipe_sampler_view_reference(&ctx->fragment_views_saved[i], NULL);
}
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): softpipe: don't iterate through PIPE_MAX_SHADER_SAMPLER_VIEWS

2018-02-28 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 26103487b54a1c1121132cc040927619cce45262
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=26103487b54a1c1121132cc040927619cce45262

Author: Roland Scheidegger 
Date:   Wed Feb 28 04:28:29 2018 +0100

softpipe: don't iterate through PIPE_MAX_SHADER_SAMPLER_VIEWS

We were setting view to NULL if the iteration index i was not below num.
But in fact if the view is NULL the code did nothing anyway...

Reviewed-by: Brian Paul 
Reviewed-by: Jose Fonseca 

---

 src/gallium/drivers/softpipe/sp_state_sampler.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c 
b/src/gallium/drivers/softpipe/sp_state_sampler.c
index c10fd918fd..751eb76e84 100644
--- a/src/gallium/drivers/softpipe/sp_state_sampler.c
+++ b/src/gallium/drivers/softpipe/sp_state_sampler.c
@@ -181,8 +181,8 @@ prepare_shader_sampling(
if (!num)
   return;
 
-   for (i = 0; i < PIPE_MAX_SHADER_SAMPLER_VIEWS; i++) {
-  struct pipe_sampler_view *view = i < num ? views[i] : NULL;
+   for (i = 0; i < num; i++) {
+  struct pipe_sampler_view *view = views[i];
 
   if (view) {
  struct pipe_resource *tex = view->texture;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): tgsi: Recognize RET in main for tgsi_transform

2018-02-13 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: f6718baabc7d6fed0d41f72fb22e57c0d67fbf1d
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=f6718baabc7d6fed0d41f72fb22e57c0d67fbf1d

Author: Roland Scheidegger 
Date:   Tue Feb 13 18:56:34 2018 +0100

tgsi: Recognize RET in main for tgsi_transform

Shaders coming from dx10 state trackers have a RET before the END.
And the epilog needs to be placed before the RET (otherwise it will
get ignored).
Hence figure out if a RET is in main, in this case we'll place
the epilog there rather than before the END.
(At a closer look, there actually seem to be problems with control
flow in general with output redirection, that would need another
look. It's enough however to fix draw's aa line emulation in some
internal bug - lines tend to be drawn with trivial shaders, moving
either a constant color or a vertex color directly to the output).

v2: add assert so buggy handling of RET in main is detected

Reviewed-by: Brian Paul 
Reviewed-by: Jose Fonseca 

---

 src/gallium/auxiliary/tgsi/tgsi_transform.c | 62 +
 1 file changed, 55 insertions(+), 7 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.c 
b/src/gallium/auxiliary/tgsi/tgsi_transform.c
index ffdad1338c..a13cf90a27 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_transform.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_transform.c
@@ -110,6 +110,9 @@ tgsi_transform_shader(const struct tgsi_token *tokens_in,
 {
uint procType;
boolean first_instruction = TRUE;
+   boolean epilog_emitted = FALSE;
+   int cond_stack = 0;
+   int call_stack = 0;
 
/* input shader */
struct tgsi_parse_context parse;
@@ -166,22 +169,66 @@ tgsi_transform_shader(const struct tgsi_token *tokens_in,
  {
 struct tgsi_full_instruction *fullinst
= &parse.FullToken.FullInstruction;
+unsigned opcode = fullinst->Instruction.Opcode;
 
 if (first_instruction && ctx->prolog) {
ctx->prolog(ctx);
 }
 
-/* XXX Note: we may also want to look for a main/top-level
- * TGSI_OPCODE_RET instruction in the future.
+/*
+ * XXX Note: we handle the case of ret in main.
+ * However, the output redirections done by transform
+ * have their limits with control flow and will generally
+ * not work correctly. e.g.
+ * if (cond) {
+ *oColor = x;
+ *ret;
+ * }
+ * oColor = y;
+ * end;
+ * If the color output is redirected to a temp and modified
+ * by a transform, this will not work (the oColor assignment
+ * in the conditional will never make it to the actual output).
  */
-if (fullinst->Instruction.Opcode == TGSI_OPCODE_END
-&& ctx->epilog) {
-   /* Emit caller's epilog */
-   ctx->epilog(ctx);
-   /* Emit END */
+if ((opcode == TGSI_OPCODE_END || opcode == TGSI_OPCODE_RET) &&
+ call_stack == 0 && ctx->epilog && !epilog_emitted) {
+   if (opcode == TGSI_OPCODE_RET && cond_stack != 0) {
+  assert(!"transform ignoring RET in main");
+   } else {
+  assert(cond_stack == 0);
+  /* Emit caller's epilog */
+  ctx->epilog(ctx);
+  epilog_emitted = TRUE;
+   }
+   /* Emit END (or RET) */
ctx->emit_instruction(ctx, fullinst);
 }
 else {
+   switch (opcode) {
+   case TGSI_OPCODE_IF:
+   case TGSI_OPCODE_UIF:
+   case TGSI_OPCODE_SWITCH:
+   case TGSI_OPCODE_BGNLOOP:
+  cond_stack++;
+  break;
+   case TGSI_OPCODE_CAL:
+  call_stack++;
+  break;
+   case TGSI_OPCODE_ENDIF:
+   case TGSI_OPCODE_ENDSWITCH:
+   case TGSI_OPCODE_ENDLOOP:
+  assert(cond_stack > 0);
+  cond_stack--;
+  break;
+   case TGSI_OPCODE_ENDSUB:
+  assert(call_stack > 0);
+  call_stack--;
+  break;
+   case TGSI_OPCODE_BGNSUB:
+   case TGSI_OPCODE_RET:
+   default:
+  break;
+   }
if (ctx->transform_instruction)
   ctx->transform_instruction(ctx, fullinst);
else
@@ -231,6 +278,7 @@ tgsi_transform_shader(const struct tgsi_token *tokens_in,
  assert( 0 );
   }
}
+   assert(call_stack == 0);
 
tgsi_parse_free (&parse);
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): r600: partly fix sampleMaskIn value

2018-02-07 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: b936f4d1ca0d2ab1e828ff6a6e617f12469687fa
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=b936f4d1ca0d2ab1e828ff6a6e617f12469687fa

Author: Roland Scheidegger 
Date:   Sun Feb  4 23:54:26 2018 +0100

r600: partly fix sampleMaskIn value

The hw gives us coverage for pixel, not for individual fragment shader
invocations, in case execution isn't per pixel (eg, unlike cm, actually
cannot do "real" minSampleShading, it's either per-pixel or per-fragment,
but it doesn't really make a difference here).
Also, with msaa disabled, the hw still gives us a mask corresponding to
the number of samples, where GL requires this to be 1.
Fix this up by masking the sampleMaskIn bits with the bit corresponding to
the sampleID, if we know this shader is always executed at per-sample
granularity. (In case of a per-sample frequency shader and msaa disabled,
the sampleID will always be 0, so this works just fine there.)
Fixing this for the minSampleShading case will need a shader key (radeonsi
uses the prolog part for this) (for eg, could get away with a single bit, cm
would need more bits depending on sample/invocation ratio, or read the
bits from a uniform), unless we'd want to always use a sample mask uniform
(which is probably not a good idea, as it would make the ordinary common
msaa case slower for no good reason).
This fixes some parts of piglit arb_sample_shading-samplemask (with fixed
test), in particular those which use a sampleID, still failing others
as expected.

Reviewed-by: Dave Airlie 

---

 src/gallium/drivers/r600/r600_shader.c | 54 ++
 1 file changed, 54 insertions(+)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 8b9bb47a2e..26f9ddb940 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -1138,6 +1138,11 @@ static int allocate_system_value_inputs(struct 
r600_shader_ctx *ctx, int gpr_off
 
tgsi_parse_free(&parse);
 
+   if (ctx->info.reads_samplemask &&
+   (ctx->info.uses_linear_sample || ctx->info.uses_linear_sample)) {
+   inputs[1].enabled = true;
+   }
+
if (ctx->bc->chip_class >= EVERGREEN) {
int num_baryc = 0;
/* assign gpr to each interpolator according to priority */
@@ -3503,8 +3508,57 @@ static int r600_shader_from_tgsi(struct r600_context 
*rctx,
r = eg_load_helper_invocation(&ctx);
if (r)
return r;
+   }
+
+   /*
+* XXX this relies on fixed_pt_position_gpr only being present when
+* this shader should be executed per sample. Should be the case for 
now...
+*/
+   if (ctx.fixed_pt_position_gpr != -1 && ctx.info.reads_samplemask) {
+   /*
+* Fix up sample mask. The hw always gives us coverage mask for
+* the pixel. However, for per-sample shading, we need the
+* coverage for the shader invocation only.
+* Also, with disabled msaa, only the first bit should be set
+* (luckily the same fixup works for both problems).
+* For now, we can only do it if we know this shader is always
+* executed per sample (due to usage of bits in the shader
+* forcing per-sample execution).
+* If the fb is not multisampled, we'd do unnecessary work but
+* it should still be correct.
+* It will however do nothing for sample shading according
+* to MinSampleShading.
+*/
+   struct r600_bytecode_alu alu;
+   int tmp = r600_get_temp(&ctx);
+   assert(ctx.face_gpr != -1);
+   memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+
+   alu.op = ALU_OP2_LSHL_INT;
+   alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
+   alu.src[0].value = 0x1;
+   alu.src[1].sel = ctx.fixed_pt_position_gpr;
+   alu.src[1].chan = 3;
+   alu.dst.sel = tmp;
+   alu.dst.chan = 0;
+   alu.dst.write = 1;
+   alu.last = 1;
+   if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
+   return r;
 
+   memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+   alu.op = ALU_OP2_AND_INT;
+   alu.src[0].sel = tmp;
+   alu.src[1].sel = ctx.face_gpr;
+   alu.src[1].chan = 2;
+   alu.dst.sel = ctx.face_gpr;
+   alu.dst.chan = 2;
+   alu.dst.write = 1;
+   alu.last = 1;
+   if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
+   return r;
}
+
if (ctx.fragcoord_input >= 0)

Mesa (master): mesa: (trivial) remove unused ignore_sample_qualifier_parameter

2018-02-07 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 6fd3c395907731baadcf70978cf392c673ebc96f
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=6fd3c395907731baadcf70978cf392c673ebc96f

Author: Roland Scheidegger 
Date:   Sun Feb  4 00:32:05 2018 +0100

mesa: (trivial) remove unused ignore_sample_qualifier_parameter

This parameter for _mesa_get_min_invocations_per_fragment() was once used
by the intel driver, but it's long gone.

Reviewed-by: Brian Paul 
Reviewed-by: Dave Airlie 

---

 src/mesa/program/program.c| 11 ---
 src/mesa/program/program.h|  3 +--
 src/mesa/state_tracker/st_atom_msaa.c |  2 +-
 3 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c
index 220efc3539..6aba3cb3f1 100644
--- a/src/mesa/program/program.c
+++ b/src/mesa/program/program.c
@@ -515,8 +515,7 @@ _mesa_find_free_register(const GLboolean used[],
  */
 GLint
 _mesa_get_min_invocations_per_fragment(struct gl_context *ctx,
-   const struct gl_program *prog,
-   bool ignore_sample_qualifier)
+   const struct gl_program *prog)
 {
/* From ARB_sample_shading specification:
 * "Using gl_SampleID in a fragment shader causes the entire shader
@@ -534,11 +533,9 @@ _mesa_get_min_invocations_per_fragment(struct gl_context 
*ctx,
* "Use of the "sample" qualifier on a fragment shader input
*  forces per-sample shading"
*/
-  if (prog->info.fs.uses_sample_qualifier && !ignore_sample_qualifier)
- return MAX2(_mesa_geometric_samples(ctx->DrawBuffer), 1);
-
-  if (prog->info.system_values_read & (SYSTEM_BIT_SAMPLE_ID |
-   SYSTEM_BIT_SAMPLE_POS))
+  if (prog->info.fs.uses_sample_qualifier ||
+  (prog->info.system_values_read & (SYSTEM_BIT_SAMPLE_ID |
+SYSTEM_BIT_SAMPLE_POS)))
  return MAX2(_mesa_geometric_samples(ctx->DrawBuffer), 1);
   else if (ctx->Multisample.SampleShading)
  return MAX2(ceil(ctx->Multisample.MinSampleShadingValue *
diff --git a/src/mesa/program/program.h b/src/mesa/program/program.h
index 376da7b2d4..659385f55b 100644
--- a/src/mesa/program/program.h
+++ b/src/mesa/program/program.h
@@ -108,8 +108,7 @@ _mesa_find_free_register(const GLboolean used[],
 
 extern GLint
 _mesa_get_min_invocations_per_fragment(struct gl_context *ctx,
-   const struct gl_program *prog,
-   bool ignore_sample_qualifier);
+   const struct gl_program *prog);
 
 static inline GLuint
 _mesa_program_enum_to_shader_stage(GLenum v)
diff --git a/src/mesa/state_tracker/st_atom_msaa.c 
b/src/mesa/state_tracker/st_atom_msaa.c
index 589e328ac5..556c7c5889 100644
--- a/src/mesa/state_tracker/st_atom_msaa.c
+++ b/src/mesa/state_tracker/st_atom_msaa.c
@@ -77,5 +77,5 @@ st_update_sample_shading(struct st_context *st)
   return;
 
cso_set_min_samples(st->cso_context,
- _mesa_get_min_invocations_per_fragment(st->ctx, &st->fp->Base, 
false));
+ _mesa_get_min_invocations_per_fragment(st->ctx, &st->fp->Base));
 }

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): r600: clean up fragment shader input scan code

2018-02-07 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 07d724326aba7945117e5ee3711df7f73dfb
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=07d724326aba7945117e5ee3711df7f73dfb

Author: Roland Scheidegger 
Date:   Sun Feb  4 23:38:28 2018 +0100

r600: clean up fragment shader input scan code

For some reason, we were iterating through the code twice (first just for
instructions needing barycentrics, then for instructions and input dcls).
Move things around slightly so this is no longer necessary.
There also was an unneeded enabling of the fixed_pt_position_gpr - this is only
needed if the per-sample interpolation comes from an input, not from an
instruction (just move the assert where it belongs) (since the sample id to
sample from comes from a tgsi src in this case, and isn't sampleID).
Otherwise there should be no functional change.

Reviewed-by: Dave Airlie 

---

 src/gallium/drivers/r600/r600_shader.c | 75 +++---
 1 file changed, 23 insertions(+), 52 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index c3bcb9b77d..8b9bb47a2e 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -,7 +,6 @@ static int allocate_system_value_inputs(struct 
r600_shader_ctx *ctx, int gpr_off
 
if (inst->Instruction.Opcode == 
TGSI_OPCODE_INTERP_SAMPLE) {
location = TGSI_INTERPOLATE_LOC_CENTER;
-   inputs[1].enabled = true; /* needs 
SAMPLEID */
} else if (inst->Instruction.Opcode == 
TGSI_OPCODE_INTERP_OFFSET) {
location = TGSI_INTERPOLATE_LOC_CENTER;
/* Needs sample positions, currently 
those are always available */
@@ -1139,6 +1138,19 @@ static int allocate_system_value_inputs(struct 
r600_shader_ctx *ctx, int gpr_off
 
tgsi_parse_free(&parse);
 
+   if (ctx->bc->chip_class >= EVERGREEN) {
+   int num_baryc = 0;
+   /* assign gpr to each interpolator according to priority */
+   for (i = 0; i < ARRAY_SIZE(ctx->eg_interpolators); i++) {
+   if (ctx->eg_interpolators[i].enabled) {
+   ctx->eg_interpolators[i].ij_index = num_baryc;
+   num_baryc++;
+   }
+   }
+   num_baryc = (num_baryc + 1) >> 1;
+   gpr_offset += num_baryc;
+   }
+
for (i = 0; i < ARRAY_SIZE(inputs); i++) {
boolean enabled = inputs[i].enabled;
int *reg = inputs[i].reg;
@@ -1165,18 +1177,21 @@ static int allocate_system_value_inputs(struct 
r600_shader_ctx *ctx, int gpr_off
  * for evergreen we need to scan the shader to find the number of GPRs we need 
to
  * reserve for interpolation and system values
  *
- * we need to know if we are going to emit
- * any sample or centroid inputs
+ * we need to know if we are going to emit any sample or centroid inputs
  * if perspective and linear are required
 */
 static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
 {
unsigned i;
-   int num_baryc;
-   struct tgsi_parse_context parse;
 
memset(&ctx->eg_interpolators, 0, sizeof(ctx->eg_interpolators));
 
+   /*
+* Could get this information from the shader info. But right now
+* we interpolate all declared inputs, whereas the shader info will
+* only contain the bits if the inputs are actually used, so it might
+* not be safe...
+*/
for (i = 0; i < ctx->info.num_inputs; i++) {
int k;
/* skip position/face/mask/sampleid */
@@ -1193,53 +1208,9 @@ static int evergreen_gpr_count(struct r600_shader_ctx 
*ctx)
ctx->eg_interpolators[k].enabled = TRUE;
}
 
-   if (tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) {
-   return 0;
-   }
-
-   /* need to scan shader for system values and 
interpolateAtSample/Offset/Centroid */
-   while (!tgsi_parse_end_of_tokens(&parse)) {
-   tgsi_parse_token(&parse);
-
-   if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) {
-   const struct tgsi_full_instruction *inst = 
&parse.FullToken.FullInstruction;
-   if (inst->Instruction.Opcode == 
TGSI_OPCODE_INTERP_SAMPLE ||
-   inst->Instruction.Opcode == 
TGSI_OPCODE_INTERP_OFFSET ||
-   inst->Instruction.Opcode == 
TGSI_OPCODE_INTERP_CENTROID)
-   {
-   int interpolate, location, k;
-
-   if (inst->Instruction.Opcode == 
TGSI_OPCODE_

Mesa (master): r600/cm: (trivial) code cleanup for emitting msaa state

2018-02-07 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: becc7faae22cee26888e87d0c23c193b86603c5a
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=becc7faae22cee26888e87d0c23c193b86603c5a

Author: Roland Scheidegger 
Date:   Sat Feb  3 20:11:35 2018 +0100

r600/cm: (trivial) code cleanup for emitting msaa state

No functional change (compile tested only).

Reviewed-by: Dave Airlie 

---

 src/gallium/drivers/r600/cayman_msaa.c  | 14 ++
 src/gallium/drivers/r600/evergreen_state.c  | 10 ++
 src/gallium/drivers/r600/r600_pipe_common.h |  6 ++
 3 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/r600/cayman_msaa.c 
b/src/gallium/drivers/r600/cayman_msaa.c
index 6bc307a4bc..f97924ac22 100644
--- a/src/gallium/drivers/r600/cayman_msaa.c
+++ b/src/gallium/drivers/r600/cayman_msaa.c
@@ -141,7 +141,7 @@ void cayman_init_msaa(struct pipe_context *ctx)
cayman_get_sample_position(ctx, 16, i, 
rctx->sample_locations_16x[i]);
 }
 
-void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples)
+static void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int 
nr_samples)
 {
switch (nr_samples) {
default:
@@ -202,9 +202,8 @@ void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs 
*cs, int nr_samples)
}
 }
 
-void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
-int ps_iter_samples, int overrast_samples,
-unsigned sc_mode_cntl_1)
+void cayman_emit_msaa_state(struct radeon_winsys_cs *cs, int nr_samples,
+   int ps_iter_samples, int overrast_samples)
 {
int setup_samples = nr_samples > 1 ? nr_samples :
overrast_samples > 1 ? overrast_samples : 0;
@@ -216,6 +215,13 @@ void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, 
int nr_samples,
 *   endcaps.
 */
unsigned sc_line_cntl = S_028BDC_DX10_DIAMOND_TEST_ENA(1);
+   unsigned sc_mode_cntl_1 =
+   EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
+   EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1);
+
+   if (nr_samples > 1) {
+   cayman_emit_msaa_sample_locs(cs, nr_samples);
+   }
 
if (setup_samples > 1) {
/* indexed by log2(nr_samples) */
diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 742ca5babb..fcd742c5f9 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1956,14 +1956,8 @@ static void evergreen_emit_framebuffer_state(struct 
r600_context *rctx, struct r
if (rctx->b.chip_class == EVERGREEN) {
evergreen_emit_msaa_state(rctx, rctx->framebuffer.nr_samples, 
rctx->ps_iter_samples);
} else {
-   unsigned sc_mode_cntl_1 =
-   EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
-   EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1);
-
-   if (rctx->framebuffer.nr_samples > 1)
-   cayman_emit_msaa_sample_locs(cs, 
rctx->framebuffer.nr_samples);
-   cayman_emit_msaa_config(cs, rctx->framebuffer.nr_samples,
-   rctx->ps_iter_samples, 0, 
sc_mode_cntl_1);
+   cayman_emit_msaa_state(cs, rctx->framebuffer.nr_samples,
+  rctx->ps_iter_samples, 0);
}
 }
 
diff --git a/src/gallium/drivers/r600/r600_pipe_common.h 
b/src/gallium/drivers/r600/r600_pipe_common.h
index 86a20f8639..ee8eb54920 100644
--- a/src/gallium/drivers/r600/r600_pipe_common.h
+++ b/src/gallium/drivers/r600/r600_pipe_common.h
@@ -799,10 +799,8 @@ extern const unsigned eg_max_dist_4x;
 void cayman_get_sample_position(struct pipe_context *ctx, unsigned 
sample_count,
unsigned sample_index, float *out_value);
 void cayman_init_msaa(struct pipe_context *ctx);
-void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples);
-void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
-int ps_iter_samples, int overrast_samples,
-unsigned sc_mode_cntl_1);
+void cayman_emit_msaa_state(struct radeon_winsys_cs *cs, int nr_samples,
+   int ps_iter_samples, int overrast_samples);
 
 
 /* Inline helpers. */

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): Revert "gallium: build ddebug, noop, rbug, trace as part of auxiliary"

2018-02-07 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 09f49b9e50ee86e24c7e975f08535aa7237852a1
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=09f49b9e50ee86e24c7e975f08535aa7237852a1

Author: Roland Scheidegger 
Date:   Wed Feb  7 23:47:39 2018 +0100

Revert "gallium: build ddebug, noop, rbug, trace as part of auxiliary"

This reverts commit 6f82b8d8d0a986aac28e7bec47fc313fb950475c.

This broke scons build, and reportedly clover with autotools/meson too.

---

 src/gallium/Makefile.am| 12 +-
 src/gallium/auxiliary/Makefile.am  | 10 +
 .../auxiliary/target-helpers/inline_debug_helper.h | 26 ++
 src/gallium/drivers/ddebug/Makefile.sources| 14 ++--
 src/gallium/drivers/noop/Makefile.sources  |  8 +++
 src/gallium/drivers/rbug/Makefile.sources  | 18 +++
 src/gallium/drivers/trace/Makefile.sources | 26 +++---
 src/gallium/state_trackers/osmesa/Makefile.am  |  3 ++-
 src/gallium/targets/d3dadapter9/Makefile.am|  8 ++-
 src/gallium/targets/dri/Makefile.am| 10 -
 src/gallium/targets/libgl-xlib/Makefile.am |  6 -
 src/gallium/targets/osmesa/Makefile.am |  4 +++-
 src/gallium/targets/pipe-loader/Makefile.am|  6 -
 src/gallium/tests/unit/Makefile.am |  1 +
 14 files changed, 98 insertions(+), 54 deletions(-)

diff --git a/src/gallium/Makefile.am b/src/gallium/Makefile.am
index 81eabef106..af010c89f8 100644
--- a/src/gallium/Makefile.am
+++ b/src/gallium/Makefile.am
@@ -11,6 +11,12 @@ SUBDIRS += auxiliary/pipe-loader
 ## Gallium pipe drivers and their respective winsys'
 ##
 
+SUBDIRS += \
+   drivers/ddebug \
+   drivers/noop \
+   drivers/trace \
+   drivers/rbug
+
 ## freedreno/msm/kgsl
 if HAVE_GALLIUM_FREEDRENO
 SUBDIRS += drivers/freedreno winsys/freedreno/drm
@@ -182,12 +188,6 @@ endif
 
 EXTRA_DIST += \
include \
-   drivers/noop/SConscript \
-   drivers/rbug/README \
-   drivers/rbug/SConscript \
-   drivers/trace/trace.xsl \
-   drivers/trace/README \
-   drivers/trace/SConscript \
state_trackers/README \
state_trackers/wgl targets/libgl-gdi \
targets/graw-gdi targets/graw-null  targets/graw-xlib \
diff --git a/src/gallium/auxiliary/Makefile.am 
b/src/gallium/auxiliary/Makefile.am
index 7af3f3ce42..95a325f96b 100644
--- a/src/gallium/auxiliary/Makefile.am
+++ b/src/gallium/auxiliary/Makefile.am
@@ -1,8 +1,4 @@
 include Makefile.sources
-include $(top_srcdir)/src/gallium/drivers/ddebug/Makefile.sources
-include $(top_srcdir)/src/gallium/drivers/noop/Makefile.sources
-include $(top_srcdir)/src/gallium/drivers/rbug/Makefile.sources
-include $(top_srcdir)/src/gallium/drivers/trace/Makefile.sources
 include $(top_srcdir)/src/gallium/Automake.inc
 
 noinst_LTLIBRARIES = libgallium.la
@@ -23,11 +19,7 @@ AM_CXXFLAGS = \
 libgallium_la_SOURCES = \
$(C_SOURCES) \
$(NIR_SOURCES) \
-   $(GENERATED_SOURCES) \
-   $(DDEBUG_SOURCES) \
-   $(NOOP_SOURCES) \
-   $(RBUG_SOURCES) \
-   $(TRACE_SOURCES)
+   $(GENERATED_SOURCES)
 
 if HAVE_LIBDRM
 
diff --git a/src/gallium/auxiliary/target-helpers/inline_debug_helper.h 
b/src/gallium/auxiliary/target-helpers/inline_debug_helper.h
index 8556376940..2443bf2146 100644
--- a/src/gallium/auxiliary/target-helpers/inline_debug_helper.h
+++ b/src/gallium/auxiliary/target-helpers/inline_debug_helper.h
@@ -11,18 +11,44 @@
  * one or more debug driver: rbug, trace.
  */
 
+#ifdef GALLIUM_DDEBUG
 #include "ddebug/dd_public.h"
+#endif
+
+#ifdef GALLIUM_TRACE
 #include "trace/tr_public.h"
+#endif
+
+#ifdef GALLIUM_RBUG
 #include "rbug/rbug_public.h"
+#endif
+
+#ifdef GALLIUM_NOOP
 #include "noop/noop_public.h"
+#endif
 
+/*
+ * TODO: Audit the following *screen_create() - all of
+ * them should return the original screen on failure.
+ */
 static inline struct pipe_screen *
 debug_screen_wrap(struct pipe_screen *screen)
 {
+#if defined(GALLIUM_DDEBUG)
screen = ddebug_screen_create(screen);
+#endif
+
+#if defined(GALLIUM_RBUG)
screen = rbug_screen_create(screen);
+#endif
+
+#if defined(GALLIUM_TRACE)
screen = trace_screen_create(screen);
+#endif
+
+#if defined(GALLIUM_NOOP)
screen = noop_screen_create(screen);
+#endif
 
if (debug_get_bool_option("GALLIUM_TESTS", FALSE))
   util_run_tests(screen);
diff --git a/src/gallium/drivers/ddebug/Makefile.sources 
b/src/gallium/drivers/ddebug/Makefile.sources
index d43a75ba40..1bd38274df 100644
--- a/src/gallium/drivers/ddebug/Makefile.sources
+++ b/src/gallium/drivers/ddebug/Makefile.sources
@@ -1,7 +1,7 @@
-DDEBUG_SOURCES := \
-   $(top_srcdir)/src/gallium/drivers/ddebug/dd_context.c \
-   $(top_srcdir)/src/gallium/drivers/ddebug/dd_draw.c \
-   $(top_srcdir)/src/gallium/drivers/ddebug/dd_pipe

Mesa (master): u_blit: (trivial) fix bogus argument order for set_fragment_shader

2018-02-07 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: def09f8db0ce77fc41f5188418e0b06356ce59b7
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=def09f8db0ce77fc41f5188418e0b06356ce59b7

Author: Roland Scheidegger 
Date:   Wed Feb  7 22:02:54 2018 +0100

u_blit: (trivial) fix bogus argument order for set_fragment_shader

Amazingly this still worked sometimes, albeit I'm not even sure why...
This fixes d7bec6f7a6a2a35c80be939db8532011af1e9b67.

---

 src/gallium/auxiliary/util/u_blit.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_blit.c 
b/src/gallium/auxiliary/util/u_blit.c
index 817eeac9f0..de39422e32 100644
--- a/src/gallium/auxiliary/util/u_blit.c
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -603,8 +603,8 @@ util_blit_pixels_tex(struct blit_state *ctx,
cso_set_sampler_views(ctx->cso, PIPE_SHADER_FRAGMENT, 1, &src_sampler_view);
 
/* shaders */
-   set_fragment_shader(ctx, src_xrbias,
-   src_sampler_view->format,
+   set_fragment_shader(ctx, src_sampler_view->format,
+   src_xrbias,
src_sampler_view->texture->target);
set_vertex_shader(ctx);
cso_set_tessctrl_shader_handle(ctx->cso, NULL);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): u_simple_shaders: fix mask handling in util_make_fragment_tex_shader_writemask

2018-02-07 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: afd1e9be1714eb95c3b797becb15bebaad9e6646
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=afd1e9be1714eb95c3b797becb15bebaad9e6646

Author: Roland Scheidegger 
Date:   Wed Feb  7 05:03:42 2018 +0100

u_simple_shaders: fix mask handling in util_make_fragment_tex_shader_writemask

The writemask handling was busted, since writing defaults to output
meant they got overwritten by the tex sampling anyway. Albeit the
affected components were undefined, so maybe with some luck it
still would have worked with some drivers - if not could as well
kill it... (This would have affected u_blitter but not u_blit since
the latter always used xyzw mask.)

Reviewed-by: Brian Paul 

---

 src/gallium/auxiliary/util/u_simple_shaders.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c 
b/src/gallium/auxiliary/util/u_simple_shaders.c
index 967954596b..a301c05762 100644
--- a/src/gallium/auxiliary/util/u_simple_shaders.c
+++ b/src/gallium/auxiliary/util/u_simple_shaders.c
@@ -275,7 +275,7 @@ util_make_fragment_tex_shader_writemask(struct pipe_context 
*pipe,
if (writemask != TGSI_WRITEMASK_XYZW) {
   struct ureg_src imm = ureg_imm4f( ureg, 0, 0, 0, 1 );
 
-  ureg_MOV( ureg, out, imm );
+  ureg_MOV(ureg, temp, imm);
}
 
if (tex_target == TGSI_TEXTURE_BUFFER)

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): u_blit,u_simple_shaders: add shader to convert from xrbias format

2018-02-07 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: d7bec6f7a6a2a35c80be939db8532011af1e9b67
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=d7bec6f7a6a2a35c80be939db8532011af1e9b67

Author: Roland Scheidegger 
Date:   Wed Feb  7 05:18:17 2018 +0100

u_blit,u_simple_shaders: add shader to convert from xrbias format

We need this to handle some oddball dx10 format
(DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM). What you can do with this
format is very limited, hence we don't want to add it as a gallium
format (we could not express the properties of this format as
ordinary format properties either, so like all special formats
it would need specific code for handling it in any case).
While here, also nuke the array for different shaders for different
writemasks, as it was not actually used (always full masks are
passed in for generating shaders).

Reviewed-by: Brian Paul 
Reviewed-by: Jose Fonseca 

---

 src/gallium/auxiliary/util/u_blit.c   | 40 ++-
 src/gallium/auxiliary/util/u_blit.h   |  3 +-
 src/gallium/auxiliary/util/u_simple_shaders.c | 47 +++
 src/gallium/auxiliary/util/u_simple_shaders.h |  4 +++
 4 files changed, 78 insertions(+), 16 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_blit.c 
b/src/gallium/auxiliary/util/u_blit.c
index 3f92476f0c..817eeac9f0 100644
--- a/src/gallium/auxiliary/util/u_blit.c
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -65,7 +65,7 @@ struct blit_state
struct pipe_vertex_element velem[2];
 
void *vs;
-   void *fs[PIPE_MAX_TEXTURE_TYPES][TGSI_WRITEMASK_XYZW + 1][3];
+   void *fs[PIPE_MAX_TEXTURE_TYPES][4];
 
struct pipe_resource *vbuf;  /**< quad vertices */
unsigned vbuf_slot;
@@ -135,17 +135,15 @@ void
 util_destroy_blit(struct blit_state *ctx)
 {
struct pipe_context *pipe = ctx->pipe;
-   unsigned i, j, k;
+   unsigned i, j;
 
if (ctx->vs)
   pipe->delete_vs_state(pipe, ctx->vs);
 
for (i = 0; i < ARRAY_SIZE(ctx->fs); i++) {
   for (j = 0; j < ARRAY_SIZE(ctx->fs[i]); j++) {
- for (k = 0; k < ARRAY_SIZE(ctx->fs[i][j]); k++) {
-if (ctx->fs[i][j][k])
-   pipe->delete_fs_state(pipe, ctx->fs[i][j][k]);
- }
+ if (ctx->fs[i][j])
+pipe->delete_fs_state(pipe, ctx->fs[i][j]);
   }
}
 
@@ -159,8 +157,9 @@ util_destroy_blit(struct blit_state *ctx)
  * Helper function to set the fragment shaders.
  */
 static inline void
-set_fragment_shader(struct blit_state *ctx, uint writemask,
+set_fragment_shader(struct blit_state *ctx,
 enum pipe_format format,
+boolean src_xrbias,
 enum pipe_texture_target pipe_tex)
 {
enum tgsi_return_type stype;
@@ -177,19 +176,29 @@ set_fragment_shader(struct blit_state *ctx, uint 
writemask,
   idx = 2;
}
 
-   if (!ctx->fs[pipe_tex][writemask][idx]) {
+   if (src_xrbias) {
+  assert(stype == TGSI_RETURN_TYPE_FLOAT);
+  idx = 3;
+  if (!ctx->fs[pipe_tex][idx]) {
+ enum tgsi_texture_type tgsi_tex = util_pipe_tex_to_tgsi_tex(pipe_tex, 
0);
+ ctx->fs[pipe_tex][idx] =
+util_make_fragment_tex_shader_xrbias(ctx->pipe, tgsi_tex);
+  }
+   }
+
+   else if (!ctx->fs[pipe_tex][idx]) {
   unsigned tgsi_tex = util_pipe_tex_to_tgsi_tex(pipe_tex, 0);
 
   /* OpenGL does not allow blits from signed to unsigned integer
* or vice versa. */
-  ctx->fs[pipe_tex][writemask][idx] =
+  ctx->fs[pipe_tex][idx] =
  util_make_fragment_tex_shader_writemask(ctx->pipe, tgsi_tex,
  TGSI_INTERPOLATE_LINEAR,
- writemask,
+ TGSI_WRITEMASK_XYZW,
  stype, stype, false, false);
}
 
-   cso_set_fragment_shader_handle(ctx->cso, ctx->fs[pipe_tex][writemask][idx]);
+   cso_set_fragment_shader_handle(ctx->cso, ctx->fs[pipe_tex][idx]);
 }
 
 
@@ -491,8 +500,8 @@ util_blit_pixels(struct blit_state *ctx,
  * The sampler view's first_layer indicate the layer to use, but for
  * cube maps it must point to the first face.  Face is passed in src_face.
  *
- * The main advantage over util_blit_pixels is that it allows to specify 
swizzles in
- * pipe_sampler_view::swizzle_?.
+ * The main advantage over util_blit_pixels is that it allows to specify
+ * swizzles in pipe_sampler_view::swizzle_?.
  *
  * But there is no control over blitting Z and/or stencil.
  */
@@ -505,7 +514,8 @@ util_blit_pixels_tex(struct blit_state *ctx,
  struct pipe_surface *dst,
  int dstX0, int dstY0,
  int dstX1, int dstY1,
- float z, uint filter)
+ float z, uint filter,
+ boolean src_xrbias)
 {
boolean normaliz

Mesa (master): r600: don't do stack workarounds for hemlock

2018-02-01 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: c2f0e0885776f3f0a18b9db08149564d4b98e5b7
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=c2f0e0885776f3f0a18b9db08149564d4b98e5b7

Author: Roland Scheidegger 
Date:   Tue Jan 30 05:48:27 2018 +0100

r600: don't do stack workarounds for hemlock

By the looks of it it seems hemlock is treated separately to cypress, but
certainly it won't need the stack workarounds cedar/redwood (and
seemingly every other eg chip except cypress/juniper) need.
(Discovered by accident.)

Acked-by: Alex Deucher 

---

 src/gallium/drivers/r600/sb/sb_bc.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/r600/sb/sb_bc.h 
b/src/gallium/drivers/r600/sb/sb_bc.h
index b35671bf0f..a249395474 100644
--- a/src/gallium/drivers/r600/sb/sb_bc.h
+++ b/src/gallium/drivers/r600/sb/sb_bc.h
@@ -665,6 +665,7 @@ public:
return false;
 
switch (hw_chip) {
+   case HW_CHIP_HEMLOCK:
case HW_CHIP_CYPRESS:
case HW_CHIP_JUNIPER:
return false;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): mesa: restrict formats being supported by target type for formatquery

2018-01-29 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 21fe02d1d369d25021d0be7f558063e103e2dce7
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=21fe02d1d369d25021d0be7f558063e103e2dce7

Author: Roland Scheidegger 
Date:   Sat Jan 27 01:25:26 2018 +0100

mesa: restrict formats being supported by target type for formatquery

The code just considered all formats as being supported if they were either
a valid fbo or texture format.
This was quite awkward since then the query would return "supported" for
e.g. GL_RGB9E5 or compressed formats and target RENDERBUFFER (albeit the driver
could still refuse it in theory). However, when then querying for instance the
internalformat sizes, it would just return 0 (due to the checks being more
strict there).
It was also a problem for texture buffer targets, which have a more restricted
list of formats which are allowed (and again, it would return supported but
then querying sizes would return 0).
So only take validation of formats into account which make sense for a given
target.
Can also toss out some special checks for rgb9e5 later, since we'd never get
there if it wasn't supported in the first place.

Reviewed-by: Alejandro Piñeiro 

---

 src/mesa/main/formatquery.c | 31 +--
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/src/mesa/main/formatquery.c b/src/mesa/main/formatquery.c
index 1846fbc688..303e7b2f8c 100644
--- a/src/mesa/main/formatquery.c
+++ b/src/mesa/main/formatquery.c
@@ -558,15 +558,29 @@ _is_internalformat_supported(struct gl_context *ctx, 
GLenum target,
 * implementation accepts it for any texture specification 
commands, and
 * - unsized or base internal format, if the implementation accepts
 * it for texture or image specification.
+*
+* But also:
+* "If the particular <target> and <internalformat> combination do not make
+* sense, or if a particular type of <target> is not supported by the
+* implementation the "unsupported" answer should be given. This is not an
+* error.
 */
GLint buffer[1];
 
-   /* At this point an internalformat is valid if it is valid as a texture or
-* as a renderbuffer format. The checks are different because those methods
-* return different values when passing non supported internalformats */
-   if (_mesa_base_tex_format(ctx, internalformat) < 0 &&
-   _mesa_base_fbo_format(ctx, internalformat) == 0)
-  return false;
+   if (target == GL_RENDERBUFFER) {
+  if (_mesa_base_fbo_format(ctx, internalformat) == 0) {
+ return false;
+  }
+   } else if (target == GL_TEXTURE_BUFFER) {
+  if (_mesa_validate_texbuffer_format(ctx, internalformat) ==
+  MESA_FORMAT_NONE) {
+ return false;
+  }
+   } else {
+  if (_mesa_base_tex_format(ctx, internalformat) < 0) {
+ return false;
+  }
+   }
 
/* Let the driver have the final word */
ctx->Driver.QueryInternalFormat(ctx, target, internalformat,
@@ -969,10 +983,7 @@ _mesa_GetInternalformativ(GLenum target, GLenum 
internalformat, GLenum pname,
* and glGetRenderbufferParameteriv functions.
*/
   if (pname == GL_INTERNALFORMAT_SHARED_SIZE) {
- if (_mesa_has_EXT_texture_shared_exponent(ctx) &&
- target != GL_TEXTURE_BUFFER &&
- target != GL_RENDERBUFFER &&
- texformat == MESA_FORMAT_R9G9B9E5_FLOAT) {
+ if (texformat == MESA_FORMAT_R9G9B9E5_FLOAT) {
 buffer[0] = 5;
  }
  goto end;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): mesa: remove misleading gles checks for formatquery

2018-01-29 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 09dc4f9012b12c51972e32db653f507448b29490
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=09dc4f9012b12c51972e32db653f507448b29490

Author: Roland Scheidegger 
Date:   Sat Jan 27 01:12:52 2018 +0100

mesa: remove misleading gles checks for formatquery

Testing for gles there is just confusing - this is about the target being
supported; whether it was valid at all was already determined earlier
(in _legal_parameters). The check didn't make sense in any case, since
it would only have said false there for gles for 2d but not 2d arrays etc.

Reviewed-by: Alejandro Piñeiro 

---

 src/mesa/main/formatquery.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/mesa/main/formatquery.c b/src/mesa/main/formatquery.c
index 61f798c88f..9174fb6b27 100644
--- a/src/mesa/main/formatquery.c
+++ b/src/mesa/main/formatquery.c
@@ -392,14 +392,12 @@ _is_target_supported(struct gl_context *ctx, GLenum 
target)
 * implementation the "unsupported" answer should be given.
 * This is not an error."
 *
-* For OpenGL ES, queries can only be used with GL_RENDERBUFFER or MS.
+* Note that legality of targets has already been verified.
 */
switch(target){
case GL_TEXTURE_1D:
case GL_TEXTURE_2D:
case GL_TEXTURE_3D:
-  if (!_mesa_is_desktop_gl(ctx))
- return false;
   break;
 
case GL_TEXTURE_1D_ARRAY:

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): mesa: skip validation of legality of size/type queries for format queries

2018-01-29 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 3c7aa242f5df6ba45acc7338bad088910e4b7330
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=3c7aa242f5df6ba45acc7338bad088910e4b7330

Author: Roland Scheidegger 
Date:   Sat Jan 27 01:39:35 2018 +0100

mesa: skip validation of legality of size/type queries for format queries

The size/type query is always legal (if we made it that far).
Removing this causes a difference for GL_TEXTURE_BUFFER - the reason is that
these parameters are valid only with GetTexLevelParameter() if gl 3.1 is
supported, but not if only ARB_texture_buffer_object is supported.
However, while the spec says that these queries return "the same information
as querying GetTexLevelParameter" I believe we're not expected to return just
zeros here. By definition, these pnames are always valid (unlike for the
GetTexLevelParameter() function which would return an error without GL 3.1).
The spec is a bit inconsistent there and open to interpretation - while
mentioning the "same information as querying GetTexLevelParameter" is
returned, it also mentions that 0 is returned for size/type if the
target/format is not supported - implying correct results to be returned
if it is supported, regardless that GetTexLevelParameter would return
an error. (Also, the bit about this returning the same as
GetTexLevelParameter also includes querying stencil type, which isn't
even possible with GetTexLevelParameter.)

This breaks some piglit arb_internalformat_query2 tests (which I believe to
be wrong).

Reviewed-by: Alejandro Piñeiro

---

 src/mesa/main/formatquery.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/mesa/main/formatquery.c b/src/mesa/main/formatquery.c
index 303e7b2f8c..834f8e 100644
--- a/src/mesa/main/formatquery.c
+++ b/src/mesa/main/formatquery.c
@@ -960,9 +960,6 @@ _mesa_GetInternalformativ(GLenum target, GLenum 
internalformat, GLenum pname,
   mesa_format texformat;
 
   if (target != GL_RENDERBUFFER) {
- if (!_mesa_legal_get_tex_level_parameter_target(ctx, target, true))
-goto end;
-
  baseformat = _mesa_base_tex_format(ctx, internalformat);
   } else {
  baseformat = _mesa_base_fbo_format(ctx, internalformat);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): mesa: (trivial) add TODO comment for default results for internal queries

2018-01-29 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 272e7e1bd5c12a3ef36027f0071065b7ce04e9e9
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=272e7e1bd5c12a3ef36027f0071065b7ce04e9e9

Author: Roland Scheidegger 
Date:   Tue Jan 30 01:03:49 2018 +0100

mesa: (trivial) add TODO comment for default results for internal queries

---

 src/mesa/main/formatquery.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/mesa/main/formatquery.c b/src/mesa/main/formatquery.c
index 9174fb6b27..1846fbc688 100644
--- a/src/mesa/main/formatquery.c
+++ b/src/mesa/main/formatquery.c
@@ -700,6 +700,12 @@ _mesa_query_internal_format_default(struct gl_context 
*ctx, GLenum target,
case GL_FRAMEBUFFER_RENDERABLE_LAYERED:
case GL_FRAMEBUFFER_BLEND:
case GL_FILTER:
+  /*
+   * TODO seems a tad optimistic just saying yes to everything here.
+   * Even for combinations which make no sense...
+   * And things like TESS_CONTROL_TEXTURE should definitely default to
+   * NONE if the driver doesn't even support tessellation...
+   */
   params[0] = GL_FULL_SUPPORT;
   break;
case GL_NUM_TILING_TYPES_EXT:

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): gallivm: fix crash with seamless cube filtering with different min/mag filter

2018-01-25 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 4fe662c58f6453b3558de479e7c2bfe4158dc26c
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=4fe662c58f6453b3558de479e7c2bfe4158dc26c

Author: Roland Scheidegger 
Date:   Thu Jan 25 04:30:41 2018 +0100

gallivm: fix crash with seamless cube filtering with different min/mag filter

We are not allowed to modify the incoming coords values, or things may
crash (as we may be inside a llvm conditional and the values may be used
in another branch).
I recently broke this when fixing an issue with NaNs and seamless cube
map filtering, and it causes crashes when doing cubemap filtering
if the min and mag filters are different.
Add const to the pointers passed in to prevent this mishap in the future.

Fixes: a485ad0bcd ("gallivm: fix an issue with NaNs with seamless cube 
filtering")

Reviewed-by: Jose Fonseca 

---

 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 38 +--
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c 
b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index ff8cbf604c..8f760f59fe 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -857,7 +857,7 @@ lp_build_sample_image_nearest(struct 
lp_build_sample_context *bld,
   LLVMValueRef img_stride_vec,
   LLVMValueRef data_ptr,
   LLVMValueRef mipoffsets,
-  LLVMValueRef *coords,
+  const LLVMValueRef *coords,
   const LLVMValueRef *offsets,
   LLVMValueRef colors_out[4])
 {
@@ -1004,7 +1004,7 @@ lp_build_sample_image_linear(struct 
lp_build_sample_context *bld,
  LLVMValueRef img_stride_vec,
  LLVMValueRef data_ptr,
  LLVMValueRef mipoffsets,
- LLVMValueRef *coords,
+ const LLVMValueRef *coords,
  const LLVMValueRef *offsets,
  LLVMValueRef colors_out[4])
 {
@@ -1106,7 +1106,7 @@ lp_build_sample_image_linear(struct 
lp_build_sample_context *bld,
   struct lp_build_if_state edge_if;
   LLVMTypeRef int1t;
   LLVMValueRef new_faces[4], new_xcoords[4][2], new_ycoords[4][2];
-  LLVMValueRef coord, have_edge, have_corner;
+  LLVMValueRef coord0, coord1, have_edge, have_corner;
   LLVMValueRef fall_off_ym_notxm, fall_off_ym_notxp, fall_off_x, 
fall_off_y;
   LLVMValueRef fall_off_yp_notxm, fall_off_yp_notxp;
   LLVMValueRef x0, x1, y0, y1, y0_clamped, y1_clamped;
@@ -1130,20 +1130,20 @@ lp_build_sample_image_linear(struct 
lp_build_sample_context *bld,
* other values might be bogus in the end too).
* So kill off the NaNs here.
*/
-  coords[0] = lp_build_max_ext(coord_bld, coords[0], coord_bld->zero,
-   GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
-  coords[1] = lp_build_max_ext(coord_bld, coords[1], coord_bld->zero,
-   GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
-  coord = lp_build_mul(coord_bld, coords[0], flt_width_vec);
+  coord0 = lp_build_max_ext(coord_bld, coords[0], coord_bld->zero,
+GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
+  coord0 = lp_build_mul(coord_bld, coord0, flt_width_vec);
   /* instead of clamp, build mask if overflowed */
-  coord = lp_build_sub(coord_bld, coord, half);
+  coord0 = lp_build_sub(coord_bld, coord0, half);
   /* convert to int, compute lerp weight */
   /* not ideal with AVX (and no AVX2) */
-  lp_build_ifloor_fract(coord_bld, coord, &x0, &s_fpart);
+  lp_build_ifloor_fract(coord_bld, coord0, &x0, &s_fpart);
   x1 = lp_build_add(ivec_bld, x0, ivec_bld->one);
-  coord = lp_build_mul(coord_bld, coords[1], flt_height_vec);
-  coord = lp_build_sub(coord_bld, coord, half);
-  lp_build_ifloor_fract(coord_bld, coord, &y0, &t_fpart);
+  coord1 = lp_build_max_ext(coord_bld, coords[1], coord_bld->zero,
+GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
+  coord1 = lp_build_mul(coord_bld, coord1, flt_height_vec);
+  coord1 = lp_build_sub(coord_bld, coord1, half);
+  lp_build_ifloor_fract(coord_bld, coord1, &y0, &t_fpart);
   y1 = lp_build_add(ivec_bld, y0, ivec_bld->one);
 
   fall_off[0] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, x0, ivec_bld->zero);
@@ -1747,7 +1747,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context 
*bld,
unsigned img_filter,
unsigned mip_filter,
boolean is_gather,
-   LLVMValueRef *coords,
+   const L

Mesa (master): draw: fix vsplit code when the (post-bias) index value is -1

2018-01-16 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 1f462eaf394517dac98b0c41f09e995f2940fdb8
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=1f462eaf394517dac98b0c41f09e995f2940fdb8

Author: Roland Scheidegger 
Date:   Tue Jan 16 03:01:56 2018 +0100

draw: fix vsplit code when the (post-bias) index value is -1

vsplit_add_cache uses the post-bias index for hashing, but the
vsplit_add_cache_uint/ushort/ubyte ones used the pre-bias index, therefore
the code for handling the special case (because -1 matches the initialization
value of the cache) wasn't actually working.
Commit 78a997f72841310620d18daa9015633343d04db1 actually simplified the
cache logic somewhat, but it looks like this particular problem carried over
(and duplicated to the ushort/ubyte cases, since before only uint needed it).
This could lead to the vsplit cache doing the wrong thing, in particular
later fetch_info might indicate there are 0 values to fetch. This only really
affected edge cases which were bogus to begin with, but it could lead to a
crash with the jit vertex shader, since it cannot handle this case correctly
(the count loop is always executed at least once and we would not allocate
any memory for the shader outputs), so add another assert to catch it there.

Reviewed-by: Brian Paul 
Reviewed-by: Jose Fonseca 

---

 src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c | 1 +
 src/gallium/auxiliary/draw/draw_pt_vsplit.c| 6 +++---
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c 
b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
index c6492a18cf..5e0c562256 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
@@ -368,6 +368,7 @@ llvm_pipeline_generic(struct draw_pt_middle_end *middle,
unsigned start_or_maxelt, vid_base;
const unsigned *elts;
 
+   assert(fetch_info->count > 0);
llvm_vert_info.count = fetch_info->count;
llvm_vert_info.vertex_size = fpme->vertex_size;
llvm_vert_info.stride = fpme->vertex_size;
diff --git a/src/gallium/auxiliary/draw/draw_pt_vsplit.c 
b/src/gallium/auxiliary/draw/draw_pt_vsplit.c
index a68d5bf971..3ff077b760 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vsplit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_vsplit.c
@@ -133,7 +133,7 @@ vsplit_add_cache_ubyte(struct vsplit_frontend *vsplit, 
const ubyte *elts,
VSPLIT_CREATE_IDX(elts, start, fetch, elt_bias);
/* unlike the uint case this can only happen with elt_bias */
if (elt_bias && elt_idx == DRAW_MAX_FETCH_IDX && 
!vsplit->cache.has_max_fetch) {
-  unsigned hash = fetch % MAP_SIZE;
+  unsigned hash = elt_idx % MAP_SIZE;
   vsplit->cache.fetches[hash] = 0;
   vsplit->cache.has_max_fetch = TRUE;
}
@@ -148,7 +148,7 @@ vsplit_add_cache_ushort(struct vsplit_frontend *vsplit, 
const ushort *elts,
VSPLIT_CREATE_IDX(elts, start, fetch, elt_bias);
/* unlike the uint case this can only happen with elt_bias */
if (elt_bias && elt_idx == DRAW_MAX_FETCH_IDX && 
!vsplit->cache.has_max_fetch) {
-  unsigned hash = fetch % MAP_SIZE;
+  unsigned hash = elt_idx % MAP_SIZE;
   vsplit->cache.fetches[hash] = 0;
   vsplit->cache.has_max_fetch = TRUE;
}
@@ -168,7 +168,7 @@ vsplit_add_cache_uint(struct vsplit_frontend *vsplit, const 
uint *elts,
VSPLIT_CREATE_IDX(elts, start, fetch, elt_bias);
/* Take care for DRAW_MAX_FETCH_IDX (since cache is initialized to -1). */
if (elt_idx == DRAW_MAX_FETCH_IDX && !vsplit->cache.has_max_fetch) {
-  unsigned hash = fetch % MAP_SIZE;
+  unsigned hash = elt_idx % MAP_SIZE;
   /* force update - any value will do except DRAW_MAX_FETCH_IDX */
   vsplit->cache.fetches[hash] = 0;
   vsplit->cache.has_max_fetch = TRUE;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): draw: remove VSPLIT_CREATE_IDX macro

2018-01-16 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: b0413cfd8b84634db4a5bf57d550b21d0d2fa8f7
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=b0413cfd8b84634db4a5bf57d550b21d0d2fa8f7

Author: Roland Scheidegger 
Date:   Tue Jan 16 17:55:00 2018 +0100

draw: remove VSPLIT_CREATE_IDX macro

Just inline the little bit of code.

Reviewed-by: Jose Fonseca 
Reviewed-by: Brian Paul 

---

 src/gallium/auxiliary/draw/draw_pt_vsplit.c | 23 ---
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_pt_vsplit.c 
b/src/gallium/auxiliary/draw/draw_pt_vsplit.c
index 3ff077b760..653deab28c 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vsplit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_vsplit.c
@@ -116,21 +116,15 @@ vsplit_get_base_idx(unsigned start, unsigned fetch)
return draw_overflow_uadd(start, fetch, MAX_ELT_IDX);
 }
 
-/*
- * The final element index is just element index plus element bias.
- */
-#define VSPLIT_CREATE_IDX(elts, start, fetch, elt_bias)\
-   unsigned elt_idx;   \
-   elt_idx = vsplit_get_base_idx(start, fetch);\
-   elt_idx = (unsigned)((int)(DRAW_GET_IDX(elts, elt_idx)) + (int)elt_bias);
-
 
 static inline void
 vsplit_add_cache_ubyte(struct vsplit_frontend *vsplit, const ubyte *elts,
unsigned start, unsigned fetch, int elt_bias)
 {
struct draw_context *draw = vsplit->draw;
-   VSPLIT_CREATE_IDX(elts, start, fetch, elt_bias);
+   unsigned elt_idx;
+   elt_idx = vsplit_get_base_idx(start, fetch);
+   elt_idx = (unsigned)((int)(DRAW_GET_IDX(elts, elt_idx)) + elt_bias);
/* unlike the uint case this can only happen with elt_bias */
if (elt_bias && elt_idx == DRAW_MAX_FETCH_IDX && 
!vsplit->cache.has_max_fetch) {
   unsigned hash = elt_idx % MAP_SIZE;
@@ -145,7 +139,9 @@ vsplit_add_cache_ushort(struct vsplit_frontend *vsplit, 
const ushort *elts,
unsigned start, unsigned fetch, int elt_bias)
 {
struct draw_context *draw = vsplit->draw;
-   VSPLIT_CREATE_IDX(elts, start, fetch, elt_bias);
+   unsigned elt_idx;
+   elt_idx = vsplit_get_base_idx(start, fetch);
+   elt_idx = (unsigned)((int)(DRAW_GET_IDX(elts, elt_idx)) + elt_bias);
/* unlike the uint case this can only happen with elt_bias */
if (elt_bias && elt_idx == DRAW_MAX_FETCH_IDX && 
!vsplit->cache.has_max_fetch) {
   unsigned hash = elt_idx % MAP_SIZE;
@@ -165,7 +161,12 @@ vsplit_add_cache_uint(struct vsplit_frontend *vsplit, 
const uint *elts,
   unsigned start, unsigned fetch, int elt_bias)
 {
struct draw_context *draw = vsplit->draw;
-   VSPLIT_CREATE_IDX(elts, start, fetch, elt_bias);
+   unsigned elt_idx;
+   /*
+* The final element index is just element index plus element bias.
+*/
+   elt_idx = vsplit_get_base_idx(start, fetch);
+   elt_idx = (unsigned)((int)(DRAW_GET_IDX(elts, elt_idx)) + elt_bias);
/* Take care for DRAW_MAX_FETCH_IDX (since cache is initialized to -1). */
if (elt_idx == DRAW_MAX_FETCH_IDX && !vsplit->cache.has_max_fetch) {
   unsigned hash = elt_idx % MAP_SIZE;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): util: fix NORETURN for msvc, add HAVE_FUNC_ATTRIBUTE_NORETURN to c99_compat.h

2018-01-11 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 85377dc55c55d1c5536cdf9a86ce67ebb59b7e77
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=85377dc55c55d1c5536cdf9a86ce67ebb59b7e77

Author: Roland Scheidegger 
Date:   Thu Jan 11 01:49:00 2018 +0100

util: fix NORETURN for msvc, add HAVE_FUNC_ATTRIBUTE_NORETURN to c99_compat.h

We've seen some problems internally due to macro redefinition.
Fix this by adding HAVE_FUNC_ATTRIBUTE_NORETURN to c99_compat.h,
and defining it for msvc.
And avoid redefinition just in case.

Reviewed-by: Brian Paul 
Reviewed-by: Jose Fonseca 

---

 include/c99_compat.h |  1 +
 src/util/macros.h| 12 
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/include/c99_compat.h b/include/c99_compat.h
index cb690c6e2a..81621a7fab 100644
--- a/include/c99_compat.h
+++ b/include/c99_compat.h
@@ -164,6 +164,7 @@ test_c99_compat_h(const void * restrict a,
 #define HAVE_FUNC_ATTRIBUTE_FORMAT 1
 #define HAVE_FUNC_ATTRIBUTE_PACKED 1
 #define HAVE_FUNC_ATTRIBUTE_ALIAS 1
+#define HAVE_FUNC_ATTRIBUTE_NORETURN 1
 
 #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)
/* https://gcc.gnu.org/onlinedocs/gcc-4.3.6/gcc/Other-Builtins.html */
diff --git a/src/util/macros.h b/src/util/macros.h
index d6e37053b1..432d513930 100644
--- a/src/util/macros.h
+++ b/src/util/macros.h
@@ -171,10 +171,14 @@ do {   \
 #define ATTRIBUTE_RETURNS_NONNULL
 #endif
 
-#ifdef HAVE_FUNC_ATTRIBUTE_NORETURN
-#define NORETURN __attribute__((__noreturn__))
-#else
-#define NORETURN
+#ifndef NORETURN
+#  ifdef _MSC_VER
+#define NORETURN __declspec(noreturn)
+#  elif defined HAVE_FUNC_ATTRIBUTE_NORETURN
+#define NORETURN __attribute__((__noreturn__))
+#  else
+#define NORETURN
+#  endif
 #endif
 
 #ifdef __cplusplus

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): mesa: require at least 14 UBOs for GL 4.3

2018-01-11 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 734bef372d80a2ebf5677eb4fbd0e939f2b3cfb4
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=734bef372d80a2ebf5677eb4fbd0e939f2b3cfb4

Author: Roland Scheidegger 
Date:   Thu Jan 11 02:10:25 2018 +0100

mesa: require at least 14 UBOs for GL 4.3

ARB_ubo requires 12 UBOs (per stage) at least, but this limit has been
raised by GL 4.3 to 14, so don't advertise GL 4.3 without it (only checking
the vertex stage since all drivers probably have the same limit anyway for
other stages). (piglit has minmax tests for that kind of thing, but they go
only up to 3.3, so this won't really be noticed.)
I think this currently should not affect any driver - r600 until very
recently only supported 12 but now advertises 14 too.

Reviewed-by: Brian Paul 

---

 src/mesa/main/version.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/main/version.c b/src/mesa/main/version.c
index 90c5c5f84e..68079f4ebb 100644
--- a/src/mesa/main/version.c
+++ b/src/mesa/main/version.c
@@ -352,6 +352,7 @@ compute_version(const struct gl_extensions *extensions,
  extensions->ARB_transform_feedback_instanced);
const bool ver_4_3 = (ver_4_2 &&
  consts->GLSLVersion >= 430 &&
+ consts->Program[MESA_SHADER_VERTEX].MaxUniformBlocks 
>= 14 &&
  extensions->ARB_ES3_compatibility &&
  extensions->ARB_arrays_of_arrays &&
  extensions->ARB_compute_shader &&

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): r600: fix sampler indexing with texture buffers sampling

2018-01-09 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 762ccf483aa0f5f853e75c886d49c4025cebaf00
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=762ccf483aa0f5f853e75c886d49c4025cebaf00

Author: Roland Scheidegger 
Date:   Tue Jan  2 23:03:44 2018 +0100

r600: fix sampler indexing with texture buffers sampling

This fixes the new piglit test.
While here also fix up the logic for early exit of setting up driver consts.

Tested-by: Konstantin Kharlamov 
Reviewed-by: Dave Airlie

---

 src/gallium/drivers/r600/r600_shader.c   | 2 ++
 src/gallium/drivers/r600/r600_state_common.c | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index bb7cc177ae..716a829273 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -6856,6 +6856,7 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, 
boolean src_requires_l
struct tgsi_full_instruction *inst = 
&ctx->parse.FullToken.FullInstruction;
int src_gpr, r, i;
int id = tgsi_tex_get_src_gpr(ctx, 1);
+   int sampler_index_mode = inst->Src[1].Indirect.Index == 2 ? 2 : 0; // 
CF_INDEX_1 : CF_INDEX_NONE
 
src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
if (src_requires_loading) {
@@ -6887,6 +6888,7 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, 
boolean src_requires_l
vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;  
/* SEL_Z */
vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;  
/* SEL_W */
vtx.use_const_fields = 1;
+   vtx.buffer_index_mode = sampler_index_mode;
 
if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
return r;
diff --git a/src/gallium/drivers/r600/r600_state_common.c 
b/src/gallium/drivers/r600/r600_state_common.c
index e7fa1bbf57..1d9ff7bd6e 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1380,8 +1380,8 @@ void eg_setup_buffer_constants(struct r600_context *rctx, 
int shader_type)
}
 
if (!samplers->views.dirty_buffer_constants &&
-   (images && !images->dirty_buffer_constants) &&
-   (buffers && !buffers->dirty_buffer_constants))
+   !(images && images->dirty_buffer_constants) &&
+   !(buffers && buffers->dirty_buffer_constants))
return;
 
if (images)

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): winsys/radeon: fix up default enabled_rb_mask for r600

2018-01-09 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: f0dd1b36126ceff8726797f40f56defbf5f82e2c
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=f0dd1b36126ceff8726797f40f56defbf5f82e2c

Author: Roland Scheidegger 
Date:   Tue Jan  9 02:53:28 2018 +0100

winsys/radeon: fix up default enabled_rb_mask for r600

The logic had two fatal flaws which completely killed the default value.
1) drm will overwrite the value anyway even if the chip can't be handled
2) the default value logic is relying on num_render_backends, which was
filled in later.
Luckily no one is relying on it, but it's a bit confusing seeing the chip clock
printed out there (as hex) with R600_DEBUG=info...
(Albeit radeonsi does not appear to fix up the value. If kernels which don't
handle this query are still supported, radeonsi will still end up with a broken
enabled_rb_mask, I have no idea of the potential results of this there.)

Reviewed-by: Dave Airlie 

---

 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c 
b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index e600199d26..10f2ecc900 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -369,12 +369,6 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
  &ws->info.max_shader_clock);
 ws->info.max_shader_clock /= 1000;
 
-/* Default value. */
-ws->info.enabled_rb_mask = u_bit_consecutive(0, 
ws->info.num_render_backends);
-/* This fails on non-GCN or older kernels: */
-radeon_get_drm_value(ws->fd, RADEON_INFO_SI_BACKEND_ENABLED_MASK, NULL,
- &ws->info.enabled_rb_mask);
-
 ws->num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
 
 /* Generation-specific queries. */
@@ -433,6 +427,16 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
   &ws->info.r600_gb_backend_map))
 ws->info.r600_gb_backend_map_valid = true;
 
+/* Default value. */
+ws->info.enabled_rb_mask = u_bit_consecutive(0, 
ws->info.num_render_backends);
+/*
+ * This fails (silently) on non-GCN or older kernels, overwriting the
+ * default enabled_rb_mask with the result of the last query.
+*/
+if (ws->gen >= DRV_SI)
+radeon_get_drm_value(ws->fd, RADEON_INFO_SI_BACKEND_ENABLED_MASK, 
NULL,
+ &ws->info.enabled_rb_mask);
+
 ws->info.has_virtual_memory = false;
 if (ws->info.drm_minor >= 13) {
 uint32_t ib_vm_max_size;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): r600: hack up num_render_backends on Juniper to 8

2018-01-09 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 76baf997371dc8678cbea51fe5d4651aa59af741
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=76baf997371dc8678cbea51fe5d4651aa59af741

Author: Roland Scheidegger 
Date:   Tue Jan  9 03:28:45 2018 +0100

r600: hack up num_render_backends on Juniper to 8

Juniper really has a maximum of 4 RBEs (16 pixels). However, predication
always locks up on my HD 5750, and through experiments it looks like if we're
pretending it has a maximum of 8, with 4 disabled, it works correctly.
My conclusion would be that there's a bug (likely firmware, not hw) which
causes the predication logic to try to read 8 results out of the query buffer
instead of just 4, and since of course no one ever writes the upper 4, the
status bit is never set and hence it will wait for it forever.

Ideally this would be fixed in firmware, but I'd guess chances of that
happening are slim.
This will double the size of (occlusion) query result buffers, write the
status bit for the disabled rbs in these buffers, and will also add 8 results
together instead of just 4 when reading them back. The latter is unnecessary,
but it's probably not worth bothering - luckily num_render_backends isn't
used outside of occlusion queries, so don't need separate value for the
"real" maximum.
Also print out the enabled_rb_mask if it changed from the pre-fixed value
(which is already printed out), just in case there are some more problems
with chips which have some rbs disabled...

This fixes all the lockups with piglit nv_conditional_render tests on my
HD 5750 (all pass).

Reviewed-by: Dave Airlie 

---

 src/gallium/drivers/r600/r600_query.c | 21 +++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_query.c 
b/src/gallium/drivers/r600/r600_query.c
index 8f87c51cca..b4519830cc 100644
--- a/src/gallium/drivers/r600/r600_query.c
+++ b/src/gallium/drivers/r600/r600_query.c
@@ -1818,7 +1818,19 @@ void r600_query_fix_enabled_rb_mask(struct 
r600_common_screen *rscreen)
struct r600_resource *buffer;
uint32_t *results;
unsigned i, mask = 0;
-   unsigned max_rbs = ctx->screen->info.num_render_backends;
+   unsigned max_rbs;
+   
+   if (ctx->family == CHIP_JUNIPER) {
+   /*
+* Fix for predication lockups - the chip can only ever have
+* 4 RBs, however it looks like the predication logic assumes
+* there's 8, trying to read results from query buffers never
+* written to. By increasing this number we'll write the
+* status bit for these as per the normal disabled rb logic.
+*/
+   ctx->screen->info.num_render_backends = 8;
+   }
+   max_rbs = ctx->screen->info.num_render_backends;
 
assert(rscreen->chip_class <= CAYMAN);
 
@@ -1890,8 +1902,13 @@ void r600_query_fix_enabled_rb_mask(struct 
r600_common_screen *rscreen)
 
r600_resource_reference(&buffer, NULL);
 
-   if (mask)
+   if (mask) {
+   if (rscreen->debug_flags & DBG_INFO &&
+   mask != rscreen->info.enabled_rb_mask) {
+   printf("enabled_rb_mask (fixed) = 0x%x\n", mask);
+   }
rscreen->info.enabled_rb_mask = mask;
+   }
 }
 
 #define XFULL(name_, query_type_, type_, result_type_, group_id_) \

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): r600: don't emit tes samplers/views when tes isn't active

2018-01-09 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: ea227f4322debd68380feaad1de44a2feaf3d2a9
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=ea227f4322debd68380feaad1de44a2feaf3d2a9

Author: Roland Scheidegger 
Date:   Wed Jan  3 03:23:13 2018 +0100

r600: don't emit tes samplers/views when tes isn't active

Similar to const buffers. The driver must not emit any tes-related state if tes
is disabled, since the hw slots are all shared by VS, therefore it would
overwrite them (the mesa state tracker might not do this, but it would be
perfectly legal to do so).
Nevertheless I think the dirty state tracking logic in the driver is
fundamentally flawed when tes is disabled/enabled, since it looks to me like
the VS (and TES) state would not get reemitted to the correct slots (if it's
not dirty anyway). Unless I'm missing something...
Theoretically, the overwrite problem could be solved by using non-overlapping
resource slots for TES and VS (since we're not even close to using half the
resource slots), but it wouldn't work for constant buffers nor samplers, and
for VS would still need to propagate changes to both LS and VS, so probably
not a useful idea.
Unfortunately there's zero coverage of this with piglit, since all tessellation
shader tests are just shader_runner tests, which are unsuitable for testing
any kind of state dependency tracking issues (so I can't even quickly hack
something up to prove it and fix it...).
TCS otoh is just fine - like GS it has its own hw slots.

Tested-by: Konstantin Kharlamov 
Reviewed-by: Dave Airlie 

---

 src/gallium/drivers/r600/evergreen_state.c   |  4 
 src/gallium/drivers/r600/r600_state_common.c | 15 +++
 2 files changed, 19 insertions(+)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 4cc48dfa11..fb1de9cbf4 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -2334,6 +2334,8 @@ static void evergreen_emit_tcs_sampler_views(struct 
r600_context *rctx, struct r
 
 static void evergreen_emit_tes_sampler_views(struct r600_context *rctx, struct 
r600_atom *atom)
 {
+   if (!rctx->tes_shader)
+   return;
evergreen_emit_sampler_views(rctx, 
&rctx->samplers[PIPE_SHADER_TESS_EVAL].views,
 EG_FETCH_CONSTANTS_OFFSET_VS + 
R600_MAX_CONST_BUFFERS, 0);
 }
@@ -2404,6 +2406,8 @@ static void evergreen_emit_tcs_sampler_states(struct 
r600_context *rctx, struct
 
 static void evergreen_emit_tes_sampler_states(struct r600_context *rctx, 
struct r600_atom *atom)
 {
+   if (!rctx->tes_shader)
+   return;
evergreen_emit_sampler_states(rctx, 
&rctx->samplers[PIPE_SHADER_TESS_EVAL], 18,
  R_00A414_TD_VS_SAMPLER0_BORDER_INDEX, 0);
 }
diff --git a/src/gallium/drivers/r600/r600_state_common.c 
b/src/gallium/drivers/r600/r600_state_common.c
index 7f4d9f3e33..b49b05608d 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1724,6 +1724,21 @@ static bool r600_update_derived_state(struct 
r600_context *rctx)
}
}
 
+   /*
+* XXX: I believe there's some fatal flaw in the dirty state logic when
+* enabling/disabling tes.
+* VS/ES share all buffer/resource/sampler slots. If TES is enabled,
+* it will therefore overwrite the VS slots. If it now gets disabled,
+* the VS needs to rebind all buffer/resource/sampler slots - not only
+* has TES overwritten the corresponding slots, but when the VS was
+* operating as LS the things with correpsonding dirty bits got bound
+* to LS slots and won't reflect what is dirty as VS stage even if the
+* TES didn't overwrite it. The story for re-enabled TES is similar.
+* In any case, we're not allowed to submit any TES state when
+* TES is disabled (the state tracker may not do this but this looks
+* like an optimization to me, not something which can be relied on).
+*/
+
/* Update clip misc state. */
if (clip_so_current) {
r600_update_clip_state(rctx, clip_so_current);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): r600: set up constants needed for txq for buffers and cube maps with tes

2018-01-09 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 43292c78b7f6b496cd568005c8fa14b5b1d6375f
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=43292c78b7f6b496cd568005c8fa14b5b1d6375f

Author: Roland Scheidegger 
Date:   Mon Jan  1 03:04:38 2018 +0100

r600: set up constants needed for txq for buffers and cube maps with tes

We only did this for the other stages, but obviously tess eval/ctrl need it
too.
This fixes the (newly modified) piglit texturing/textureSize test when run
with tes stage and bufferSampler.

Reviewed-by: Dave Airlie 

---

 src/gallium/drivers/r600/r600_state_common.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/src/gallium/drivers/r600/r600_state_common.c 
b/src/gallium/drivers/r600/r600_state_common.c
index 1d9ff7bd6e..4429246d31 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1812,6 +1812,22 @@ static bool r600_update_derived_state(struct 
r600_context *rctx)
}
}
 
+   if (rctx->tes_shader) {
+   assert(rctx->b.chip_class >= EVERGREEN);
+   need_buf_const = 
rctx->tes_shader->current->shader.uses_tex_buffers ||
+
rctx->tes_shader->current->shader.has_txq_cube_array_z_comp;
+   if (need_buf_const) {
+   eg_setup_buffer_constants(rctx, PIPE_SHADER_TESS_EVAL);
+   }
+   if (rctx->tcs_shader) {
+   need_buf_const = 
rctx->tcs_shader->current->shader.uses_tex_buffers ||
+
rctx->tcs_shader->current->shader.has_txq_cube_array_z_comp;
+   if (need_buf_const) {
+   eg_setup_buffer_constants(rctx, 
PIPE_SHADER_TESS_CTRL);
+   }
+   }
+   }
+
r600_update_driver_const_buffers(rctx, false);
 
if (rctx->b.chip_class < EVERGREEN && rctx->ps_shader && 
rctx->vs_shader) {

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): r600: fix enabled_rb_mask on eg/cm

2018-01-09 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 7c0bc495f1e467562c4b47da1c2821fd323a45b1
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=7c0bc495f1e467562c4b47da1c2821fd323a45b1

Author: Roland Scheidegger 
Date:   Tue Jan  9 01:38:27 2018 +0100

r600: fix enabled_rb_mask on eg/cm

For eg/cm, the r600_gb_backend_map will always be 0. This is a bug in
the drm kernel driver, as it just never fills the information in
(it is now being fixed - the history shows it was being filled in when
the query was brand new but got lost shortly thereafter with backend_map
fixes).
This causes r600_query_hw_prepare_buffer to write the "status bit"
(just the highest bit of the occlusion query result) even for active rbes
(all but the first). This doesn't make much sense, albeit I suppose it's mostly
safe. According to the commit history, it's necessary to set these bits for
inactive rbes since otherwise predication will lock up - presumably the hw just
is waiting for the status bit to appear, which will never happen with inactive
rbes. I'd guess potentially predication could be wrong (due to not waiting for
the actual result if the status bit is already there) if this is set for
active rbes.

Discovered while trying to fix predication lockups on Juniper (needs another
patch).

Reviewed-by: Dave Airlie 

---

 src/gallium/drivers/r600/r600_query.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_query.c 
b/src/gallium/drivers/r600/r600_query.c
index 0d22bc5216..8f87c51cca 100644
--- a/src/gallium/drivers/r600/r600_query.c
+++ b/src/gallium/drivers/r600/r600_query.c
@@ -1822,8 +1822,15 @@ void r600_query_fix_enabled_rb_mask(struct 
r600_common_screen *rscreen)
 
assert(rscreen->chip_class <= CAYMAN);
 
-   /* if backend_map query is supported by the kernel */
-   if (rscreen->info.r600_gb_backend_map_valid) {
+   /*
+* if backend_map query is supported by the kernel.
+* Note the kernel drm driver for a long time never filled in the
+* associated data on eg/cm, only on r600/r700, hence ignore the valid
+* bit there if the map is zero.
+* (Albeit some chips with just one active rb can have a valid 0 map.)
+*/ 
+   if (rscreen->info.r600_gb_backend_map_valid &&
+   (ctx->chip_class < EVERGREEN || rscreen->info.r600_gb_backend_map 
!= 0)) {
unsigned num_tile_pipes = rscreen->info.num_tile_pipes;
unsigned backend_map = rscreen->info.r600_gb_backend_map;
unsigned item_width, item_mask;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): r600: use GET_BUFFER_RESINFO vtx fetch on eg instead of setting up consts

2018-01-09 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: c5162fd3c4b55f9a9e7d0ec253bb2be6f55ee777
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=c5162fd3c4b55f9a9e7d0ec253bb2be6f55ee777

Author: Roland Scheidegger 
Date:   Tue Jan  2 23:39:34 2018 +0100

r600: use GET_BUFFER_RESINFO vtx fetch on eg instead of setting up consts

Contrary to what the comment said, this appears to work just fine on my rv770
(tested with piglit textureSize 140 fs/vs samplerBuffer).
Dave Airlie confirmed it working on cayman too.
I have no clue though if it's actually preferable to use it (unfortunately
we cannot get rid of the tex constants completely, as we still require them
for cube map txq).
Albeit filling in the format (1 channels or 4?) and the stuff related to mega-
or mini-fetch (what the hell is this...) is just a guess based on other usage
of vtx fetch instructions...

v2: it really needs to be done through texture cache (I botched the
testing because sb optimizations turned it automatically into tc, but
can't rely on it and isn't happening on tes).

Tested-by: Konstantin Kharlamov 
Reviewed-by: Dave Airlie 

---

 src/gallium/drivers/r600/evergreen_state.c   |  7 ++--
 src/gallium/drivers/r600/r600_asm.c  |  3 +-
 src/gallium/drivers/r600/r600_shader.c   | 59 ++--
 src/gallium/drivers/r600/r600_state_common.c | 39 +++---
 4 files changed, 50 insertions(+), 58 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index f5b8e7115d..f645791a2c 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -653,11 +653,12 @@ static void evergreen_fill_buffer_resource_words(struct 
r600_context *rctx,
S_030008_ENDIAN_SWAP(endian);
tex_resource_words[3] = swizzle_res | 
S_03000C_UNCACHED(params->uncached);
/*
-* in theory dword 4 is for number of elements, for use with resinfo,
-* but it seems to utterly fail to work, the amd gpu shader analyser
+* dword 4 is for number of elements, for use with resinfo,
+* albeit the amd gpu shader analyser
 * uses a const buffer to store the element sizes for buffer txq
 */
-   tex_resource_words[4] = 0;
+   tex_resource_words[4] = params->size / stride;
+
tex_resource_words[5] = tex_resource_words[6] = 0;
tex_resource_words[7] = S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_BUFFER);
 }
diff --git a/src/gallium/drivers/r600/r600_asm.c 
b/src/gallium/drivers/r600/r600_asm.c
index d6bd561f01..92c2bdf27c 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -1510,7 +1510,8 @@ int cm_bytecode_add_cf_end(struct r600_bytecode *bc)
 /* common to all 3 families */
 static int r600_bytecode_vtx_build(struct r600_bytecode *bc, struct 
r600_bytecode_vtx *vtx, unsigned id)
 {
-   bc->bytecode[id] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) |
+   bc->bytecode[id] = S_SQ_VTX_WORD0_VTX_INST(vtx->op) |
+   S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) |
S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) |
S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) |
S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x);
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 716a829273..f2bc34660f 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -6949,31 +6949,48 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx 
*ctx, boolean src_requires_l
 static int r600_do_buffer_txq(struct r600_shader_ctx *ctx, int reg_idx, int 
offset)
 {
struct tgsi_full_instruction *inst = 
&ctx->parse.FullToken.FullInstruction;
-   struct r600_bytecode_alu alu;
int r;
int id = tgsi_tex_get_src_gpr(ctx, reg_idx) + offset;
+   int sampler_index_mode = inst->Src[reg_idx].Indirect.Index == 2 ? 2 : 
0; // CF_INDEX_1 : CF_INDEX_NONE
 
-   memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-   alu.op = ALU_OP1_MOV;
-   alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
-   if (ctx->bc->chip_class >= EVERGREEN) {
-   /* with eg each dword is either buf size or number of cubes */
-   alu.src[0].sel += id / 4;
-   alu.src[0].chan = id % 4;
-   } else {
+   if (ctx->bc->chip_class < EVERGREEN) {
+   struct r600_bytecode_alu alu;
+   memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+   alu.op = ALU_OP1_MOV;
+   alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
/* r600 we have them at channel 2 of the second dword */
alu.src[0].sel += (id * 2) + 1;
alu.src[0].chan = 1;
+   alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
+ 

Mesa (master): r600: increase number of ubos by one to 14

2018-01-09 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 0be1dc25cf72da49fc767f2cd6560f738c0449e0
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=0be1dc25cf72da49fc767f2cd6560f738c0449e0

Author: Roland Scheidegger 
Date:   Mon Jan  1 04:20:41 2018 +0100

r600: increase number of ubos by one to 14

Ideally we'd support 16 (d3d11 requires 15, and mesa subtracts one for non-ubo
constants), but that's kind of impossible (it would be only doable if either
we'd somehow merge the mesa non-ubo constants with the driver constants, or
only use the driver constants with vtx fetch instead of through the kcache
mechanism - the latter probably wouldn't be too bad).
For now just do as the comment already said, place the gs ring (not really
a const buffer in any case) which is only ever referred to through vc fetch
clauses at index 16. Throw in a couple asserts for good measure to make sure
the hw limit isn't exceeded.

Tested-by: Konstantin Kharlamov 
Reviewed-by: Dave Airlie 

---

 src/gallium/drivers/r600/evergreen_state.c |  1 +
 src/gallium/drivers/r600/r600_asm.c|  1 +
 src/gallium/drivers/r600/r600_pipe.h   | 10 ++
 src/gallium/drivers/r600/r600_state.c  |  1 +
 4 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 81b7c4a285..f5b8e7115d 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -2168,6 +2168,7 @@ static void evergreen_emit_constant_buffers(struct 
r600_context *rctx,
va = rbuffer->gpu_address + cb->buffer_offset;
 
if (!gs_ring_buffer) {
+   assert(buffer_index < R600_MAX_HW_CONST_BUFFERS);
radeon_set_context_reg_flag(cs, reg_alu_constbuf_size + 
buffer_index * 4,

DIV_ROUND_UP(cb->buffer_size, 256), pkt_flags);
radeon_set_context_reg_flag(cs, reg_alu_const_cache + 
buffer_index * 4, va >> 8,
diff --git a/src/gallium/drivers/r600/r600_asm.c 
b/src/gallium/drivers/r600/r600_asm.c
index 69b2d142c1..d6bd561f01 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -1008,6 +1008,7 @@ static int r600_bytecode_alloc_inst_kcache_lines(struct 
r600_bytecode *bc,
continue;
 
bank = alu->src[i].kc_bank;
+   assert(bank < R600_MAX_HW_CONST_BUFFERS);
line = (sel-512)>>4;
index_mode = alu->src[i].kc_rel ? 1 : 0; // V_SQ_CF_INDEX_0 / 
V_SQ_CF_INDEX_NONE
 
diff --git a/src/gallium/drivers/r600/r600_pipe.h 
b/src/gallium/drivers/r600/r600_pipe.h
index e042edf2b4..cb84bc1998 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -69,11 +69,12 @@
 #define R600_MAX_DRAW_CS_DWORDS58
 #define R600_MAX_PFP_SYNC_ME_DWORDS16
 
-#define R600_MAX_USER_CONST_BUFFERS 13
+#define EG_MAX_ATOMIC_BUFFERS 8
+
+#define R600_MAX_USER_CONST_BUFFERS 14
 #define R600_MAX_DRIVER_CONST_BUFFERS 3
 #define R600_MAX_CONST_BUFFERS (R600_MAX_USER_CONST_BUFFERS + 
R600_MAX_DRIVER_CONST_BUFFERS)
-
-#define EG_MAX_ATOMIC_BUFFERS 8
+#define R600_MAX_HW_CONST_BUFFERS 16
 
 /* start driver buffers after user buffers */
 #define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
@@ -84,7 +85,8 @@
 #define R600_LDS_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
 /*
  * Note GS doesn't use a constant buffer binding, just a resource index,
- * so it's fine to have it exist at index 16.
+ * so it's fine to have it exist at index 16. I.e. it's not actually
+ * a const buffer, just a buffer resource.
  */
 #define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2)
 /* Currently R600_MAX_CONST_BUFFERS just fits on the hw, which has a limit
diff --git a/src/gallium/drivers/r600/r600_state.c 
b/src/gallium/drivers/r600/r600_state.c
index 253ff57a98..89cf7d2e50 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1712,6 +1712,7 @@ static void r600_emit_constant_buffers(struct 
r600_context *rctx,
offset = cb->buffer_offset;
 
if (!gs_ring_buffer) {
+   assert(buffer_index < R600_MAX_HW_CONST_BUFFERS);
radeon_set_context_reg(cs, reg_alu_constbuf_size + 
buffer_index * 4,
   DIV_ROUND_UP(cb->buffer_size, 
256));
radeon_set_context_reg(cs, reg_alu_const_cache + 
buffer_index * 4, offset >> 8);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): r600: increase number of UBOs to 15

2018-01-09 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 523b6c87048ddc5b49be4ca985bf91d8585aef47
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=523b6c87048ddc5b49be4ca985bf91d8585aef47

Author: Roland Scheidegger 
Date:   Wed Jan  3 02:09:01 2018 +0100

r600: increase number of UBOs to 15

With the exception of the default tess levels only ever accessed
by the default tcs shader, the LDS_INFO const buffer was only accessed by vtx
instructions, and not through kcache. No idea why really, but use this to our
advantage by not using a constant buffer slot for it. This just requires us to
throw the default tess levels into the "normal" driver const buffer instead.
Alternatively, could access those constants via vtx instructions too, but then
we couldn't use an ordinary ureg prog accessing them as constants and would have
to generate that directly when compiling the default tcs shader. (Another
alternative would be to put all lds info into the ordinary driver const
buffer, albeit we'd maybe need to increase the fixed size as it can't fit
alongside the ucp since vs needs access to the lds info too.)

Tested-by: Konstantin Kharlamov 
Dave Airlie 

---

 src/gallium/drivers/r600/evergreen_state.c   | 15 --
 src/gallium/drivers/r600/r600_pipe.h | 13 
 src/gallium/drivers/r600/r600_state_common.c | 31 +---
 3 files changed, 37 insertions(+), 22 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index f645791a2c..4cc48dfa11 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -2168,8 +2168,7 @@ static void evergreen_emit_constant_buffers(struct 
r600_context *rctx,
 
va = rbuffer->gpu_address + cb->buffer_offset;
 
-   if (!gs_ring_buffer) {
-   assert(buffer_index < R600_MAX_HW_CONST_BUFFERS);
+   if (buffer_index < R600_MAX_HW_CONST_BUFFERS) {
radeon_set_context_reg_flag(cs, reg_alu_constbuf_size + 
buffer_index * 4,

DIV_ROUND_UP(cb->buffer_size, 256), pkt_flags);
radeon_set_context_reg_flag(cs, reg_alu_const_cache + 
buffer_index * 4, va >> 8,
@@ -3880,7 +3879,7 @@ static void evergreen_set_tess_state(struct pipe_context 
*ctx,
 
memcpy(rctx->tess_state, default_outer_level, sizeof(float) * 4);
memcpy(rctx->tess_state+4, default_inner_level, sizeof(float) * 2);
-   rctx->tess_state_dirty = true;
+   rctx->driver_consts[PIPE_SHADER_TESS_CTRL].tcs_default_levels_dirty = 
true;
 }
 
 static void evergreen_setup_immed_buffer(struct r600_context *rctx,
@@ -4344,7 +4343,7 @@ void evergreen_setup_tess_constants(struct r600_context 
*rctx, const struct pipe
unsigned input_vertex_size, output_vertex_size;
unsigned input_patch_size, pervertex_output_patch_size, 
output_patch_size;
unsigned output_patch0_offset, perpatch_output_offset, lds_size;
-   uint32_t values[16];
+   uint32_t values[8];
unsigned num_waves;
unsigned num_pipes = rctx->screen->b.info.r600_max_quad_pipes;
unsigned wave_divisor = (16 * num_pipes);
@@ -4364,7 +4363,6 @@ void evergreen_setup_tess_constants(struct r600_context 
*rctx, const struct pipe
 
if (rctx->lds_alloc != 0 &&
rctx->last_ls == ls &&
-   !rctx->tess_state_dirty &&
rctx->last_num_tcs_input_cp == num_tcs_input_cp &&
rctx->last_tcs == tcs)
return;
@@ -4411,17 +4409,12 @@ void evergreen_setup_tess_constants(struct r600_context 
*rctx, const struct pipe
 
rctx->lds_alloc = (lds_size | (num_waves << 14));
 
-   memcpy(&values[8], rctx->tess_state, 6 * sizeof(float));
-   values[14] = 0;
-   values[15] = 0;
-
-   rctx->tess_state_dirty = false;
rctx->last_ls = ls;
rctx->last_tcs = tcs;
rctx->last_num_tcs_input_cp = num_tcs_input_cp;
 
constbuf.user_buffer = values;
-   constbuf.buffer_size = 16 * 4;
+   constbuf.buffer_size = 8 * 4;
 
rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX,
  R600_LDS_INFO_CONST_BUFFER, &constbuf);
diff --git a/src/gallium/drivers/r600/r600_pipe.h 
b/src/gallium/drivers/r600/r600_pipe.h
index cb84bc1998..112b5cbb83 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -71,7 +71,7 @@
 
 #define EG_MAX_ATOMIC_BUFFERS 8
 
-#define R600_MAX_USER_CONST_BUFFERS 14
+#define R600_MAX_USER_CONST_BUFFERS 15
 #define R600_MAX_DRIVER_CONST_BUFFERS 3
 #define R600_MAX_CONST_BUFFERS (R600_MAX_USER_CONST_BUFFERS + 
R600_MAX_DRIVER_CONST_BUFFERS)
 #define R600_MAX_HW_CONST_BUFFERS 16
@@ -80,12 +80,17

Mesa (master): r600: don't emit reloc for ring buffer out into the blue

2018-01-09 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 22ba4ebb1877a86c560533f5e162aa84389597e9
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=22ba4ebb1877a86c560533f5e162aa84389597e9

Author: Roland Scheidegger 
Date:   Sun Dec 31 19:21:04 2017 +0100

r600: don't emit reloc for ring buffer out into the blue

It looks like this reloc belongs to setting the constant reg, which is skipped
for gs ring.

Reviewed-by: Dave Airlie 

---

 src/gallium/drivers/r600/evergreen_state.c | 7 +++
 src/gallium/drivers/r600/r600_state.c  | 7 +++
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 0da665f634..81b7c4a285 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -2172,12 +2172,11 @@ static void evergreen_emit_constant_buffers(struct 
r600_context *rctx,

DIV_ROUND_UP(cb->buffer_size, 256), pkt_flags);
radeon_set_context_reg_flag(cs, reg_alu_const_cache + 
buffer_index * 4, va >> 8,
pkt_flags);
+   radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
+   radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, 
&rctx->b.gfx, rbuffer,
+ 
RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER));
}
 
-   radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
-   radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, 
&rctx->b.gfx, rbuffer,
- RADEON_USAGE_READ, 
RADEON_PRIO_CONST_BUFFER));
-
radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
radeon_emit(cs, (buffer_id_base + buffer_index) * 8);
radeon_emit(cs, va); /* RESOURCEi_WORD0 */
diff --git a/src/gallium/drivers/r600/r600_state.c 
b/src/gallium/drivers/r600/r600_state.c
index cbf860f45f..253ff57a98 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1715,12 +1715,11 @@ static void r600_emit_constant_buffers(struct 
r600_context *rctx,
radeon_set_context_reg(cs, reg_alu_constbuf_size + 
buffer_index * 4,
   DIV_ROUND_UP(cb->buffer_size, 
256));
radeon_set_context_reg(cs, reg_alu_const_cache + 
buffer_index * 4, offset >> 8);
+   radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
+   radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, 
&rctx->b.gfx, rbuffer,
+ 
RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER));
}
 
-   radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
-   radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, 
&rctx->b.gfx, rbuffer,
- RADEON_USAGE_READ, 
RADEON_PRIO_CONST_BUFFER));
-
radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 7, 0));
radeon_emit(cs, (buffer_id_base + buffer_index) * 7);
radeon_emit(cs, offset); /* RESOURCEi_WORD0 */

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): r600: don't use vtx offset for load_sample_position

2018-01-09 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 6c8d6ce982d3ce9dfde02a59db23b138df26ae55
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=6c8d6ce982d3ce9dfde02a59db23b138df26ae55

Author: Roland Scheidegger 
Date:   Mon Jan  1 19:40:56 2018 +0100

r600: don't use vtx offset for load_sample_position

The offset looks bogus to me. Albeit in the end it doesn't matter, by the
looks of it offsets smaller than 4 get ignored there (not sure of the rules,
I suppose either non-dword aligned offsets never work there or the offset
must be at least aligned to the size of a single element).

Tested-by: Konstantin Kharlamov 
Reviewed-by: Dave Airlie 

---

 src/gallium/drivers/r600/r600_shader.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index f6ff2055ee..bb7cc177ae 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -1284,7 +1284,7 @@ static int load_sample_position(struct r600_shader_ctx 
*ctx, struct r600_shader_
vtx.num_format_all = 2;
vtx.format_comp_all = 1;
vtx.use_const_fields = 0;
-   vtx.offset = 1; // first element is size of buffer
+   vtx.offset = 0;
vtx.endian = r600_endian_swap(32);
vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): r600: fix textureSize queries with tbos

2017-12-29 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 878bc4a5ae6215a5d84c3e3a5c9575ccd1ae27e2
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=878bc4a5ae6215a5d84c3e3a5c9575ccd1ae27e2

Author: Roland Scheidegger 
Date:   Sat Dec 23 04:50:13 2017 +0100

r600: fix textureSize queries with tbos

piglit doesn't care, but I'm quite confident that the size actually bound
as range should be reported and not the base size of the resource (and
some quick piglit test hacking confirms this).
Also, the array in the constant buffer looks overallocated by a factor of 4.
For eg, also decrease the size by another factor of 2 by using the same
constant slot for both buffer size (required for txq for TBOs) and the number
of layers for cube arrays, as these are mutually exclusive. Could of course use
some more logic and only actually do this for the samplers/images/buffers where
it's required rather than for all, but ah well...

Reviewed-by: Dave Airlie 

---

 src/gallium/drivers/r600/r600_shader.c   | 18 ++---
 src/gallium/drivers/r600/r600_state_common.c | 39 +---
 2 files changed, 33 insertions(+), 24 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index c9247e1c28..e28882b2e5 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -6955,9 +6955,9 @@ static int r600_do_buffer_txq(struct r600_shader_ctx 
*ctx, int reg_idx, int offs
alu.op = ALU_OP1_MOV;
alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
if (ctx->bc->chip_class >= EVERGREEN) {
-   /* channel 0 or 2 of each word */
-   alu.src[0].sel += (id / 2);
-   alu.src[0].chan = (id % 2) * 2;
+   /* with eg each dword is either buf size or number of cubes */
+   alu.src[0].sel += id / 4;
+   alu.src[0].chan = id % 4;
} else {
/* r600 we have them at channel 2 of the second dword */
alu.src[0].sel += (id * 2) + 1;
@@ -7615,9 +7615,9 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 
alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
if (ctx->bc->chip_class >= EVERGREEN) {
-   /* channel 1 or 3 of each word */
-   alu.src[0].sel += (id / 2);
-   alu.src[0].chan = ((id % 2) * 2) + 1;
+   /* with eg each dword is either buf size or number of 
cubes */
+   alu.src[0].sel += id / 4;
+   alu.src[0].chan = id % 4;
} else {
/* r600 we have them at channel 2 of the second dword */
alu.src[0].sel += (id * 2) + 1;
@@ -8782,9 +8782,9 @@ static int tgsi_resq(struct r600_shader_ctx *ctx)
alu.op = ALU_OP1_MOV;
 
alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
-   /* channel 1 or 3 of each word */
-   alu.src[0].sel += (id / 2);
-   alu.src[0].chan = ((id % 2) * 2) + 1;
+   /* with eg each dword is either buf size or number of cubes */
+   alu.src[0].sel += id / 4;
+   alu.src[0].chan = id % 4;
alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
alu.last = 1;
diff --git a/src/gallium/drivers/r600/r600_state_common.c 
b/src/gallium/drivers/r600/r600_state_common.c
index e5a5a33367..e7fa1bbf57 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -902,7 +902,6 @@ struct r600_pipe_shader_selector 
*r600_create_shader_state_tokens(struct pipe_co
  unsigned 
pipe_shader_type)
 {
struct r600_pipe_shader_selector *sel = 
CALLOC_STRUCT(r600_pipe_shader_selector);
-   int i;
 
sel->type = pipe_shader_type;
sel->tokens = tgsi_dup_tokens(tokens);
@@ -1326,7 +1325,7 @@ static void r600_setup_buffer_constants(struct 
r600_context *rctx, int shader_ty
samplers->views.dirty_buffer_constants = FALSE;
 
bits = util_last_bit(samplers->views.enabled_mask);
-   array_size = bits * 8 * sizeof(uint32_t) * 4;
+   array_size = bits * 8 * sizeof(uint32_t);
 
constants = r600_alloc_buf_consts(rctx, shader_type, array_size, 
&base_offset);
 
@@ -1349,15 +1348,16 @@ static void r600_setup_buffer_constants(struct 
r600_context *rctx, int shader_ty
} else
constants[offset + 4] = 0;
 
-   constants[offset + 5] = 
samplers->views.views[i]->base.texture->width0 / 
util_format_get_blocksize(samplers->views.views[i]->base.format);
+   constants[offset + 5] = 
samplers->views.views[i]->base.u.buf.size 

Mesa (master): r600: kill off native_integer shader ctx flag

2017-12-29 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: eafaf136862db1c5c6a63e2127d553a38dcc63f2
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=eafaf136862db1c5c6a63e2127d553a38dcc63f2

Author: Roland Scheidegger 
Date:   Fri Dec 22 23:31:43 2017 +0100

r600: kill off native_integer shader ctx flag

Maybe once upon a time it wasn't always true.

Reviewed-by: Dave Airlie 

---

 src/gallium/drivers/r600/r600_shader.c | 18 --
 1 file changed, 18 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 2650a33846..c9247e1c28 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -350,7 +350,6 @@ struct r600_shader_ctx {
int cs_grid_size_reg;
bool cs_block_size_loaded, cs_grid_size_loaded;
int fragcoord_input;
-   int native_integers;
int next_ring_offset;
int gs_out_ring_offset;
int gs_next_vertex;
@@ -998,22 +997,6 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
d->Semantic.Name == TGSI_SEMANTIC_SAMPLEPOS) {
break; /* Already handled from 
allocate_system_value_inputs */
} else if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
-   if (!ctx->native_integers) {
-   struct r600_bytecode_alu alu;
-   memset(&alu, 0, sizeof(struct 
r600_bytecode_alu));
-
-   alu.op = ALU_OP1_INT_TO_FLT;
-   alu.src[0].sel = 0;
-   alu.src[0].chan = 3;
-
-   alu.dst.sel = 0;
-   alu.dst.chan = 3;
-   alu.dst.write = 1;
-   alu.last = 1;
-
-   if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
-   return r;
-   }
break;
} else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID)
break;
@@ -3128,7 +3111,6 @@ static int r600_shader_from_tgsi(struct r600_context 
*rctx,
 
ctx.bc = &shader->bc;
ctx.shader = shader;
-   ctx.native_integers = true;
 
r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family,
   rscreen->has_compressed_msaa_texturing);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): gallivm: implement accurate corner behavior for textureGather with cube maps

2017-12-14 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 1ae48963f7648bb4e98faacfa3dd63906b26a518
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=1ae48963f7648bb4e98faacfa3dd63906b26a518

Author: Roland Scheidegger 
Date:   Wed Dec 13 03:33:07 2017 +0100

gallivm: implement accurate corner behavior for textureGather with cube maps

The spec says the missing texel (when we wrap around both x and y axis)
should be synthesized as the average of the 3 other texels. For bilinear
filtering however we instead adjusted the filter weights (because, while
the complexity looks similar, there would be 4 times as many color values
to fix up than weights). Obviously this could not work for gather (hence
accurate corner filtering was disabled with gather).
Implement this by just doing it as the spec implies - calculate the 4th
texel as the average of the other 3. With gather of course there's only
one color to worry about, so it's not all that many instructions either
(albeit surely the whole cube map filtering is hilariously complex).

Reviewed-by: Brian Paul 
Reviewed-by: Jose Fonseca 

---

 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 304 ++
 1 file changed, 201 insertions(+), 103 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c 
b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 6b1509c7cf..ff8cbf604c 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -1030,20 +1030,13 @@ lp_build_sample_image_linear(struct 
lp_build_sample_context *bld,
LLVMValueRef neighbors[2][2][4];
int chan, texel_index;
boolean seamless_cube_filter, accurate_cube_corners;
+   unsigned chan_swiz = bld->static_texture_state->swizzle_r;
 
seamless_cube_filter = (bld->static_texture_state->target == 
PIPE_TEXTURE_CUBE ||
bld->static_texture_state->target == 
PIPE_TEXTURE_CUBE_ARRAY) &&
   bld->static_sampler_state->seamless_cube_map;
-   /*
-* XXX I don't know how this is really supposed to work with gather. From GL
-* spec wording (not gather specific) it sounds like the 4th missing texel
-* should be an average of the other 3, hence for gather could return this.
-* This is however NOT how the code here works, which just fixes up the
-* weights used for filtering instead. And of course for gather there is
-* no filter to tweak...
-*/
-   accurate_cube_corners = ACCURATE_CUBE_CORNERS && seamless_cube_filter &&
-   !is_gather;
+
+   accurate_cube_corners = ACCURATE_CUBE_CORNERS && seamless_cube_filter;
 
lp_build_extract_image_sizes(bld,
 &bld->int_size_bld,
@@ -1382,94 +1375,191 @@ lp_build_sample_image_linear(struct 
lp_build_sample_context *bld,
* as well) here.
*/
   if (accurate_cube_corners) {
- LLVMValueRef w00, w01, w10, w11, wx0, wy0;
- LLVMValueRef c_weight, c00, c01, c10, c11;
- LLVMValueRef have_corner, one_third, tmp;
+ LLVMValueRef c00, c01, c10, c11, c00f, c01f, c10f, c11f;
+ LLVMValueRef have_corner, one_third;
 
- colorss[0] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
- colorss[1] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
- colorss[2] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
- colorss[3] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
+ colorss[0] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, 
"cs0");
+ colorss[1] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, 
"cs1");
+ colorss[2] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, 
"cs2");
+ colorss[3] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, 
"cs3");
 
  have_corner = LLVMBuildLoad(builder, have_corners, "");
 
  lp_build_if(&corner_if, bld->gallivm, have_corner);
 
- /*
-  * we can't use standard 2d lerp as we need per-element weight
-  * in case of corners, so just calculate bilinear result as
-  * w00*s00 + w01*s01 + w10*s10 + w11*s11.
-  * (This is actually less work than using 2d lerp, 7 vs. 9 
instructions,
-  * however calculating the weights needs another 6, so actually 
probably
-  * not slower than 2d lerp only for 4 channels as weights only need
-  * to be calculated once - of course fixing the weights has 
additional cost.)
-  */
- wx0 = lp_build_sub(coord_bld, coord_bld->one, s_fpart);
- wy0 = lp_build_sub(coord_bld, coord_bld->one, t_fpart);
- w00 = lp_build_mul(coord_bld, wx0, wy0);
- w01 = lp_build_mul(coord_bld, s_

Mesa (master): gallivm: fix an issue with NaNs with seamless cube filtering

2017-12-14 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: a485ad0bcdcab865e14a54133a271198c86e41ab
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=a485ad0bcdcab865e14a54133a271198c86e41ab

Author: Roland Scheidegger 
Date:   Wed Dec 13 03:33:21 2017 +0100

gallivm: fix an issue with NaNs with seamless cube filtering

Cube texture wrapping is a bit special since the values (post face
projection) always are within [0,1], so we took advantage of that and
omitted some clamps.
However, we can still get NaNs (either because the coords already had NaNs,
or the face projection generated them), and in fact we didn't handle them
quite safely. I've seen -INT_MAX + 1 being propagated through as the final int
coord value, albeit I didn't observe a crash. (Not quite a coincidence, since
any stride mul with -INT_MAX or -INT_MAX+1 will turn up as a small positive
number - nevertheless, I'd rather not try my luck, I'm not entirely sure it
can't really turn up negative either due to seamless coord swapping, plus
ifloor of a NaN is not guaranteed to return -INT_MAX by any standard. And
we kill off NaNs similarly with ordinary texture wrapping too.)
So kill off the NaNs by using the common max against zero method.

Reviewed-by: Brian Paul 
Reviewed-by: Jose Fonseca 

---

 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c 
b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index def731e9d9..6b1509c7cf 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -1130,6 +1130,17 @@ lp_build_sample_image_linear(struct 
lp_build_sample_context *bld,
*/
   /* should always have normalized coords, and offsets are undefined */
   assert(bld->static_sampler_state->normalized_coords);
+  /*
+   * The coords should all be between [0,1] however we can have NaNs,
+   * which will wreak havoc. In particular the y1_clamped value below
+   * can be -INT_MAX (on x86) and be propagated right through (probably
+   * other values might be bogus in the end too).
+   * So kill off the NaNs here.
+   */
+  coords[0] = lp_build_max_ext(coord_bld, coords[0], coord_bld->zero,
+   GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
+  coords[1] = lp_build_max_ext(coord_bld, coords[1], coord_bld->zero,
+   GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
   coord = lp_build_mul(coord_bld, coords[0], flt_width_vec);
   /* instead of clamp, build mask if overflowed */
   coord = lp_build_sub(coord_bld, coord, half);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): gallivm: fix texture wrapping for texture gather for mirror modes

2017-12-11 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 84c363fb09167bc45aeba95423b20bee7293f44a
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=84c363fb09167bc45aeba95423b20bee7293f44a

Author: Roland Scheidegger 
Date:   Tue Dec 12 04:22:28 2017 +0100

gallivm: fix texture wrapping for texture gather for mirror modes

Care must be taken that all coords end up correct, the tests are very
sensitive that everything is correctly rounded. This doesn't matter
for bilinear filter (since picking a wrong texel with weight zero is
ok), and we could also switch the per-sample coords mistakenly.
While here, also optimize the coord_mirror helper a bit (we can do the
mirroring directly by exploiting float rounding, no need for fixing up
odd/even manually).
I did not touch the mirror_clamp and mirror_clamp_to_border modes.
In contrast to mirror_clamp_to_edge and mirror_repeat these are legacy
modes. They are specified against old gl rules, which actually does
the mirroring not per sample (so you get swapped order if the coord
is in the mirrored section). I think the idea though is that they should
follow the respecified mirror_clamp_to_edge rules so the order would be
correct.

Reviewed-by: Jose Fonseca 

---

 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 245 +++---
 1 file changed, 171 insertions(+), 74 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c 
b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index b67a089c47..def731e9d9 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -218,34 +218,42 @@ lp_build_sample_texel_soa(struct lp_build_sample_context 
*bld,
 
 
 /**
- * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes.
+ * Helper to compute the mirror function for the PIPE_WRAP_MIRROR_REPEAT mode.
+ * (Note that with pot sizes could do this much more easily post-scale
+ * with some bit arithmetic.)
  */
 static LLVMValueRef
 lp_build_coord_mirror(struct lp_build_sample_context *bld,
-  LLVMValueRef coord)
+  LLVMValueRef coord, boolean posOnly)
 {
struct lp_build_context *coord_bld = &bld->coord_bld;
-   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
-   LLVMValueRef fract, flr, isOdd;
-
-   lp_build_ifloor_fract(coord_bld, coord, &flr, &fract);
-   /* kill off NaNs */
-   /* XXX: not safe without arch rounding, fract can be anything. */
-   fract = lp_build_max_ext(coord_bld, fract, coord_bld->zero,
-GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
-
-   /* isOdd = flr & 1 */
-   isOdd = LLVMBuildAnd(bld->gallivm->builder, flr, int_coord_bld->one, "");
+   LLVMValueRef fract;
+   LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
 
-   /* make coord positive or negative depending on isOdd */
-   /* XXX slight overkill masking out sign bit is unnecessary */
-   coord = lp_build_set_sign(coord_bld, fract, isOdd);
+   /*
+* We can just use 2*(x - round(0.5*x)) to do all the mirroring,
+* it all works out. (The result is in range [-1, 1.0], negative if
+* the coord is in the "odd" section, otherwise positive.)
+*/
 
-   /* convert isOdd to float */
-   isOdd = lp_build_int_to_float(coord_bld, isOdd);
+   coord = lp_build_mul(coord_bld, coord, half);
+   fract = lp_build_round(coord_bld, coord);
+   fract = lp_build_sub(coord_bld, coord, fract);
+   coord = lp_build_add(coord_bld, fract, fract);
 
-   /* add isOdd to coord */
-   coord = lp_build_add(coord_bld, coord, isOdd);
+   if (posOnly) {
+  /*
+   * Theoretically it's not quite 100% accurate because the spec says
+   * that ultimately a scaled coord of -x.0 should map to int coord
+   * -x + 1 with mirroring, not -x (this does not matter for bilinear
+   * filtering).
+   */
+  coord = lp_build_abs(coord_bld, coord);
+  /* kill off NaNs */
+  /* XXX: not safe without arch rounding, fract can be anything. */
+  coord = lp_build_max_ext(coord_bld, coord, coord_bld->zero,
+   GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
+   }
 
return coord;
 }
@@ -362,7 +370,13 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context 
*bld,
  coord = lp_build_add(coord_bld, coord, offset);
   }
 
-  /* clamp to [0, length] */
+  /*
+   * clamp to [0, length]
+   *
+   * Unlike some other wrap modes, this should be correct for gather
+   * too. GL_CLAMP explicitly does this clamp on the coord prior to
+   * actual wrapping (which is per sample).
+   */
   coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f);
 
   coord = lp_build_sub(coord_bld, coord, half);
@@ -426,8 +440,13 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context 
*bld,
  offset = lp_build_int_to_float(coord_bld, offset);
  

Mesa (master): r600: set DX10_CLAMP for compute shader too

2017-11-22 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 71e630753ebbee82e8f8709da5488296b2c070c8
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=71e630753ebbee82e8f8709da5488296b2c070c8

Author: Roland Scheidegger 
Date:   Wed Nov 22 03:11:33 2017 +0100

r600: set DX10_CLAMP for compute shader too

I really intended to set this for all shader stages by
3835009796166968750ff46cf209f6d4208cda86 but missed it for compute shaders
(because it's in a different source file...).

Reviewed-by: Dave Airlie 

---

 src/gallium/drivers/r600/evergreen_compute.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
b/src/gallium/drivers/r600/evergreen_compute.c
index 6e87539cfe..48c4a9ca45 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -746,8 +746,9 @@ void evergreen_emit_cs_shader(struct r600_context *rctx,
radeon_compute_set_context_reg_seq(cs, R_0288D0_SQ_PGM_START_LS, 3);
radeon_emit(cs, va >> 8); /* R_0288D0_SQ_PGM_START_LS */
radeon_emit(cs,   /* R_0288D4_SQ_PGM_RESOURCES_LS */
-   S_0288D4_NUM_GPRS(ngpr)
-   | S_0288D4_STACK_SIZE(nstack));
+   S_0288D4_NUM_GPRS(ngpr) |
+   S_0288D4_DX10_CLAMP(1) |
+   S_0288D4_STACK_SIZE(nstack));
radeon_emit(cs, 0); /* R_0288D8_SQ_PGM_RESOURCES_LS_2 */
 
radeon_emit(cs, PKT3C(PKT3_NOP, 0, 0));

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): llvmpipe: fix snorm blending

2017-11-20 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: b5957cee920cd7a62e4e726538dbbe44c12e33ab
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=b5957cee920cd7a62e4e726538dbbe44c12e33ab

Author: Roland Scheidegger 
Date:   Sat Nov 18 06:23:35 2017 +0100

llvmpipe: fix snorm blending

The blend math gets a bit funky due to inverse blend factors being
in range [0,2] rather than [-1,1], our normalized math can't really
cover this.
src_alpha_saturate blend factor has a similar problem too.
(Note that piglit fbo-blending-formats test is mostly useless for
anything but unorm formats, since not just all src/dst values are
between [0,1], but the tests are crafted in a way that the results
are between [0,1] too.)

v2: some formatting fixes, and fix a fairly obscure (to debug)
issue with alpha-only formats (not related to snorm at all), where
blend optimization would think it could simplify the blend equation
if the blend factors were complementary, however was using the
completely unrelated rgb blend factors instead of the alpha ones...

Reviewed-by: Jose Fonseca 

---

 src/gallium/auxiliary/gallivm/lp_bld_arit.c |  50 -
 src/gallium/auxiliary/gallivm/lp_bld_arit.h |   7 ++
 src/gallium/drivers/llvmpipe/lp_bld_blend.c | 134 ++--
 src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c |  53 ++
 4 files changed, 191 insertions(+), 53 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c 
b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index a1edd349f1..321c6e4edf 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -541,38 +541,38 @@ lp_build_add(struct lp_build_context *bld,
assert(lp_check_value(type, a));
assert(lp_check_value(type, b));
 
-   if(a == bld->zero)
+   if (a == bld->zero)
   return b;
-   if(b == bld->zero)
+   if (b == bld->zero)
   return a;
-   if(a == bld->undef || b == bld->undef)
+   if (a == bld->undef || b == bld->undef)
   return bld->undef;
 
-   if(bld->type.norm) {
+   if (type.norm) {
   const char *intrinsic = NULL;
 
-  if(a == bld->one || b == bld->one)
+  if (!type.sign && (a == bld->one || b == bld->one))
 return bld->one;
 
   if (!type.floating && !type.fixed) {
  if (type.width * type.length == 128) {
-if(util_cpu_caps.has_sse2) {
-  if(type.width == 8)
+if (util_cpu_caps.has_sse2) {
+  if (type.width == 8)
 intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : 
"llvm.x86.sse2.paddus.b";
-  if(type.width == 16)
+  if (type.width == 16)
 intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : 
"llvm.x86.sse2.paddus.w";
 } else if (util_cpu_caps.has_altivec) {
-  if(type.width == 8)
+  if (type.width == 8)
  intrinsic = type.sign ? "llvm.ppc.altivec.vaddsbs" : 
"llvm.ppc.altivec.vaddubs";
-  if(type.width == 16)
+  if (type.width == 16)
  intrinsic = type.sign ? "llvm.ppc.altivec.vaddshs" : 
"llvm.ppc.altivec.vadduhs";
 }
  }
  if (type.width * type.length == 256) {
-if(util_cpu_caps.has_avx2) {
-  if(type.width == 8)
+if (util_cpu_caps.has_avx2) {
+  if (type.width == 8)
 intrinsic = type.sign ? "llvm.x86.avx2.padds.b" : 
"llvm.x86.avx2.paddus.b";
-  if(type.width == 16)
+  if (type.width == 16)
 intrinsic = type.sign ? "llvm.x86.avx2.padds.w" : 
"llvm.x86.avx2.paddus.w";
 }
  }
@@ -842,38 +842,38 @@ lp_build_sub(struct lp_build_context *bld,
assert(lp_check_value(type, a));
assert(lp_check_value(type, b));
 
-   if(b == bld->zero)
+   if (b == bld->zero)
   return a;
-   if(a == bld->undef || b == bld->undef)
+   if (a == bld->undef || b == bld->undef)
   return bld->undef;
-   if(a == b)
+   if (a == b)
   return bld->zero;
 
-   if(bld->type.norm) {
+   if (type.norm) {
   const char *intrinsic = NULL;
 
-  if(b == bld->one)
+  if (!type.sign && b == bld->one)
 return bld->zero;
 
   if (!type.floating && !type.fixed) {
  if (type.width * type.length == 128) {
 if (util_cpu_caps.has_sse2) {
-  if(type.width == 8)
+  if (type.width == 8)
  intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : 
"llvm.x86.sse2.psubus.b";
-  if(type.width == 16)
+  if (type.width == 16)
  intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : 
"llvm.x86.sse2.psubus.w";
 } el

Mesa (master): r600: use min_dx10/max_dx10 instead of min/max

2017-11-14 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: aab0bfc648bf1be50b81a25224970015f1dc78b8
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=aab0bfc648bf1be50b81a25224970015f1dc78b8

Author: Roland Scheidegger 
Date:   Thu Nov  9 19:37:54 2017 +0100

r600: use min_dx10/max_dx10 instead of min/max

I believe this is the safe thing to do, especially ever since the driver
actually generates NaNs for muls too.
The ISA docs are not very helpful here, however the dx10 versions will pick
a non-nan result over a NaN one (this is also the ieee754 behavior), whereas
the non-dx10 ones will pick the NaN (verified by newly changed piglit
isinf-and-isnan test).
Other "modern" drivers will most likely do the same.
This was shown to make some difference for bug 103544, albeit it is not
required to fix it.

Reviewed-by: Dave Airlie 

---

 src/gallium/drivers/r600/r600_shader.c  | 13 +++--
 src/gallium/drivers/r600/sb/sb_expr.cpp |  2 ++
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 0fa2a1f0d1..805b3b6b3d 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -9175,8 +9175,9 @@ static const struct r600_shader_tgsi_instruction 
r600_shader_tgsi_instruction[]
[TGSI_OPCODE_DP3]   = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_DP4]   = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_DST]   = { ALU_OP0_NOP, tgsi_opdst},
-   [TGSI_OPCODE_MIN]   = { ALU_OP2_MIN, tgsi_op2},
-   [TGSI_OPCODE_MAX]   = { ALU_OP2_MAX, tgsi_op2},
+   /* MIN_DX10 returns non-nan result if one src is NaN, MIN returns NaN */
+   [TGSI_OPCODE_MIN]   = { ALU_OP2_MIN_DX10, tgsi_op2},
+   [TGSI_OPCODE_MAX]   = { ALU_OP2_MAX_DX10, tgsi_op2},
[TGSI_OPCODE_SLT]   = { ALU_OP2_SETGT, tgsi_op2_swap},
[TGSI_OPCODE_SGE]   = { ALU_OP2_SETGE, tgsi_op2},
[TGSI_OPCODE_MAD]   = { ALU_OP3_MULADD_IEEE, tgsi_op3},
@@ -9373,8 +9374,8 @@ static const struct r600_shader_tgsi_instruction 
eg_shader_tgsi_instruction[] =
[TGSI_OPCODE_DP3]   = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_DP4]   = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_DST]   = { ALU_OP0_NOP, tgsi_opdst},
-   [TGSI_OPCODE_MIN]   = { ALU_OP2_MIN, tgsi_op2},
-   [TGSI_OPCODE_MAX]   = { ALU_OP2_MAX, tgsi_op2},
+   [TGSI_OPCODE_MIN]   = { ALU_OP2_MIN_DX10, tgsi_op2},
+   [TGSI_OPCODE_MAX]   = { ALU_OP2_MAX_DX10, tgsi_op2},
[TGSI_OPCODE_SLT]   = { ALU_OP2_SETGT, tgsi_op2_swap},
[TGSI_OPCODE_SGE]   = { ALU_OP2_SETGE, tgsi_op2},
[TGSI_OPCODE_MAD]   = { ALU_OP3_MULADD_IEEE, tgsi_op3},
@@ -9596,8 +9597,8 @@ static const struct r600_shader_tgsi_instruction 
cm_shader_tgsi_instruction[] =
[TGSI_OPCODE_DP3]   = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_DP4]   = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_DST]   = { ALU_OP0_NOP, tgsi_opdst},
-   [TGSI_OPCODE_MIN]   = { ALU_OP2_MIN, tgsi_op2},
-   [TGSI_OPCODE_MAX]   = { ALU_OP2_MAX, tgsi_op2},
+   [TGSI_OPCODE_MIN]   = { ALU_OP2_MIN_DX10, tgsi_op2},
+   [TGSI_OPCODE_MAX]   = { ALU_OP2_MAX_DX10, tgsi_op2},
[TGSI_OPCODE_SLT]   = { ALU_OP2_SETGT, tgsi_op2_swap},
[TGSI_OPCODE_SGE]   = { ALU_OP2_SETGE, tgsi_op2},
[TGSI_OPCODE_MAD]   = { ALU_OP3_MULADD_IEEE, tgsi_op3},
diff --git a/src/gallium/drivers/r600/sb/sb_expr.cpp 
b/src/gallium/drivers/r600/sb/sb_expr.cpp
index 3dd3a4815b..7a5d62c8e8 100644
--- a/src/gallium/drivers/r600/sb/sb_expr.cpp
+++ b/src/gallium/drivers/r600/sb/sb_expr.cpp
@@ -753,7 +753,9 @@ bool expr_handler::fold_alu_op2(alu_node& n) {
n.bc.src[0].abs == n.bc.src[1].abs) {
switch (n.bc.op) {
case ALU_OP2_MIN: // (MIN x, x) => (MOV x)
+   case ALU_OP2_MIN_DX10:
case ALU_OP2_MAX:
+   case ALU_OP2_MAX_DX10:
convert_to_mov(n, v0, n.bc.src[0].neg, 
n.bc.src[0].abs);
return fold_alu_op1(n);
case ALU_OP2_ADD:  // (ADD x, x) => (MUL x, 2)

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): r600: use ieee version of rsq

2017-11-14 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 570d5b79929554a45d8aebd294bbd67969396ba5
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=570d5b79929554a45d8aebd294bbd67969396ba5

Author: Roland Scheidegger 
Date:   Thu Nov  9 19:50:41 2017 +0100

r600: use ieee version of rsq

Both r600 and evergreen used the clamped version, whereas cayman used the
ieee one. I don't think there's a valid reason for this discrepancy, so let's
switch to the ieee version for r600 and evergreen too, since we generally
want to stick to ieee arithmetic.
With this, behavior for both rcp and rsq should now be the same for all of
r600, eg, cm, all using ieee versions (albeit note rsq retains the abs
behavior for everybody, which may not be a good idea ultimately).

Reviewed-by: Dave Airlie 

---

 src/gallium/drivers/r600/r600_shader.c | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 547eebac12..b1a164e594 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -4865,11 +4865,7 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx)
 
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
-   /* XXX:
-* For state trackers other than OpenGL, we'll want to use
-* _RECIPSQRT_IEEE instead.
-*/
-   alu.op = ALU_OP1_RECIPSQRT_CLAMPED;
+   alu.op = ALU_OP1_RECIPSQRT_IEEE;
 
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): r600: use DX10_CLAMP bit in shader setup

2017-11-14 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 3835009796166968750ff46cf209f6d4208cda86
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=3835009796166968750ff46cf209f6d4208cda86

Author: Roland Scheidegger 
Date:   Thu Nov  9 19:41:29 2017 +0100

r600: use DX10_CLAMP bit in shader setup

The docs are not very precise about what this really does, however both
Alex Deucher and Nicolai Hähnle suggested this only really affects instructions
using the CLAMP output modifier, and I've confirmed that with the newly
changed piglit isinf_and_isnan test.
So, with this bit set, if an instruction has the CLAMP modifier bit (which
clamps to [0,1]) set, then NaNs will be converted to zero, otherwise the result
will be NaN.
D3D10 would require this, glsl doesn't have modifiers (with mesa
clamp(x,0,1) would get converted to such a modifier) coupled with a
whatever-floats-your-boat specified NaN behavior, but the clamp behavior
should probably always be used (this also matches what a decomposition into
min(1.0, max(x, 0.0)) would do, if min/max also adhere to the ieee spec of
picking the non-nan result).
Some apps may in fact rely on this, as this prevents misrenderings in
This War of Mine since using ieee muls
(ce7a045feeef8cad155f1c9aa07f166e146e3d00), without having to use clamped
rcp opcode, which would also fix this bug there.
radeonsi also seems to set this bit nowadays if I see that right (albeit the
llvm amdgpu code comment now says "Make clamp modifier on NaN input returns 0"
instead of "Do not clamp NAN to 0" since it was changed, which also looks
a bit misleading).

v2: set it in all shader stages.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103544

Reviewed-by: Dave Airlie 

---

 src/gallium/drivers/r600/evergreen_state.c | 6 ++
 src/gallium/drivers/r600/r600_state.c  | 9 +
 2 files changed, 15 insertions(+)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index b02d7eeca6..7c2dfa092d 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -3244,6 +3244,7 @@ void evergreen_update_ps_state(struct pipe_context *ctx, 
struct r600_pipe_shader
r600_store_value(cb, /* R_028844_SQ_PGM_RESOURCES_PS */
 S_028844_NUM_GPRS(rshader->bc.ngpr) |
 S_028844_PRIME_CACHE_ON_DRAW(1) |
+S_028844_DX10_CLAMP(1) |
 S_028844_STACK_SIZE(rshader->bc.nstack));
/* After that, the NOP relocation packet must be emitted (shader->bo, 
RADEON_USAGE_READ). */
 
@@ -3264,6 +3265,7 @@ void evergreen_update_es_state(struct pipe_context *ctx, 
struct r600_pipe_shader
 
r600_store_context_reg(cb, R_028890_SQ_PGM_RESOURCES_ES,
   S_028890_NUM_GPRS(rshader->bc.ngpr) |
+  S_028890_DX10_CLAMP(1) |
   S_028890_STACK_SIZE(rshader->bc.nstack));
r600_store_context_reg(cb, R_02888C_SQ_PGM_START_ES,
   shader->bo->gpu_address >> 8);
@@ -3326,6 +3328,7 @@ void evergreen_update_gs_state(struct pipe_context *ctx, 
struct r600_pipe_shader
 
r600_store_context_reg(cb, R_028878_SQ_PGM_RESOURCES_GS,
   S_028878_NUM_GPRS(rshader->bc.ngpr) |
+  S_028878_DX10_CLAMP(1) |
   S_028878_STACK_SIZE(rshader->bc.nstack));
r600_store_context_reg(cb, R_028874_SQ_PGM_START_GS,
   shader->bo->gpu_address >> 8);
@@ -3366,6 +3369,7 @@ void evergreen_update_vs_state(struct pipe_context *ctx, 
struct r600_pipe_shader
   S_0286C4_VS_EXPORT_COUNT(nparams - 1));
r600_store_context_reg(cb, R_028860_SQ_PGM_RESOURCES_VS,
   S_028860_NUM_GPRS(rshader->bc.ngpr) |
+  S_028860_DX10_CLAMP(1) |
   S_028860_STACK_SIZE(rshader->bc.nstack));
if (rshader->vs_position_window_space) {
r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL,
@@ -3400,6 +3404,7 @@ void evergreen_update_hs_state(struct pipe_context *ctx, 
struct r600_pipe_shader
r600_init_command_buffer(cb, 32);
r600_store_context_reg(cb, R_0288BC_SQ_PGM_RESOURCES_HS,
   S_0288BC_NUM_GPRS(rshader->bc.ngpr) |
+  S_0288BC_DX10_CLAMP(1) |
   S_0288BC_STACK_SIZE(rshader->bc.nstack));
r600_store_context_reg(cb, R_0288B8_SQ_PGM_START_HS,
   shader->bo->gpu_address >> 8);
@@ -3413,6 +3418,7 @@ void evergreen_update_ls_state(struct pipe_context *ctx, 
struct r600_pipe_shader
r600_init_command_buffer(cb, 32);
r600_store_context_r

Mesa (master): r600: set the number type correctly for float rts in cb setup

2017-11-14 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 65123ee62cd66cdffe2c4193a3e28e811d73ff65
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=65123ee62cd66cdffe2c4193a3e28e811d73ff65

Author: Roland Scheidegger 
Date:   Thu Nov  9 19:53:49 2017 +0100

r600: set the number type correctly for float rts in cb setup

Float rts were always set as unorm instead of float.
Not sure of the consequences, but at least it looks like the blend clamp
would have been enabled, which is against the rules (only eg really bothered
to even attempt to specify this correctly, r600 always used clamp anyway).
Albeit r600 (not r700) setup still looks bugged to me due to never setting
BLEND_FLOAT32 which must be set according to docs...
Not sure if the hw really cares, no piglit change (on eg/juniper).

Reviewed-by: Dave Airlie 

---

 src/gallium/drivers/r600/evergreen_state.c |  7 ++-
 src/gallium/drivers/r600/r600_state.c  | 10 +-
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 7c2dfa092d..2cd162629d 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1050,7 +1050,7 @@ static void evergreen_set_color_surface_buffer(struct 
r600_context *rctx,
}
}
ntype = V_028C70_NUMBER_UNORM;
-   if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
+   if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
ntype = V_028C70_NUMBER_SRGB;
else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
if (desc->channel[i].normalized)
@@ -1062,7 +1062,10 @@ static void evergreen_set_color_surface_buffer(struct 
r600_context *rctx,
ntype = V_028C70_NUMBER_UNORM;
else if (desc->channel[i].pure_integer)
ntype = V_028C70_NUMBER_UINT;
+   } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) {
+   ntype = V_028C70_NUMBER_FLOAT;
}
+
pitch = (pitch / 8) - 1;
color->pitch = S_028C64_PITCH_TILE_MAX(pitch);
 
@@ -1188,6 +1191,8 @@ static void evergreen_set_color_surface_common(struct 
r600_context *rctx,
ntype = V_028C70_NUMBER_UNORM;
else if (desc->channel[i].pure_integer)
ntype = V_028C70_NUMBER_UINT;
+   } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) {
+   ntype = V_028C70_NUMBER_FLOAT;
}
 
if (R600_BIG_ENDIAN)
diff --git a/src/gallium/drivers/r600/r600_state.c 
b/src/gallium/drivers/r600/r600_state.c
index 0e266aef42..3102905537 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -817,7 +817,7 @@ static void r600_init_color_surface(struct r600_context 
*rctx,
unsigned offset;
const struct util_format_description *desc;
int i;
-   bool blend_bypass = 0, blend_clamp = 1, do_endian_swap = FALSE;
+   bool blend_bypass = 0, blend_clamp = 0, do_endian_swap = FALSE;
 
if (rtex->db_compatible && !r600_can_sample_zs(rtex, false)) {
r600_init_flushed_depth_texture(&rctx->b.b, surf->base.texture, 
NULL);
@@ -869,6 +869,8 @@ static void r600_init_color_surface(struct r600_context 
*rctx,
ntype = V_0280A0_NUMBER_UNORM;
else if (desc->channel[i].pure_integer)
ntype = V_0280A0_NUMBER_UINT;
+   } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) {
+   ntype = V_0280A0_NUMBER_FLOAT;
}
 
if (R600_BIG_ENDIAN)
@@ -883,6 +885,11 @@ static void r600_init_color_surface(struct r600_context 
*rctx,
 
endian = r600_colorformat_endian_swap(format, do_endian_swap);
 
+   /* blend clamp should be set for all NORM/SRGB types */
+   if (ntype == V_0280A0_NUMBER_UNORM || ntype == V_0280A0_NUMBER_SNORM ||
+   ntype == V_0280A0_NUMBER_SRGB)
+   blend_clamp = 1;
+
/* set blend bypass according to docs if SINT/UINT or
   8/24 COLOR variants */
if (ntype == V_0280A0_NUMBER_UINT || ntype == V_0280A0_NUMBER_SINT ||
@@ -917,6 +924,7 @@ static void r600_init_color_surface(struct r600_context 
*rctx,
 ntype != V_0280A0_NUMBER_UINT &&
 ntype != V_0280A0_NUMBER_SINT) &&
G_0280A0_BLEND_CLAMP(color_info) &&
+   /* XXX this condition is always true since BLEND_FLOAT32 is 
never set (bug?). */
!G_0280A0_BLEND_FLOAT32(color_info)) {
color_info |= 
S_0280A0_SOURCE_FORMAT(V_0280A0_EXPORT_NORM);
surf->export_16bpc = true;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): r600: use ieee version of rcp

2017-11-14 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 1c8d57a008861f856a7fad8feaf14ec412a29d3e
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=1c8d57a008861f856a7fad8feaf14ec412a29d3e

Author: Roland Scheidegger 
Date:   Thu Nov  9 19:44:23 2017 +0100

r600: use ieee version of rcp

r600 used the clamped version for rcp, whereas both evergreen and cayman
used the ieee version. I don't know why that discrepancy exists (it does so
since day 1) but there does not seem to be a valid reason for this, so make
it consistent. This seems now safer than before the previous commit (using
the dx10 clamp bit).
Note that rsq still uses clamped version (as before even though the table
may have suggested otherwise for evergreen) for r600/eg, but not for cayman.
Will be changed separately for better regression tracking...

Reviewed-by: Dave Airlie 

---

 src/gallium/drivers/r600/r600_shader.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 805b3b6b3d..547eebac12 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -9161,11 +9161,7 @@ static const struct r600_shader_tgsi_instruction 
r600_shader_tgsi_instruction[]
[TGSI_OPCODE_MOV]   = { ALU_OP1_MOV, tgsi_op2},
[TGSI_OPCODE_LIT]   = { ALU_OP0_NOP, tgsi_lit},
 
-   /* XXX:
-* For state trackers other than OpenGL, we'll want to use
-* _RECIP_IEEE instead.
-*/
-   [TGSI_OPCODE_RCP]   = { ALU_OP1_RECIP_CLAMPED, 
tgsi_trans_srcx_replicate},
+   [TGSI_OPCODE_RCP]   = { ALU_OP1_RECIP_IEEE, 
tgsi_trans_srcx_replicate},
 
[TGSI_OPCODE_RSQ]   = { ALU_OP0_NOP, tgsi_rsq},
[TGSI_OPCODE_EXP]   = { ALU_OP0_NOP, tgsi_exp},
@@ -9366,7 +9362,7 @@ static const struct r600_shader_tgsi_instruction 
eg_shader_tgsi_instruction[] =
[TGSI_OPCODE_MOV]   = { ALU_OP1_MOV, tgsi_op2},
[TGSI_OPCODE_LIT]   = { ALU_OP0_NOP, tgsi_lit},
[TGSI_OPCODE_RCP]   = { ALU_OP1_RECIP_IEEE, 
tgsi_trans_srcx_replicate},
-   [TGSI_OPCODE_RSQ]   = { ALU_OP1_RECIPSQRT_IEEE, tgsi_rsq},
+   [TGSI_OPCODE_RSQ]   = { ALU_OP0_NOP, tgsi_rsq},
[TGSI_OPCODE_EXP]   = { ALU_OP0_NOP, tgsi_exp},
[TGSI_OPCODE_LOG]   = { ALU_OP0_NOP, tgsi_log},
[TGSI_OPCODE_MUL]   = { ALU_OP2_MUL_IEEE, tgsi_op2},

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): docs: Fix GL_MESA_program_debug enums

2017-11-09 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: dd38a4ee0d0b6b7addb341fe327c245bf64903e5
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=dd38a4ee0d0b6b7addb341fe327c245bf64903e5

Author: Roland Scheidegger 
Date:   Tue Nov  7 01:43:51 2017 +0100

docs: Fix GL_MESA_program_debug enums

13b303ff9265b89bdd9100e32f905e9cdadfad81 added the actual enums but
didn't remove the already existing  ones. (And also duplicated
the "fragment" names instead of using the "vertex" names.)

Fixes: 13b303ff9265b89bdd91 "docs: Update the list of used MESA GL enums."
Reviewed-by: Eric Engestrom 
Reviewed-by: Brian Paul 

---

 docs/specs/enums.txt | 26 --
 1 file changed, 8 insertions(+), 18 deletions(-)

diff --git a/docs/specs/enums.txt b/docs/specs/enums.txt
index 4b0485f349..eb4aa396c5 100644
--- a/docs/specs/enums.txt
+++ b/docs/specs/enums.txt
@@ -46,14 +46,14 @@ GL_MESA_shader_debug.spec: (obsolete)
 GL_DEBUG_ASSERT_MESA 0x875B
 
 GL_MESA_program_debug: (obsolete)
-   GL_FRAGMENT_PROGRAM_CALLBACK_MESA  0x
-   GL_VERTEX_PROGRAM_CALLBACK_MESA0x
-   GL_FRAGMENT_PROGRAM_POSITION_MESA  0x
-   GL_VERTEX_PROGRAM_POSITION_MESA0x
-   GL_FRAGMENT_PROGRAM_CALLBACK_FUNC_MESA 0x
-   GL_FRAGMENT_PROGRAM_CALLBACK_DATA_MESA 0x
-   GL_VERTEX_PROGRAM_CALLBACK_FUNC_MESA   0x
-   GL_VERTEX_PROGRAM_CALLBACK_DATA_MESA   0x
+GL_FRAGMENT_PROGRAM_POSITION_MESA   0x8BB0
+GL_FRAGMENT_PROGRAM_CALLBACK_MESA   0x8BB1
+GL_FRAGMENT_PROGRAM_CALLBACK_FUNC_MESA  0x8BB2
+GL_FRAGMENT_PROGRAM_CALLBACK_DATA_MESA  0x8BB3
+GL_VERTEX_PROGRAM_POSITION_MESA 0x8BB4
+GL_VERTEX_PROGRAM_CALLBACK_MESA 0x8BB5
+GL_VERTEX_PROGRAM_CALLBACK_FUNC_MESA0x8BB6
+GL_VERTEX_PROGRAM_CALLBACK_DATA_MESA0x8BB7
 
 GL_MESAX_texture_stack:
GL_TEXTURE_1D_STACK_MESAX0x8759
@@ -63,16 +63,6 @@ GL_MESAX_texture_stack:
GL_TEXTURE_1D_STACK_BINDING_MESAX0x875D
GL_TEXTURE_2D_STACK_BINDING_MESAX0x875E
 
-GL_MESA_program_debug
-   GL_FRAGMENT_PROGRAM_POSITION_MESA   0x8BB0
-   GL_FRAGMENT_PROGRAM_CALLBACK_MESA   0x8BB1
-   GL_FRAGMENT_PROGRAM_CALLBACK_FUNC_MESA  0x8BB2
-   GL_FRAGMENT_PROGRAM_CALLBACK_DATA_MESA  0x8BB3
-   GL_FRAGMENT_PROGRAM_POSITION_MESA   0x8BB4
-   GL_FRAGMENT_PROGRAM_CALLBACK_MESA   0x8BB5
-   GL_FRAGMENT_PROGRAM_CALLBACK_FUNC_MESA  0x8BB6
-   GL_FRAGMENT_PROGRAM_CALLBACK_DATA_MESA  0x8BB7
-
 GL_MESA_tile_raster_order
GL_TILE_RASTER_ORDER_FIXED_MESA 0x8BB8
GL_TILE_RASTER_ORDER_INCREASING_X_MESA  0x8BB9

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): draw: don't cull tris with zero area

2017-10-27 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 3e4fd2d4b185dac55a481384f8ce3a8c93d78f87
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=3e4fd2d4b185dac55a481384f8ce3a8c93d78f87

Author: Roland Scheidegger 
Date:   Thu Oct 26 21:23:27 2017 +0200

draw: don't cull tris with zero area

Culling tris with zero area seems like a great idea, but apparently with
fill mode line (and point) we're supposed to draw them, at least some tests
for some other state tracker complained otherwise.
Such tris also always seem to be back facing (not sure if this can be
inferred from anything, since in a mathematical sense it cannot really be
determined), so make sure to account for this when filling in the face
information.
(For solid tris, this is of course unnecessary, drivers will throw the tris
away later in any case.)

Reviewed-by: Brian Paul 

---

 src/gallium/auxiliary/draw/draw_pipe_cull.c | 10 ++
 src/gallium/auxiliary/draw/draw_pipe_unfilled.c |  5 ++---
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_pipe_cull.c 
b/src/gallium/auxiliary/draw/draw_pipe_cull.c
index 3e8e458959..318d743dbb 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_cull.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_cull.c
@@ -181,6 +181,16 @@ static void cull_tri( struct draw_stage *stage,
 /* triangle is not culled, pass to next stage */
 stage->next->tri( stage->next, header );
  }
+  } else {
+ /*
+  * With zero area, this is back facing (because the spec says
+  * it's front facing if sign is positive?).
+  * Some apis apparently do not allow us to cull zero area tris
+  * here, in case of fill mode line (which is rather lame).
+  */
+ if ((PIPE_FACE_BACK & cull_stage(stage)->cull_face) == 0) {
+stage->next->tri( stage->next, header );
+ }
   }
}
 }
diff --git a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c 
b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
index c465c7526f..f39db0e6a0 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
@@ -63,10 +63,9 @@ inject_front_face_info(struct draw_stage *stage,
struct prim_header *header)
 {
struct unfilled_stage *unfilled = unfilled_stage(stage);
-   unsigned ccw = header->det < 0.0;
boolean is_front_face = (
-  (stage->draw->rasterizer->front_ccw && ccw) ||
-  (!stage->draw->rasterizer->front_ccw && !ccw));
+  (stage->draw->rasterizer->front_ccw && header->det < 0.0f) ||
+  (!stage->draw->rasterizer->front_ccw && header->det > 0.0f));
int slot = unfilled->face_slot;
unsigned i;
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): gallium/util: remove some block alignment assertions

2017-10-25 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 20c77ae6390451a74e2463f02c49bd7fec3dd29c
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=20c77ae6390451a74e2463f02c49bd7fec3dd29c

Author: Roland Scheidegger 
Date:   Wed Oct 25 02:39:20 2017 +0200

gallium/util: remove some block alignment assertions

These assertions were revisited a couple of times in the past, and they
still weren't quite right.
The problem I was seeing (with some other state tracker) was a copy between
two 512x512 s3tc textures, but from mip level 0 to mip level 8. Therefore,
the destination has only size 2x2 (not a full block), so the box width/height
was only 2, causing the assertion to trigger for src alignment.
As far as I can tell, such a copy is completely legal, and because a correct
assertion would get ridiculously complicated just get rid of it for good.

Reviewed-by: Brian Paul 

---

 src/gallium/auxiliary/util/u_surface.c | 8 
 1 file changed, 8 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_surface.c 
b/src/gallium/auxiliary/util/u_surface.c
index 5abf96625e..0a79a25a43 100644
--- a/src/gallium/auxiliary/util/u_surface.c
+++ b/src/gallium/auxiliary/util/u_surface.c
@@ -324,16 +324,8 @@ util_resource_copy_region(struct pipe_context *pipe,
/* check that region boxes are block aligned */
assert(src_box.x % src_bw == 0);
assert(src_box.y % src_bh == 0);
-   assert(src_box.width % src_bw == 0 ||
-  src_box.x + src_box.width == u_minify(src->width0, src_level));
-   assert(src_box.height % src_bh == 0 ||
-  src_box.y + src_box.height == u_minify(src->height0, src_level));
assert(dst_box.x % dst_bw == 0);
assert(dst_box.y % dst_bh == 0);
-   assert(dst_box.width % dst_bw == 0 ||
-  dst_box.x + dst_box.width == u_minify(dst->width0, dst_level));
-   assert(dst_box.height % dst_bh == 0 ||
-  dst_box.y + dst_box.height == u_minify(dst->height0, dst_level));
 
/* check that region boxes are not out of bounds */
assert(src_box.x + src_box.width <= u_minify(src->width0, src_level));

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): tgsi: fix tgsi_util_get_inst_usage_mask

2017-10-19 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 77b8392858815625ee7909cf9e866043dab9d074
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=77b8392858815625ee7909cf9e866043dab9d074

Author: Roland Scheidegger 
Date:   Wed Oct 18 23:13:58 2017 +0200

tgsi: fix tgsi_util_get_inst_usage_mask

The logic for handling shadow coords was completely broken.
Fixes be3ab867bd444594f9d9e0f8e59d305d15769afd.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103265

Reviewed-by: Marek Olšák 

---

 src/gallium/auxiliary/tgsi/tgsi_util.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c 
b/src/gallium/auxiliary/tgsi/tgsi_util.c
index be8bcdf123..cfce59093c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_util.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.c
@@ -292,17 +292,17 @@ tgsi_util_get_inst_usage_mask(const struct 
tgsi_full_instruction *inst,
case TGSI_OPCODE_TXL2:
case TGSI_OPCODE_LODQ:
case TGSI_OPCODE_TG4: {
-  unsigned dim_layer_shadow =
+  unsigned dim_layer =
  tgsi_util_get_texture_coord_dim(inst->Texture.Texture);
-  unsigned dim_layer, dim;
+  unsigned dim_layer_shadow, dim;
 
-  /* Remove shadow. */
+  /* Add shadow. */
   if (tgsi_is_shadow_target(inst->Texture.Texture)) {
- dim_layer = dim_layer_shadow - 1;
+ dim_layer_shadow = dim_layer + 1;
  if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D)
-dim_layer = 1;
+dim_layer_shadow = 3;
   } else {
- dim_layer = dim_layer_shadow;
+ dim_layer_shadow = dim_layer;
   }
 
   /* Remove layer. */

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): llvmpipe: handle shader sample mask output

2017-10-18 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 3d0deed12ab3982cc183189f39c0df2793c2d94a
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=3d0deed12ab3982cc183189f39c0df2793c2d94a

Author: Roland Scheidegger 
Date:   Tue Oct 17 21:55:03 2017 +0200

llvmpipe: handle shader sample mask output

This probably isn't all that useful for GL, but there are apis where
sample_mask is a valid output even without msaa.
Just discard the pixel if the sample_mask doesn't include the bit for
sample 0.

Reviewed-by: Brian Paul 

---

 src/gallium/drivers/llvmpipe/lp_state_fs.c | 26 --
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c 
b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 05984b346e..9223ce63e3 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -84,6 +84,7 @@
 #include "gallivm/lp_bld_flow.h"
 #include "gallivm/lp_bld_debug.h"
 #include "gallivm/lp_bld_arit.h"
+#include "gallivm/lp_bld_bitarit.h"
 #include "gallivm/lp_bld_pack.h"
 #include "gallivm/lp_bld_format.h"
 #include "gallivm/lp_bld_quad.h"
@@ -347,7 +348,8 @@ generate_fs_loop(struct gallivm_state *gallivm,
   if (!shader->info.base.writes_z && !shader->info.base.writes_stencil) {
  if (key->alpha.enabled ||
  key->blend.alpha_to_coverage ||
- shader->info.base.uses_kill) {
+ shader->info.base.uses_kill ||
+ shader->info.base.writes_samplemask) {
 /* With alpha test and kill, can do the depth test early
  * and hopefully eliminate some quads.  But need to do a
  * special deferred depth write once the final mask value
@@ -516,6 +518,25 @@ generate_fs_loop(struct gallivm_state *gallivm,
   }
}
 
+   if (shader->info.base.writes_samplemask) {
+  int smaski = find_output_by_semantic(&shader->info.base,
+   TGSI_SEMANTIC_SAMPLEMASK,
+   0);
+  LLVMValueRef smask;
+  struct lp_build_context smask_bld;
+  lp_build_context_init(&smask_bld, gallivm, int_type);
+
+  assert(smaski >= 0);
+  smask = LLVMBuildLoad(builder, outputs[smaski][0], "smask");
+  /*
+   * Pixel is alive according to the first sample in the mask.
+   */
+  smask = LLVMBuildBitCast(builder, smask, smask_bld.vec_type, "");
+  smask = lp_build_and(&smask_bld, smask, smask_bld.one);
+  smask = lp_build_cmp(&smask_bld, PIPE_FUNC_NOTEQUAL, smask, 
smask_bld.zero);
+  lp_build_mask_update(&mask, smask);
+   }
+
/* Late Z test */
if (depth_mode & LATE_DEPTH_TEST) {
   int pos0 = find_output_by_semantic(&shader->info.base,
@@ -2818,7 +2839,8 @@ generate_variant(struct llvmpipe_context *lp,
  !key->alpha.enabled &&
  !key->blend.alpha_to_coverage &&
  !key->depth.enabled &&
- !shader->info.base.uses_kill
+ !shader->info.base.uses_kill &&
+ !shader->info.base.writes_samplemask
   ? TRUE : FALSE;
 
if ((shader->info.base.num_tokens <= 1) &&

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): gallivm: don't use pabs intrinsic with llvm version >= 6

2017-10-06 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 52b73caaf40e79c90a105ec6d349abb3398e3c6b
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=52b73caaf40e79c90a105ec6d349abb3398e3c6b

Author: Roland Scheidegger 
Date:   Sat Oct  7 00:52:58 2017 +0200

gallivm: don't use pabs intrinsic with llvm version >= 6

The intrinsic is gone, causing shader compilation to crash.
While here, also change the fallback code to match what llvm's auto-updater
of these intrinsics would do (except that there will still be zext/trunc
instructions in there), which should ensure that the sequence gets recognized
and fused back into a pabs in the end (I didn't test this, and it's possible
even the old sequence would get recognized, but I don't see a reason why we
shouldn't use the same sequence in any case).

Tested-by: Vinson Lee 

---

 src/gallium/auxiliary/gallivm/lp_bld_arit.c | 13 -
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c 
b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 04f86bef28..cf1958b3b6 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -1799,7 +1799,7 @@ lp_build_abs(struct lp_build_context *bld,
   }
}
 
-   if(type.width*type.length == 128 && util_cpu_caps.has_ssse3) {
+   if(type.width*type.length == 128 && util_cpu_caps.has_ssse3 && HAVE_LLVM < 
0x0600) {
   switch(type.width) {
   case 8:
  return lp_build_intrinsic_unary(builder, "llvm.x86.ssse3.pabs.b.128", 
vec_type, a);
@@ -1809,7 +1809,7 @@ lp_build_abs(struct lp_build_context *bld,
  return lp_build_intrinsic_unary(builder, "llvm.x86.ssse3.pabs.d.128", 
vec_type, a);
   }
}
-   else if (type.width*type.length == 256 && util_cpu_caps.has_avx2) {
+   else if (type.width*type.length == 256 && util_cpu_caps.has_avx2 && 
HAVE_LLVM < 0x0600) {
   switch(type.width) {
   case 8:
  return lp_build_intrinsic_unary(builder, "llvm.x86.avx2.pabs.b", 
vec_type, a);
@@ -1819,14 +1819,9 @@ lp_build_abs(struct lp_build_context *bld,
  return lp_build_intrinsic_unary(builder, "llvm.x86.avx2.pabs.d", 
vec_type, a);
   }
}
-   else if (type.width*type.length == 256 && util_cpu_caps.has_ssse3 &&
-(gallivm_debug & GALLIVM_DEBUG_PERF) &&
-(type.width == 8 || type.width == 16 || type.width == 32)) {
-  debug_printf("%s: inefficient code, should split vectors manually\n",
-   __FUNCTION__);
-   }
 
-   return lp_build_max(bld, a, LLVMBuildNeg(builder, a, ""));
+   return lp_build_select(bld, lp_build_cmp(bld, PIPE_FUNC_GREATER, a, 
bld->zero),
+  a, LLVMBuildNeg(builder, a, ""));
 }
 
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): gallivm/ppc64le: adjust VSX code generation control.

2017-10-04 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: e93f056a4e5babde516c9ef53ae3547f68f1b824
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=e93f056a4e5babde516c9ef53ae3547f68f1b824

Author: Ben Crocker 
Date:   Thu Sep 28 14:09:12 2017 -0400

gallivm/ppc64le: adjust VSX code generation control.

In lp_build_create_jit_compiler_for_module(), advance the minimum
version of LLVM for VSX code generation to 4.0; this is the minimum
revision at which several known VSX code generation bugs are fixed:

  https://llvm.org/bugs/show_bug.cgi?id=25503 (fixed in 3.8.1)
  https://llvm.org/bugs/show_bug.cgi?id=26775 (fixed in 3.8.1)
  https://llvm.org/bugs/show_bug.cgi?id=33531 (fixed in 4.0)

An llc performance bug introduced in LLVM 4.0,

  https://llvm.org/bugs/show_bug.cgi?id=34647

is still pending as of LLVM 5.0, but only has a pronounced effect on
one of the Piglit tests: ext_transform_feedback-max-varyings.

All changes tested via Piglit.

Cc: "17.2" 
Signed-off-by: Ben Crocker 
Acked-by: Roland Scheidegger 

---

 src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 37 ++-
 1 file changed, 30 insertions(+), 7 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp 
b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
index 60d88269e5..d988910a7e 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -630,23 +630,46 @@ 
lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
 #if defined(PIPE_ARCH_PPC)
MAttrs.push_back(util_cpu_caps.has_altivec ? "+altivec" : "-altivec");
 #if (HAVE_LLVM >= 0x0304)
-#if (HAVE_LLVM <= 0x0307) || (HAVE_LLVM == 0x0308 && MESA_LLVM_VERSION_PATCH 
== 0)
+#if (HAVE_LLVM < 0x0400)
/*
 * Make sure VSX instructions are disabled
-* See LLVM bug https://llvm.org/bugs/show_bug.cgi?id=25503#c7
+* See LLVM bugs:
+* https://llvm.org/bugs/show_bug.cgi?id=25503#c7 (fixed in 3.8.1)
+* https://llvm.org/bugs/show_bug.cgi?id=26775 (fixed in 3.8.1)
+* https://llvm.org/bugs/show_bug.cgi?id=33531 (fixed in 4.0)
+* https://llvm.org/bugs/show_bug.cgi?id=34647 (llc performance on certain 
unusual shader IR; intro'd in 4.0, pending as of 5.0)
 */
if (util_cpu_caps.has_altivec) {
   MAttrs.push_back("-vsx");
}
 #else
/*
-* However, bug 25503 is fixed, by the same fix that fixed
-* bug 26775, in versions of LLVM later than 3.8 (starting with 3.8.1):
-* Make sure VSX instructions are ENABLED
-* See LLVM bug https://llvm.org/bugs/show_bug.cgi?id=26775
+* Bug 25503 is fixed, by the same fix that fixed
+* bug 26775, in versions of LLVM later than 3.8 (starting with 3.8.1).
+* BZ 33531 actually comprises more than one bug, all of
+* which are fixed in LLVM 4.0.
+*
+* With LLVM 4.0 or higher:
+* Make sure VSX instructions are ENABLED, unless
+* a) the entire -mattr option is overridden via GALLIVM_MATTRS, or
+* b) VSX instructions are explicitly enabled/disabled via GALLIVM_VSX=1 or 
0.
 */
if (util_cpu_caps.has_altivec) {
-  MAttrs.push_back("+vsx");
+  char *env_mattrs = getenv("GALLIVM_MATTRS");
+  if (env_mattrs) {
+ MAttrs.push_back(env_mattrs);
+  }
+  else {
+ boolean enable_vsx = true;
+ char *env_vsx = getenv("GALLIVM_VSX");
+ if (env_vsx && env_vsx[0] == '0') {
+enable_vsx = false;
+ }
+ if (enable_vsx)
+MAttrs.push_back("+vsx");
+ else
+MAttrs.push_back("-vsx");
+  }
}
 #endif
 #endif

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): gallivm: allow additional llc options

2017-10-04 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 5c75f0c8bb876c1f4c85cda5ed10a4d632d24f56
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=5c75f0c8bb876c1f4c85cda5ed10a4d632d24f56

Author: Ben Crocker 
Date:   Thu Sep 28 14:09:11 2017 -0400

gallivm: allow additional llc options

In init_native_targets, allow the passing of additional options to
the LLC compiler via new GALLIVM_LLC_OPTIONS environmental control.
This option is available only #ifdef DEBUG, initially.
At top, add #include <llvm-c/Support.h> for LLVMParseCommandLineOptions()
declaration.

v2: Fix compile error with old llvm versions (sroland)

Cc: "17.2" 

Signed-off-by: Ben Crocker 
Acked-by: Nicolai Hähnle 
Reviewed-by: Roland Scheidegger 

---

 src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 23 +++
 1 file changed, 23 insertions(+)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp 
b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
index 342cb386d6..60d88269e5 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -49,6 +49,9 @@
 #endif
 
 #include 
+#if HAVE_LLVM >= 0x0306
+#include <llvm-c/Support.h>
+#endif
 #include 
 #include 
 #include 
@@ -122,6 +125,26 @@ static void init_native_targets()
llvm::InitializeNativeTargetAsmPrinter();
 
llvm::InitializeNativeTargetDisassembler();
+#if DEBUG && HAVE_LLVM >= 0x0306
+   {
+  char *env_llc_options = getenv("GALLIVM_LLC_OPTIONS");
+  if (env_llc_options) {
+ char *option;
+ char *options[64] = {(char *) "llc"};  // Warning without cast
+ int   n;
+ for (n = 0, option = strtok(env_llc_options, " "); option; n++, 
option = strtok(NULL, " ")) {
+options[n + 1] = option;
+ }
+ if (gallivm_debug & (GALLIVM_DEBUG_IR | GALLIVM_DEBUG_ASM | 
GALLIVM_DEBUG_DUMP_BC)) {
+debug_printf("llc additional options (%d):\n", n);
+for (int i = 1; i <= n; i++)
+   debug_printf("\t%s\n", options[i]);
+debug_printf("\n");
+ }
+ LLVMParseCommandLineOptions(n + 1, options, NULL);
+  }
+   }
+#endif
 }
 
 extern "C" void

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): gallivm: fix typo in debug_printf message

2017-10-04 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 3a9feb4db8ad1e87a70c761987798b7575d522aa
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=3a9feb4db8ad1e87a70c761987798b7575d522aa

Author: Ben Crocker 
Date:   Thu Sep 28 14:09:10 2017 -0400

gallivm: fix typo in debug_printf message

In gallivm_compile_module, fix a typo in the
debug_printf("Invoke as \"llc ..." message.

Cc: "17.2" 

Signed-off-by: Ben Crocker 
Reviewed-by: Roland Scheidegger 

---

 src/gallium/auxiliary/gallivm/lp_bld_init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c 
b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index 9f1ade68c4..c456a97eb6 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -606,7 +606,7 @@ gallivm_compile_module(struct gallivm_state *gallivm)
   LLVMWriteBitcodeToFile(gallivm->module, filename);
   debug_printf("%s written\n", filename);
   debug_printf("Invoke as \"llc %s%s -o - %s\"\n",
-   (HAVE_LLVM >= 0x0305) ? "[-mcpu=<-mcpu option] " : "",
+   (HAVE_LLVM >= 0x0305) ? "[-mcpu=<-mcpu option>] " : "",
"[-mattr=<-mattr option(s)>]",
filename);
}

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): gallivm/ppc64le: allow environmental control of Altivec code generation

2017-10-04 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 1359af930ee5baf8444b0acc3d55b1e5e1a3879e
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=1359af930ee5baf8444b0acc3d55b1e5e1a3879e

Author: Ben Crocker 
Date:   Thu Sep 28 14:09:13 2017 -0400

gallivm/ppc64le: allow environmental control of Altivec code generation

In check_os_altivec_support(), allow control of Altivec (first PPC vector
instruction set) code generation via a new environmental control,
GALLIVM_ALTIVEC, which is expected to take on a value of 1 or 0.
The default is to enable Altivec code generation.

This environmental control of Altivec code generation is initially
available only #ifdef DEBUG.

Cc: "17.2" 
Signed-off-by: Ben Crocker 
Acked-by: Roland Scheidegger 

---

 src/gallium/auxiliary/util/u_cpu_detect.c | 32 +++
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c 
b/src/gallium/auxiliary/util/u_cpu_detect.c
index 4e71041bc9..6a59f271a8 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.c
+++ b/src/gallium/auxiliary/util/u_cpu_detect.c
@@ -132,16 +132,32 @@ check_os_altivec_support(void)
if (setjmp(__lv_powerpc_jmpbuf)) {
   signal(SIGILL, SIG_DFL);
} else {
-  __lv_powerpc_canjump = 1;
+  boolean enable_altivec = TRUE;/* Default: enable  if available, and 
if not overridden */
+#ifdef DEBUG
+  /* Disabling Altivec code generation is not the same as disabling VSX 
code generation,
+   * which can be done simply by passing -mattr=-vsx to the LLVM compiler; 
cf.
+   * lp_build_create_jit_compiler_for_module().
+   * If you want to disable Altivec code generation, the best place to do 
it is here.
+   */
+  char *env_control = getenv("GALLIVM_ALTIVEC");/* 1=enable (default); 
0=disable */
+  if (env_control && env_control[0] == '0') {
+ enable_altivec = FALSE;
+  }
+#endif
+  if (enable_altivec) {
+ __lv_powerpc_canjump = 1;
 
-  __asm __volatile
- ("mtspr 256, %0\n\t"
-  "vand %%v0, %%v0, %%v0"
-  :
-  : "r" (-1));
+ __asm __volatile
+("mtspr 256, %0\n\t"
+ "vand %%v0, %%v0, %%v0"
+ :
+ : "r" (-1));
 
-  signal(SIGILL, SIG_DFL);
-  util_cpu_caps.has_altivec = 1;
+ signal(SIGILL, SIG_DFL);
+ util_cpu_caps.has_altivec = 1;
+  } else {
+ util_cpu_caps.has_altivec = 0;
+  }
}
 #endif /* !PIPE_OS_APPLE */
 }

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): gallium: add new LOD opcode

2017-09-29 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 740a1618c34c095f85d4929e11ef107d560f7450
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=740a1618c34c095f85d4929e11ef107d560f7450

Author: Roland Scheidegger 
Date:   Thu Sep 28 03:45:04 2017 +0200

gallium: add new LOD opcode

The operation performed is all the same as LODQ, but with the usual
differences between dx10 and GL texture opcodes, that is separate resource
and sampler indices (plus result swizzling, and setting z/w channels
to zero).

Reviewed-by: Jose Fonseca 
Acked-by: Nicolai Hähnle 

---

 src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 14 
 src/gallium/auxiliary/tgsi/tgsi_exec.c  | 48 ++---
 src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h  |  1 +
 src/gallium/docs/source/tgsi.rst| 12 +++
 src/gallium/include/pipe/p_shader_tokens.h  |  4 ++-
 5 files changed, 74 insertions(+), 5 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index e5d0293b8f..de18f629cd 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -3284,6 +3284,18 @@ sviewinfo_emit(
emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
 }
 
+static void
+lod_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+
+   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
+   FALSE, LP_SAMPLER_OP_LODQ, emit_data->output);
+}
+
 static LLVMValueRef
 mask_vec(struct lp_build_tgsi_context *bld_base)
 {
@@ -3898,6 +3910,8 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
bld.bld_base.op_actions[TGSI_OPCODE_GATHER4].emit = gather4_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_LOD].emit = lod_emit;
+
 
if (gs_iface) {
   /* There's no specific value for this because it should always
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 9c019a311d..afed96c9b1 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -2351,15 +2351,22 @@ static void
 exec_lodq(struct tgsi_exec_machine *mach,
   const struct tgsi_full_instruction *inst)
 {
-   uint unit;
+   uint resource_unit, sampler_unit;
int dim;
int i;
union tgsi_exec_channel coords[4];
const union tgsi_exec_channel *args[ARRAY_SIZE(coords)];
union tgsi_exec_channel r[2];
 
-   unit = fetch_sampler_unit(mach, inst, 1);
-   dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture);
+   resource_unit = fetch_sampler_unit(mach, inst, 1);
+   if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) {
+  uint target = mach->SamplerViews[resource_unit].Resource;
+  dim = tgsi_util_get_texture_coord_dim(target);
+  sampler_unit = fetch_sampler_unit(mach, inst, 2);
+   } else {
+  dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture);
+  sampler_unit = resource_unit;
+   }
assert(dim <= ARRAY_SIZE(coords));
/* fetch coordinates */
for (i = 0; i < dim; i++) {
@@ -2369,7 +2376,7 @@ exec_lodq(struct tgsi_exec_machine *mach,
for (i = dim; i < ARRAY_SIZE(coords); i++) {
   args[i] = &ZeroVec;
}
-   mach->Sampler->query_lod(mach->Sampler, unit, unit,
+   mach->Sampler->query_lod(mach->Sampler, resource_unit, sampler_unit,
 args[0]->f,
 args[1]->f,
 args[2]->f,
@@ -2386,6 +2393,35 @@ exec_lodq(struct tgsi_exec_machine *mach,
   store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y,
  TGSI_EXEC_DATA_FLOAT);
}
+   if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) {
+  unsigned char swizzles[4];
+  unsigned chan;
+  swizzles[0] = inst->Src[1].Register.SwizzleX;
+  swizzles[1] = inst->Src[1].Register.SwizzleY;
+  swizzles[2] = inst->Src[1].Register.SwizzleZ;
+  swizzles[3] = inst->Src[1].Register.SwizzleW;
+
+  for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+if (swizzles[chan] >= 2) {
+   store_dest(mach, &ZeroVec,
+  &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
+} else {
+   store_dest(mach, &r[swizzles[chan]],
+  &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
+}
+ }
+  }
+   } else {
+  if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
+ store_dest

Mesa (master): llvmpipe, gallivm: implement lod queries (LODQ opcode)

2017-09-20 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 886626960bca51bdfc0880e3830c0a95ea143c4c
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=886626960bca51bdfc0880e3830c0a95ea143c4c

Author: Roland Scheidegger 
Date:   Mon Sep 18 04:52:26 2017 +0200

llvmpipe, gallivm: implement lod queries (LODQ opcode)

This uses all the existing code to calculate lod values for mip linear
filtering. Though we'll have to disable the simplifications (if we know some
parts of the lod calculation won't actually matter for filtering purposes due
to mip clamps etc.). For better or worse, we'll also disable lod calculation
hacks (mostly should make a difference for cube maps) always - the issue with
per-pixel lod being difficult is mostly because we then have different mipmaps
needed for the actual texel fetch, which isn't a problem with lodq.
We still use approximation for the log2 - for that reason I believe the float
part of the lod is only accurate to about 4-5 bits (and one bit less with 1d
textures actually) which is hopefully good enough (though d3d10 technically
requires 6 bits - could use quadratic interpolation instead of linear to get
8 bits or so).
Since lodq requires unclamped lod, we also have to move some sampler key
calculations to texture sampling code - even if we know we're going to access
mipmap 0 we still have to calculate lod and apply lod_bias for lodq.

Passes piglit ARB_texture_query_lod tests (after having fixed the test).

Reviewed-by: Jose Fonseca 

---

 docs/features.txt |   2 +-
 src/gallium/auxiliary/gallivm/lp_bld_sample.c |  40 ---
 src/gallium/auxiliary/gallivm/lp_bld_sample.h |  10 +-
 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 137 +++---
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c   |  13 ++
 src/gallium/drivers/llvmpipe/lp_screen.c  |   2 +-
 6 files changed, 145 insertions(+), 59 deletions(-)

diff --git a/docs/features.txt b/docs/features.txt
index fe412f6607..c186dc70da 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -131,7 +131,7 @@ GL 4.0, GLSL 4.00 --- all DONE: i965/gen7+, nvc0, r600, 
radeonsi
   GL_ARB_texture_buffer_object_rgb32DONE (i965/gen6+, 
llvmpipe, softpipe, swr)
   GL_ARB_texture_cube_map_array DONE (i965/gen6+, 
nv50, llvmpipe, softpipe)
   GL_ARB_texture_gather DONE (i965/gen6+, 
nv50, llvmpipe, softpipe, swr)
-  GL_ARB_texture_query_lod  DONE (i965, nv50, 
softpipe)
+  GL_ARB_texture_query_lod  DONE (i965, nv50, 
llvmpipe, softpipe)
   GL_ARB_transform_feedback2DONE (i965/gen6+, 
nv50, llvmpipe, softpipe, swr)
   GL_ARB_transform_feedback3DONE (i965/gen7+, 
llvmpipe, softpipe, swr)
 
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c 
b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index a1dc61d40f..db3d9d65c9 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -156,19 +156,19 @@ lp_sampler_static_sampler_state(struct 
lp_static_sampler_state *state,
state->wrap_r= sampler->wrap_r;
state->min_img_filter= sampler->min_img_filter;
state->mag_img_filter= sampler->mag_img_filter;
+   state->min_mip_filter= sampler->min_mip_filter;
state->seamless_cube_map = sampler->seamless_cube_map;
 
if (sampler->max_lod > 0.0f) {
-  state->min_mip_filter = sampler->min_mip_filter;
-   } else {
-  state->min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+  state->max_lod_pos = 1;
+   }
+
+   if (sampler->lod_bias != 0.0f) {
+  state->lod_bias_non_zero = 1;
}
 
if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE ||
state->min_img_filter != state->mag_img_filter) {
-  if (sampler->lod_bias != 0.0f) {
- state->lod_bias_non_zero = 1;
-  }
 
   /* If min_lod == max_lod we can greatly simplify mipmap selection.
* This is a case that occurs during automatic mipmap generation.
@@ -234,7 +234,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
unsigned length = coord_bld->type.length;
unsigned num_quads = length / 4;
boolean rho_per_quad = rho_bld->type.length != length;
-   boolean no_rho_opt = (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) && (dims 
> 1);
+   boolean no_rho_opt = bld->no_rho_approx && (dims > 1);
unsigned i;
LLVMValueRef i32undef = 
LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
LLVMValueRef rho_xvec, rho_yvec;
@@ -694,6 +694,7 @@ lp_build_ilog2_sqrt(struct lp_build_context *bld,
  */
 void
 lp_build_lod_selector(struct lp_build_sample_context *bld,
+  boolean is_lodq,
   unsigned texture_unit,
   unsigne

Mesa (master): gallivm: fix gather implementation a bit

2017-09-08 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: dcf2feadc336a1d81bf1b03d0b9c6dd68ea61441
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=dcf2feadc336a1d81bf1b03d0b9c6dd68ea61441

Author: Roland Scheidegger 
Date:   Sat Sep  9 02:58:21 2017 +0200

gallivm: fix gather implementation a bit

gather is defined in terms of bilinear filtering, just without the filtering
part. However, there are actually some subtle differences required in our
implementation, because we use some tricks to simplify coord wrapping for the
two coords per direction.
For bilinear filtering, we don't care if we end up with an incorrect
texel, as long as the filter weight is 0.0 for it. Likewise, the order of
the texels doesn't actually matter (as long as they still have the correct
filter weight).
But for gather, these tricks lead to incorrect results.
Fix this for CLAMP_TO_EDGE, and add some comments to the other wrap functions
which look broken (the 3 mirror_clamp plus mirror_repeat) (too complex to fix
right now, and no one really seems to care...).

Reviewed-by: Brian Paul 
Reviewed-by: Jose Fonseca 

---

 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 58 +++
 1 file changed, 48 insertions(+), 10 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c 
b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index cb4660e424..1539849b2d 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -299,6 +299,7 @@ lp_build_coord_repeat_npot_linear(struct 
lp_build_sample_context *bld,
  */
 static void
 lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
+boolean is_gather,
 LLVMValueRef coord,
 LLVMValueRef length,
 LLVMValueRef length_f,
@@ -388,13 +389,29 @@ lp_build_sample_wrap_linear(struct 
lp_build_sample_context *bld,
  /* clamp to length max */
  coord = lp_build_min_ext(coord_bld, coord, length_f,
   GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
- /* subtract 0.5 */
- coord = lp_build_sub(coord_bld, coord, half);
- /* clamp to [0, length - 0.5] */
- coord = lp_build_max(coord_bld, coord, coord_bld->zero);
- /* convert to int, compute lerp weight */
- lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
- coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
+ if (!is_gather) {
+/* subtract 0.5 */
+coord = lp_build_sub(coord_bld, coord, half);
+/* clamp to [0, length - 0.5] */
+coord = lp_build_max(coord_bld, coord, coord_bld->zero);
+/* convert to int, compute lerp weight */
+lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
+coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
+ } else {
+/*
+ * The non-gather path will end up with coords 0, 1 if coord was
+ * smaller than 0.5 (with corresponding weight 0.0 so it doesn't
+ * really matter what the second coord is). But for gather, we
+ * really need to end up with coords 0, 0.
+ */
+coord = lp_build_max(coord_bld, coord, coord_bld->zero);
+coord0 = lp_build_sub(coord_bld, coord, half);
+coord1 = lp_build_add(coord_bld, coord, half);
+/* Values range ([-0.5, length_f - 0.5], [0.5, length_f + 0.5] */
+coord0 = lp_build_itrunc(coord_bld, coord0);
+coord1 = lp_build_itrunc(coord_bld, coord1);
+weight = coord_bld->undef;
+ }
  /* coord1 = min(coord1, length-1) */
  coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
  break;
@@ -424,6 +441,13 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context 
*bld,
  coord = lp_build_add(coord_bld, coord, offset);
   }
   /* compute mirror function */
+  /*
+   * XXX: This looks incorrect wrt gather. Due to wrap specification,
+   * it is possible the first coord ends up larger than the second one.
+   * However, with our simplifications the coordinates will be swapped
+   * in this case. (Albeit some other api tests don't like it even
+   * with this fixed...)
+   */
   coord = lp_build_coord_mirror(bld, coord);
 
   /* scale coord to length */
@@ -474,6 +498,20 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context 
*bld,
 offset = lp_build_int_to_float(coord_bld, offset);
 coord = lp_build_add(coord_bld, coord, offset);
  }
+ /*
+  * XXX: This looks incorrect wrt gather. Due to wrap specification,
+  * the first and second texel actually end up with "different order"
+  

Mesa (master): llvmpipe, draw: improve shader cache debugging

2017-09-08 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 57a341b0a94d37e2aee5380703d171c422d8550e
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=57a341b0a94d37e2aee5380703d171c422d8550e

Author: Roland Scheidegger 
Date:   Fri Sep  8 02:59:11 2017 +0200

llvmpipe, draw: improve shader cache debugging

With GALLIVM_DEBUG=perf set, output the relevant stats for shader cache usage
whenever we have to evict shader variants.
Also add some output when shaders are deleted (but not with the perf setting
to keep this one less noisy).
While here, also don't delete that many shaders when we have to evict. For fs,
there's potentially some cost if we have to evict due to the required flush,
however certainly shader recompiles have a high cost too so I don't think
evicting one quarter of the cache size makes sense (and, if we're evicting
based on IR count, we probably typically evict only very few or just one
shader too). For vs, I'm not sure it even makes sense to evict more than
one shader at a time, but keep the logic the same for now.

Reviewed-by: Jose Fonseca 
Reviewed-by: Brian Paul 

---

 src/gallium/auxiliary/draw/draw_llvm.c | 10 
 .../draw/draw_pt_fetch_shade_pipeline_llvm.c   | 55 +-
 src/gallium/drivers/llvmpipe/lp_state_fs.c | 25 ++
 3 files changed, 59 insertions(+), 31 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_llvm.c 
b/src/gallium/auxiliary/draw/draw_llvm.c
index 203572010f..8de29ea1fd 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -2156,6 +2156,11 @@ draw_llvm_destroy_variant(struct draw_llvm_variant 
*variant)
 {
struct draw_llvm *llvm = variant->llvm;
 
+   if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
+  debug_printf("Deleting VS variant: %u vs variants,\t%u total variants\n",
+variant->shader->variants_cached, llvm->nr_variants);
+   }
+
gallivm_destroy(variant->gallivm);
 
remove_from_list(&variant->list_item_local);
@@ -2418,6 +2423,11 @@ draw_gs_llvm_destroy_variant(struct draw_gs_llvm_variant 
*variant)
 {
struct draw_llvm *llvm = variant->llvm;
 
+   if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
+  debug_printf("Deleting GS variant: %u gs variants,\t%u total variants\n",
+variant->shader->variants_cached, llvm->nr_gs_variants);
+   }
+
gallivm_destroy(variant->gallivm);
 
remove_from_list(&variant->list_item_local);
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c 
b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
index 0277cbfc83..c6492a18cf 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
@@ -37,6 +37,7 @@
 #include "draw/draw_vs.h"
 #include "draw/draw_llvm.h"
 #include "gallivm/lp_bld_init.h"
+#include "gallivm/lp_bld_debug.h"
 
 
 struct llvm_middle_end {
@@ -71,6 +72,7 @@ static void
 llvm_middle_end_prepare_gs(struct llvm_middle_end *fpme)
 {
struct draw_context *draw = fpme->draw;
+   struct draw_llvm *llvm = fpme->llvm;
struct draw_geometry_shader *gs = draw->gs.geometry_shader;
struct draw_gs_llvm_variant_key *key;
struct draw_gs_llvm_variant *variant = NULL;
@@ -79,7 +81,7 @@ llvm_middle_end_prepare_gs(struct llvm_middle_end *fpme)
char store[DRAW_GS_LLVM_MAX_VARIANT_KEY_SIZE];
unsigned i;
 
-   key = draw_gs_llvm_make_variant_key(fpme->llvm, store);
+   key = draw_gs_llvm_make_variant_key(llvm, store);
 
/* Search shader's list of variants for the key */
li = first_elem(&shader->variants);
@@ -93,38 +95,42 @@ llvm_middle_end_prepare_gs(struct llvm_middle_end *fpme)
 
if (variant) {
   /* found the variant, move to head of global list (for LRU) */
-  move_to_head(&fpme->llvm->gs_variants_list,
-   &variant->list_item_global);
+  move_to_head(&llvm->gs_variants_list, &variant->list_item_global);
}
else {
   /* Need to create new variant */
 
   /* First check if we've created too many variants.  If so, free
-   * 25% of the LRU to avoid using too much memory.
+   * 3.125% of the LRU to avoid using too much memory.
*/
-  if (fpme->llvm->nr_gs_variants >= DRAW_MAX_SHADER_VARIANTS) {
+  if (llvm->nr_gs_variants >= DRAW_MAX_SHADER_VARIANTS) {
+ if (gallivm_debug & GALLIVM_DEBUG_PERF) {
+debug_printf("Evicting GS: %u gs variants,\t%u total variants\n",
+  shader->variants_cached, llvm->nr_gs_variants);
+ }
+
  /*
   * XXX: should we flush here ?
   */
- for (i = 0; i < DRAW_MAX_SHADER_VARIANTS / 4; i++) {
+ 

Mesa (master): llvmpipe: enable PIPE_CAP_QUERY_PIPELINE_STATISTICS

2017-09-08 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 772f475351d63067f8fd0251e2fe6a33aedf1f56
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=772f475351d63067f8fd0251e2fe6a33aedf1f56

Author: Roland Scheidegger 
Date:   Fri Sep  8 02:23:05 2017 +0200

llvmpipe: enable PIPE_CAP_QUERY_PIPELINE_STATISTICS

This was implemented since forever, but not enabled.
It passes all piglit tests except one, arb_pipeline_statistics_query-frag.
The reason is that the test (for drawing a 10x10 rect) expects between
100 and 150 pixel shader invocations. But since llvmpipe counts this with
4x4 granularity (and due to the rect being 2 tris) we end up with 224
invocations. I believe however what llvmpipe is doing violates neither the
spirit nor the letter of the spec (our fragment shader granularity really
is 4x4 pixels, albeit we will bail out early on 2x2 or 4x2 (the latter
if AVX is available) granularity); the spec allows counting additional
invocations due to implementation reasons.

Reviewed-by: Brian Paul 
Reviewed-by: Jose Fonseca 

---

 docs/features.txt| 2 +-
 src/gallium/drivers/llvmpipe/lp_screen.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/features.txt b/docs/features.txt
index 0435ce61ff..fe412f6607 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -225,7 +225,7 @@ GL 4.6, GLSL 4.60
 
   GL_ARB_gl_spirv   in progress (Nicolai 
Hähnle, Ian Romanick)
   GL_ARB_indirect_parametersDONE (nvc0, radeonsi)
-  GL_ARB_pipeline_statistics_query  DONE (i965, nvc0, 
radeonsi, softpipe, swr)
+  GL_ARB_pipeline_statistics_query  DONE (i965, nvc0, 
radeonsi, llvmpipe, softpipe, swr)
   GL_ARB_polygon_offset_clamp   DONE (i965, nv50, 
nvc0, r600, radeonsi, llvmpipe, swr)
   GL_ARB_shader_atomic_counter_ops  DONE (i965/gen7+, 
nvc0, radeonsi, softpipe)
   GL_ARB_shader_draw_parameters DONE (i965, nvc0, 
radeonsi)
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c 
b/src/gallium/drivers/llvmpipe/lp_screen.c
index 32a405088f..dba7ae3d01 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -132,7 +132,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum 
pipe_cap param)
case PIPE_CAP_QUERY_TIMESTAMP:
   return 1;
case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
-  return 0;
+  return 1;
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
   return 1;
case PIPE_CAP_TEXTURE_SHADOW_MAP:

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): llvmpipe, draw: increase shader cache limits

2017-09-06 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: de6810d9be9d1e6426881774458f8a6f3bed17ee
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=de6810d9be9d1e6426881774458f8a6f3bed17ee

Author: Roland Scheidegger 
Date:   Tue Sep  5 00:17:31 2017 +0200

llvmpipe, draw: increase shader cache limits

We're not particularly concerned with memory usage, if the tradeoff is
shader recompiles. And it's common for apps to have a lot of shaders
nowadays (and, since our shaders include a LOT of context state of course
we may create quite a bit more shaders even).
So quadruple the amount of shaders draw will cache (from 128 to 512).
For llvmpipe (fs shaders) quadruple the number of instructions, keep the
number of variants the same for now (only with very simple, non-texturing
shaders the variant limit could really be reached), and simplify the
definition, it's probably easier to just have one different definition
per branch...

Reviewed-by: Jose Fonseca 

---

 src/gallium/auxiliary/draw/draw_private.h | 2 +-
 src/gallium/drivers/llvmpipe/lp_limits.h  | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_private.h 
b/src/gallium/auxiliary/draw/draw_private.h
index 030bb2cece..06ad7372a7 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -103,7 +103,7 @@ struct vertex_header {
 
 
 /* maximum number of shader variants we can cache */
-#define DRAW_MAX_SHADER_VARIANTS 128
+#define DRAW_MAX_SHADER_VARIANTS 512
 
 /**
  * Private context for the drawing module.
diff --git a/src/gallium/drivers/llvmpipe/lp_limits.h 
b/src/gallium/drivers/llvmpipe/lp_limits.h
index 5294ced3c4..c2808162c7 100644
--- a/src/gallium/drivers/llvmpipe/lp_limits.h
+++ b/src/gallium/drivers/llvmpipe/lp_limits.h
@@ -78,10 +78,8 @@
 /**
  * Max number of instructions (for all fragment shaders combined per context)
  * that will be kept around (counted in terms of llvm ir).
- * Note: the definition looks odd, but there's branches which use a different
- * number of max shader variants.
  */
-#define LP_MAX_SHADER_INSTRUCTIONS MAX2(256*1024, 512*LP_MAX_SHADER_VARIANTS)
+#define LP_MAX_SHADER_INSTRUCTIONS (2048 * LP_MAX_SHADER_VARIANTS)
 
 /**
  * Max number of setup variants that will be kept around.

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): llvmpipe, tgsi: hook up dx10 gather4 opcode

2017-09-06 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 6d9d6071ee961acc82543b321764a0ffec8cd39a
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=6d9d6071ee961acc82543b321764a0ffec8cd39a

Author: Roland Scheidegger 
Date:   Tue Sep  5 17:59:37 2017 +0200

llvmpipe, tgsi: hook up dx10 gather4 opcode

Trivial. We already support tg4 for legacy tex opcodes, so the actual
texture sampling code already handles it.
(Just like TG4, we don't handle additional capabilities and always sample
red channel.)

Reviewed-by: Jose Fonseca 

---

 src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 28 ++---
 src/gallium/auxiliary/tgsi/tgsi_exec.c  |  5 -
 2 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index b7f1140135..f16c579f38 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -2232,6 +2232,7 @@ emit_sample(struct lp_build_tgsi_soa_context *bld,
 const struct tgsi_full_instruction *inst,
 enum lp_build_tex_modifier modifier,
 boolean compare,
+enum lp_sampler_op_type sample_type,
 LLVMValueRef *texel)
 {
struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
@@ -2245,7 +2246,7 @@ emit_sample(struct lp_build_tgsi_soa_context *bld,
 
unsigned num_offsets, num_derivs, i;
unsigned layer_coord = 0;
-   unsigned sample_key = LP_SAMPLER_OP_TEXTURE << LP_SAMPLER_OP_TYPE_SHIFT;
+   unsigned sample_key = sample_type << LP_SAMPLER_OP_TYPE_SHIFT;
 
memset(&params, 0, sizeof(params));
 
@@ -3186,7 +3187,7 @@ sample_emit(
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
 
emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
-   FALSE, emit_data->output);
+   FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
 }
 
 static void
@@ -3198,7 +3199,7 @@ sample_b_emit(
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
 
emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
-   FALSE, emit_data->output);
+   FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
 }
 
 static void
@@ -3210,7 +3211,7 @@ sample_c_emit(
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
 
emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
-   TRUE, emit_data->output);
+   TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
 }
 
 static void
@@ -3222,7 +3223,7 @@ sample_c_lz_emit(
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
 
emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
-   TRUE, emit_data->output);
+   TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
 }
 
 static void
@@ -3234,7 +3235,7 @@ sample_d_emit(
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
 
emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
-   FALSE, emit_data->output);
+   FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
 }
 
 static void
@@ -3246,7 +3247,19 @@ sample_l_emit(
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
 
emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
-   FALSE, emit_data->output);
+   FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
+}
+
+static void
+gather4_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+
+   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
+   FALSE, LP_SAMPLER_OP_GATHER, emit_data->output);
 }
 
 static void
@@ -3871,6 +3884,7 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_GATHER4].emit = gather4_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
 
if (gs_iface) {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index c58ea6ad09..1264df0c62 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -2631,6 +2631,9 @@ exec_sample(struct tgsi_exec_machine *mach,
  lod = &c1;
  control = TGSI_SAMPLER_LOD_EXPLICIT;
   }
+  else if (modifier == TEX_MODIFIER_GATHER) {
+ control = TGSI_SAMPLER_GATHER;
+  }
   else {
  assert(modifier == TEX_MODIFIER_LEVEL_ZERO);
  control = TGSI_SAMPLER_LOD_ZERO;
@@ -5687,7 +5690,

Mesa (master): st/mesa: fix view template initialization in try_pbo_readpixels

2017-09-02 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 2b2c61f0df5c18355b65772d21be36339ba5e1d9
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=2b2c61f0df5c18355b65772d21be36339ba5e1d9

Author: Roland Scheidegger 
Date:   Fri Sep  1 01:48:42 2017 +0200

st/mesa: fix view template initialization in try_pbo_readpixels

I think this is what the code was meant to do, albeit as far as I can tell
the redundant initialization some analyzers complain about should work as
well just fine (only the first layer will be used; whether the view contains one
or more layers doesn't really matter).

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102467
Reviewed-by: Brian Paul 
Reviewed-by: Marek Olšák 
Cc: mesa-sta...@lists.freedesktop.org

---

 src/mesa/state_tracker/st_cb_readpixels.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_cb_readpixels.c 
b/src/mesa/state_tracker/st_cb_readpixels.c
index 0bcf2eb4fd..84dd2d548e 100644
--- a/src/mesa/state_tracker/st_cb_readpixels.c
+++ b/src/mesa/state_tracker/st_cb_readpixels.c
@@ -175,7 +175,7 @@ try_pbo_readpixels(struct st_context *st, struct 
st_renderbuffer *strb,
 
   if (view_target != PIPE_TEXTURE_3D) {
  templ.u.tex.first_layer = surface->u.tex.first_layer;
- templ.u.tex.last_layer = templ.u.tex.last_layer;
+ templ.u.tex.last_layer = templ.u.tex.first_layer;
   } else {
  addr.constants.layer_offset = surface->u.tex.first_layer;
   }

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): util: only use SCHED_IDLE in pthread_setschedparam() when it's defined

2017-08-31 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: c92fe8a8c50968a6ac37cbecdd54208f0eea246c
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=c92fe8a8c50968a6ac37cbecdd54208f0eea246c

Author: Roland Scheidegger 
Date:   Sat Aug 26 17:08:07 2017 +0200

util: only use SCHED_IDLE in pthread_setschedparam() when it's defined

Fixes build error when it's not.

Reviewed-by: Jose Fonseca 

---

 src/util/u_queue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/util/u_queue.c b/src/util/u_queue.c
index 49361c3dad..449da7dc9a 100644
--- a/src/util/u_queue.c
+++ b/src/util/u_queue.c
@@ -246,7 +246,7 @@ util_queue_init(struct util_queue *queue,
   }
 
   if (flags & UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY) {
-   #if defined(__linux__)
+   #if defined(__linux__) && defined(SCHED_IDLE)
  struct sched_param sched_param = {0};
 
  /* The nice() function can only set a maximum of 19.

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): llvmpipe: lp_build_gather_elem_vec BE fix for 3x16 load

2017-08-31 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 57c8ead0cd08e6aaf88a389f7ce528c4f0face65
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=57c8ead0cd08e6aaf88a389f7ce528c4f0face65

Author: Ben Crocker 
Date:   Wed Aug 23 16:32:43 2017 -0400

llvmpipe: lp_build_gather_elem_vec BE fix for 3x16 load

Fix loading of a 3x16 vector as a single 48-bit load
on big-endian systems (PPC64, S390).

Roland Scheidegger's commit e827d9175675aaa6cfc0b981e2a80685fb7b3a74
plus Ray Strode's patch reduce pre-Roland Piglit failures from ~4000 to ~2000.
This patch fixes three of the four regressions observed by Ray:

- draw-vertices
- draw-vertices-half-float
- draw-vertices-half-float_gles2

One regression remains:
- draw-vertices-2101010

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100613
Cc: "17.2" "17.1" 

Signed-off-by: Ben Crocker 
Reviewed-by: Roland Scheidegger 

---

 src/gallium/auxiliary/gallivm/lp_bld_gather.c | 30 +--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_gather.c 
b/src/gallium/auxiliary/gallivm/lp_bld_gather.c
index ccd03765c7..7d11dcd3b6 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_gather.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_gather.c
@@ -234,13 +234,39 @@ lp_build_gather_elem_vec(struct gallivm_state *gallivm,
   */
  res = LLVMBuildZExt(gallivm->builder, res, dst_elem_type, "");
 
- if (vector_justify) {
 #ifdef PIPE_ARCH_BIG_ENDIAN
+ if (vector_justify) {
  res = LLVMBuildShl(gallivm->builder, res,
 LLVMConstInt(dst_elem_type,
  dst_type.width - src_width, 0), "");
-#endif
  }
+ if (src_width == 48) {
+/* Load 3x16 bit vector.
+ * The sequence of loads on big-endian hardware proceeds as 
follows.
+ * 16-bit fields are denoted by X, Y, Z, and 0.  In memory, the 
sequence
+ * of three fields appears in the order X, Y, Z.
+ *
+ * Load 32-bit word: 0.0.X.Y
+ * Load 16-bit halfword: 0.0.0.Z
+ * Rotate left: 0.X.Y.0
+ * Bitwise OR: 0.X.Y.Z
+ *
+ * The order in which we need the fields in the result is 0.Z.Y.X,
+ * the same as on little-endian; permute 16-bit fields accordingly
+ * within 64-bit register:
+ */
+LLVMValueRef shuffles[4] = {
+   lp_build_const_int32(gallivm, 2),
+   lp_build_const_int32(gallivm, 1),
+   lp_build_const_int32(gallivm, 0),
+   lp_build_const_int32(gallivm, 3),
+};
+res = LLVMBuildBitCast(gallivm->builder, res,
+   lp_build_vec_type(gallivm, 
lp_type_uint_vec(16, 4*16)), "");
+res = LLVMBuildShuffleVector(gallivm->builder, res, res, 
LLVMConstVector(shuffles, 4), "");
+res = LLVMBuildBitCast(gallivm->builder, res, dst_elem_type, "");
+ }
+#endif
   }
}
return res;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): gallivm: correct channel shift logic on big endian

2017-08-31 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 75cb6e36178b9474bbb59b76cbbcce2a67bf88d2
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=75cb6e36178b9474bbb59b76cbbcce2a67bf88d2

Author: Ray Strode 
Date:   Wed Aug 23 16:32:42 2017 -0400

gallivm: correct channel shift logic on big endian

lp_build_fetch_rgba_soa fetches a texel from a texture.
Part of that process involves first gathering the element
together from memory into a packed format, and then breaking
out the individual color channels into separate, parallel
arrays.

The code fails to account for endianness when reading the packed
values.

This commit attempts to correct the problem by reversing the order
the packed values are read on big endian systems.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100613
Cc: "17.2" "17.1" 
Signed-off-by: Ray Strode 
Reviewed-by: Roland Scheidegger 

---

 src/gallium/auxiliary/gallivm/lp_bld_format_soa.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c 
b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
index 98eb694c1f..22c19b10db 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
@@ -650,7 +650,13 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
  for (i = 0; i < format_desc->nr_channels; i++) {
 struct util_format_channel_description chan_desc = 
format_desc->channel[i];
 unsigned blockbits = type.width;
-unsigned vec_nr = chan_desc.shift / type.width;
+unsigned vec_nr;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+vec_nr = (format_desc->block.bits - (chan_desc.shift + 
chan_desc.size)) / type.width;
+#else
+vec_nr = chan_desc.shift / type.width;
+#endif
 chan_desc.shift %= type.width;
 
 output[i] = lp_build_extract_soa_chan(&bld,

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): softpipe: enable PIPE_CAP_QUERY_SO_OVERFLOW

2017-08-17 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 26d46b94b4f03a8a5203539949e19124e3cdefad
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=26d46b94b4f03a8a5203539949e19124e3cdefad

Author: Roland Scheidegger 
Date:   Tue Aug 15 17:52:41 2017 +0200

softpipe: enable PIPE_CAP_QUERY_SO_OVERFLOW

The driver was supposed to support this since way before the GL spec for it
existed, albeit it was apparently broken, so fix and enable it.

Reviewed-by: Jose Fonseca 

---

 docs/features.txt| 2 +-
 src/gallium/drivers/softpipe/sp_query.c  | 7 ++-
 src/gallium/drivers/softpipe/sp_screen.c | 3 ++-
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/docs/features.txt b/docs/features.txt
index ac7645d069..ace46692b5 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -232,7 +232,7 @@ GL 4.6, GLSL 4.60
   GL_ARB_shader_group_vote  DONE (i965, nvc0, 
radeonsi)
   GL_ARB_spirv_extensions   in progress (Nicolai 
Hähnle, Ian Romanick)
   GL_ARB_texture_filter_anisotropic not started
-  GL_ARB_transform_feedback_overflow_query  DONE (i965/gen6+, 
radeonsi)
+  GL_ARB_transform_feedback_overflow_query  DONE (i965/gen6+, 
radeonsi, softpipe)
   GL_KHR_no_error   started (Timothy 
Arceri)
 
 These are the extensions cherry-picked to make GLES 3.1
diff --git a/src/gallium/drivers/softpipe/sp_query.c 
b/src/gallium/drivers/softpipe/sp_query.c
index bec0116a56..63f6c4be04 100644
--- a/src/gallium/drivers/softpipe/sp_query.c
+++ b/src/gallium/drivers/softpipe/sp_query.c
@@ -63,6 +63,7 @@ softpipe_create_query(struct pipe_context *pipe,
   type == PIPE_QUERY_TIME_ELAPSED ||
   type == PIPE_QUERY_SO_STATISTICS ||
   type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
+  type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE ||
   type == PIPE_QUERY_PRIMITIVES_EMITTED ||
   type == PIPE_QUERY_PRIMITIVES_GENERATED || 
   type == PIPE_QUERY_PIPELINE_STATISTICS ||
@@ -102,7 +103,9 @@ softpipe_begin_query(struct pipe_context *pipe, struct 
pipe_query *q)
   sq->so.primitives_storage_needed = 
softpipe->so_stats.primitives_storage_needed;
   break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
-  sq->end = FALSE;
+   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
+  sq->so.num_primitives_written = 
softpipe->so_stats.num_primitives_written;
+  sq->so.primitives_storage_needed = 
softpipe->so_stats.primitives_storage_needed;
   break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
   sq->so.num_primitives_written = 
softpipe->so_stats.num_primitives_written;
@@ -153,6 +156,7 @@ softpipe_end_query(struct pipe_context *pipe, struct 
pipe_query *q)
   sq->end = os_time_get_nano();
   break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
   sq->so.num_primitives_written =
  softpipe->so_stats.num_primitives_written - 
sq->so.num_primitives_written;
   sq->so.primitives_storage_needed =
@@ -230,6 +234,7 @@ softpipe_get_query_result(struct pipe_context *pipe,
   vresult->b = TRUE;
   break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
   vresult->b = sq->end != 0;
   break;
case PIPE_QUERY_TIMESTAMP_DISJOINT: {
diff --git a/src/gallium/drivers/softpipe/sp_screen.c 
b/src/gallium/drivers/softpipe/sp_screen.c
index 0feef2189a..2988095eec 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -220,6 +220,8 @@ softpipe_get_param(struct pipe_screen *screen, enum 
pipe_cap param)
   return 31;
case PIPE_CAP_DRAW_INDIRECT:
   return 1;
+   case PIPE_CAP_QUERY_SO_OVERFLOW:
+  return 1;
 
case PIPE_CAP_VENDOR_ID:
   return 0xFFFFFFFF;
@@ -307,7 +309,6 @@ softpipe_get_param(struct pipe_screen *screen, enum 
pipe_cap param)
case PIPE_CAP_POST_DEPTH_COVERAGE:
case PIPE_CAP_BINDLESS_TEXTURE:
case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
-   case PIPE_CAP_QUERY_SO_OVERFLOW:
case PIPE_CAP_MEMOBJ:
   return 0;
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): llvmpipe: enable PIPE_CAP_QUERY_SO_OVERFLOW

2017-08-17 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 3e9623145760883b431c0902b198d71d003ef7a0
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=3e9623145760883b431c0902b198d71d003ef7a0

Author: Roland Scheidegger 
Date:   Tue Aug 15 17:53:49 2017 +0200

llvmpipe: enable PIPE_CAP_QUERY_SO_OVERFLOW

The driver supported this since way before the GL spec for it existed.
We just need to support both the per-stream and the all-streams variants
(which are identical due to only supporting 1 stream).
Passes piglit arb_transform_feedback_overflow_query-basic.

Reviewed-by: Jose Fonseca 

---

 docs/features.txt| 2 +-
 src/gallium/drivers/llvmpipe/lp_query.c  | 3 +++
 src/gallium/drivers/llvmpipe/lp_screen.c | 2 +-
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/docs/features.txt b/docs/features.txt
index ace46692b5..6f57ec26fd 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -232,7 +232,7 @@ GL 4.6, GLSL 4.60
   GL_ARB_shader_group_vote  DONE (i965, nvc0, 
radeonsi)
   GL_ARB_spirv_extensions   in progress (Nicolai 
Hähnle, Ian Romanick)
   GL_ARB_texture_filter_anisotropic not started
-  GL_ARB_transform_feedback_overflow_query  DONE (i965/gen6+, 
radeonsi, softpipe)
+  GL_ARB_transform_feedback_overflow_query  DONE (i965/gen6+, 
radeonsi, llvmpipe, softpipe)
   GL_KHR_no_error   started (Timothy 
Arceri)
 
 These are the extensions cherry-picked to make GLES 3.1
diff --git a/src/gallium/drivers/llvmpipe/lp_query.c 
b/src/gallium/drivers/llvmpipe/lp_query.c
index d5ed6561b8..6f8ce94e5d 100644
--- a/src/gallium/drivers/llvmpipe/lp_query.c
+++ b/src/gallium/drivers/llvmpipe/lp_query.c
@@ -155,6 +155,7 @@ llvmpipe_get_query_result(struct pipe_context *pipe,
   *result = pq->num_primitives_written;
   break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
   vresult->b = pq->num_primitives_generated > pq->num_primitives_written;
   break;
case PIPE_QUERY_SO_STATISTICS: {
@@ -215,6 +216,7 @@ llvmpipe_begin_query(struct pipe_context *pipe, struct 
pipe_query *q)
   pq->num_primitives_generated = 
llvmpipe->so_stats.primitives_storage_needed;
   break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
   pq->num_primitives_written = llvmpipe->so_stats.num_primitives_written;
   pq->num_primitives_generated = 
llvmpipe->so_stats.primitives_storage_needed;
   break;
@@ -264,6 +266,7 @@ llvmpipe_end_query(struct pipe_context *pipe, struct 
pipe_query *q)
  llvmpipe->so_stats.primitives_storage_needed - 
pq->num_primitives_generated;
   break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
   pq->num_primitives_written =
  llvmpipe->so_stats.num_primitives_written - 
pq->num_primitives_written;
   pq->num_primitives_generated =
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c 
b/src/gallium/drivers/llvmpipe/lp_screen.c
index 6c64133b90..32a405088f 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -270,6 +270,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum 
pipe_cap param)
case PIPE_CAP_DOUBLES:
case PIPE_CAP_INT64:
case PIPE_CAP_INT64_DIVMOD:
+   case PIPE_CAP_QUERY_SO_OVERFLOW:
   return 1;
 
case PIPE_CAP_VENDOR_ID:
@@ -357,7 +358,6 @@ llvmpipe_get_param(struct pipe_screen *screen, enum 
pipe_cap param)
case PIPE_CAP_POST_DEPTH_COVERAGE:
case PIPE_CAP_BINDLESS_TEXTURE:
case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
-   case PIPE_CAP_QUERY_SO_OVERFLOW:
case PIPE_CAP_MEMOBJ:
   return 0;
}

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): gallivm: handle call attributes for llvm < 4.0 in lp_add_function_attr

2017-07-21 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: dbde58dd311a77c08d316362f9365b4c0b6852fe
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=dbde58dd311a77c08d316362f9365b4c0b6852fe

Author: Roland Scheidegger 
Date:   Fri Jul 21 20:27:43 2017 +0200

gallivm: handle call attributes for llvm < 4.0 in lp_add_function_attr

We had some caller using LLVMAddInstrAttribute, which couldn't be
converted to lp_add_function_attr, because attributes were only handled
for functions in this case, so fix this.
For llvm >= 4.0, this already works correctly.
(radeonsi seems to avoid setting call site attributes prior to llvm 4.0;
the patch doing so cites that it doesn't work when calling intrinsics. But at
least for calling external functions we always used that, albeit only
for actual call attributes, not call parameter attributes, though some
quick test shows llvm seems to handle that as well. The attribute index
is sort of iffy though, since attribute 0 of the call is the actual function,
attribute 1 corresponds to the first parameter of the called function.)
(Verified with GALLIVM_DEBUG=dumpbc plus llvm-dis that the correct
attributes are shown for calls, both for llvm 4.0 and 3.3.)

Reviewed-by: Jose Fonseca 
Reviewed-by: Brian Paul 

---

 src/gallium/auxiliary/gallivm/lp_bld_intr.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.c 
b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
index 19f98bb781..b92455593f 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_intr.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
@@ -168,10 +168,14 @@ lp_add_function_attr(LLVMValueRef function_or_call,
 
 #if HAVE_LLVM < 0x0400
LLVMAttribute llvm_attr = lp_attr_to_llvm_attr(attr);
-   if (attr_idx == -1) {
-  LLVMAddFunctionAttr(function_or_call, llvm_attr);
+   if (LLVMIsAFunction(function_or_call)) {
+  if (attr_idx == -1) {
+ LLVMAddFunctionAttr(function_or_call, llvm_attr);
+  } else {
+ LLVMAddAttribute(LLVMGetParam(function_or_call, attr_idx - 1), 
llvm_attr);
+  }
} else {
-  LLVMAddAttribute(LLVMGetParam(function_or_call, attr_idx - 1), 
llvm_attr);
+  LLVMAddInstrAttribute(function_or_call, attr_idx, llvm_attr);
}
 #else
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): draw: handle more TGSI_SEMANTIC_COLOR indices

2017-07-07 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 4db72852a16fc4a2a559255f9965e1d02e4f2b9c
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=4db72852a16fc4a2a559255f9965e1d02e4f2b9c

Author: Roland Scheidegger 
Date:   Sat Jul  8 00:14:35 2017 +0200

draw: handle more TGSI_SEMANTIC_COLOR indices

It could only handle indices 0/1, otherwise what happened was bad (accessing
array out of bounds, no crash but kind of random). This is enough for the gl
state tracker (primary/secondary color) but not enough for some other state
trackers (d3d9 has no limits on the number of color interpolants).
The complexity with color semantics is all due to the front/back mapping (2
outputs in the vs map to one input in the fs) so this isn't extended to
indices > 1 - d3d9 has no use for back colors, therefore this isn't needed and
still only 2 back colors can be handled correctly.

Reviewed-by: Brian Paul 

---

 src/gallium/auxiliary/draw/draw_pipe_clip.c  | 17 ++---
 src/gallium/auxiliary/draw/draw_pipe_flatshade.c | 11 +++
 src/gallium/auxiliary/draw/draw_pipe_twoside.c   |  9 ++---
 3 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c 
b/src/gallium/auxiliary/draw/draw_pipe_clip.c
index cf2b41738b..4cfa54b2e1 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_clip.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c
@@ -771,8 +771,9 @@ find_interp(const struct draw_fragment_shader *fs, int 
*indexed_interp,
int interp;
/* If it's gl_{Front,Back}{,Secondary}Color, pick up the mode
 * from the array we've filled before. */
-   if (semantic_name == TGSI_SEMANTIC_COLOR ||
-   semantic_name == TGSI_SEMANTIC_BCOLOR) {
+   if ((semantic_name == TGSI_SEMANTIC_COLOR ||
+semantic_name == TGSI_SEMANTIC_BCOLOR) &&
+   semantic_index < 2) {
   interp = indexed_interp[semantic_index];
} else if (semantic_name == TGSI_SEMANTIC_POSITION ||
   semantic_name == TGSI_SEMANTIC_CLIPVERTEX) {
@@ -851,7 +852,8 @@ clip_init_state(struct draw_stage *stage)
 
if (fs) {
   for (i = 0; i < fs->info.num_inputs; i++) {
- if (fs->info.input_semantic_name[i] == TGSI_SEMANTIC_COLOR) {
+ if (fs->info.input_semantic_name[i] == TGSI_SEMANTIC_COLOR &&
+ fs->info.input_semantic_index[i] < 2) {
 if (fs->info.input_interpolate[i] != TGSI_INTERPOLATE_COLOR)
indexed_interp[fs->info.input_semantic_index[i]] = 
fs->info.input_interpolate[i];
  }
@@ -881,6 +883,15 @@ clip_init_state(struct draw_stage *stage)
  clipper->perspect_attribs[clipper->num_perspect_attribs] = i;
  clipper->num_perspect_attribs++;
  break;
+  case TGSI_INTERPOLATE_COLOR:
+ if (draw->rasterizer->flatshade) {
+clipper->const_attribs[clipper->num_const_attribs] = i;
+clipper->num_const_attribs++;
+ } else {
+clipper->perspect_attribs[clipper->num_perspect_attribs] = i;
+clipper->num_perspect_attribs++;
+ }
+ break;
   default:
  assert(interp == -1);
  break;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c 
b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
index cd285e6f97..2830435b99 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
@@ -170,8 +170,9 @@ find_interp(const struct draw_fragment_shader *fs, int 
*indexed_interp,
int interp;
/* If it's gl_{Front,Back}{,Secondary}Color, pick up the mode
 * from the array we've filled before. */
-   if (semantic_name == TGSI_SEMANTIC_COLOR ||
-   semantic_name == TGSI_SEMANTIC_BCOLOR) {
+   if ((semantic_name == TGSI_SEMANTIC_COLOR ||
+semantic_name == TGSI_SEMANTIC_BCOLOR) &&
+   semantic_index < 2) {
   interp = indexed_interp[semantic_index];
} else {
   /* Otherwise, search in the FS inputs, with a decent default
@@ -216,7 +217,8 @@ static void flatshade_init_state( struct draw_stage *stage )
 
if (fs) {
   for (i = 0; i < fs->info.num_inputs; i++) {
- if (fs->info.input_semantic_name[i] == TGSI_SEMANTIC_COLOR) {
+ if (fs->info.input_semantic_name[i] == TGSI_SEMANTIC_COLOR &&
+ fs->info.input_semantic_index[i] < 2) {
 if (fs->info.input_interpolate[i] != TGSI_INTERPOLATE_COLOR)
indexed_interp[fs->info.input_semantic_index[i]] = 
fs->info.input_interpolate[i];
  }
@@ -236,7 +238,8 @@ static void flatshade_init_state( struct draw_stage *stage )
info->output_semantic_index[i]);
   /* If it's flat, add it to the flat vector. */
 
-  if (interp == TGSI_INTERPOLATE_CONSTANT) {
+  if (interp == TGSI_INTERPOLATE_CONSTANT ||
+   

Mesa (master): llvmpipe: initialize default fb correctly in setup

2017-06-23 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 8bfe451ed30918244618608871423b2a72cf9767
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=8bfe451ed30918244618608871423b2a72cf9767

Author: Roland Scheidegger 
Date:   Fri Jun 23 19:40:18 2017 +0200

llvmpipe: initialize default fb correctly in setup

If lp_setup_bind_framebuffer() is never called, then setup fb x1/y1 was not
correctly initialized. This can happen if there's never a fb set - both
cso and llvmpipe would consider setting this with no cbufs and no zsbuf a
redundant change and therefore it would never get set.
We rely on this setup fb rect being initialized correctly for the tri intersect
tests, throwing away tris which don't intersect. Not initializing it meant
we'd then say it intersected, and we'd try to bin that despite that we have
no actual tiles to bin it to, leading to assertion failures (pretty harmless
since tile 0/0 always exists nevertheless as tiles are statically allocated,
albeit that should change at some point).
(Note probably not an issue with gl state tracker)

Reviewed-by: Jose Fonseca 

---

 src/gallium/drivers/llvmpipe/lp_setup.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c 
b/src/gallium/drivers/llvmpipe/lp_setup.c
index 38d91385cf..32387ab553 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -1347,6 +1347,10 @@ lp_setup_create( struct pipe_context *pipe,

setup->dirty = ~0;
 
+   /* Initialize empty default fb correctly, so the rect is empty */
+   setup->framebuffer.x1 = -1;
+   setup->framebuffer.y1 = -1;
+
return setup;
 
 no_scenes:

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): llvmpipe: fill in debug vertex info for tri rasterization

2017-06-23 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 672d245ffe85e85afe6ddd36868c145bb528c79b
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=672d245ffe85e85afe6ddd36868c145bb528c79b

Author: Roland Scheidegger 
Date:   Fri Jun 23 04:57:57 2017 +0200

llvmpipe: fill in debug vertex info for tri rasterization

This is pretty useful for debugging rasterization issues, so turn it on
based on DEBUG (the actual existence of the fields is also conditionalized
on DEBUG, lines fill it out the same too).

Reviewed-by: Brian Paul 
Reviewed-by: Jose Fonseca 

---

 src/gallium/drivers/llvmpipe/lp_setup_tri.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c 
b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index a7a5d05c32..324e93841f 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -358,7 +358,7 @@ do_triangle_ccw(struct lp_setup_context *setup,
if (!tri)
   return FALSE;
 
-#if 0
+#ifdef DEBUG
tri->v[0][0] = v0[0][0];
tri->v[1][0] = v1[0][0];
tri->v[2][0] = v2[0][0];

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): llvmpipe:fix using 32bit rasterization mistakenly, causing overflows

2017-06-23 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: c7688d2de5bb0861965e6e7b76a396ab6eec253f
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=c7688d2de5bb0861965e6e7b76a396ab6eec253f

Author: Roland Scheidegger 
Date:   Fri Jun 23 19:35:50 2017 +0200

llvmpipe:fix using 32bit rasterization mistakenly, causing overflows

We use the bounding box (triangle extents) to figure out if 32bit rasterization
could potentially overflow. However, we used the bounding box which already got
rounded up to 0 for negative coords for this, which is incorrect, leading to
overflows and hence bogus rendering in some of our private use.

It might be possible to simplify this somehow (we're now using 3 different
boxes for binning) but I don't quite see how.

Reviewed-by: Brian Paul 
Reviewed-by: Jose Fonseca 

---

 src/gallium/drivers/llvmpipe/lp_setup_context.h | 11 ---
 src/gallium/drivers/llvmpipe/lp_setup_line.c| 20 ++--
 src/gallium/drivers/llvmpipe/lp_setup_point.c   |  2 +-
 src/gallium/drivers/llvmpipe/lp_setup_tri.c | 41 -
 4 files changed, 43 insertions(+), 31 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h 
b/src/gallium/drivers/llvmpipe/lp_setup_context.h
index 9714691270..4b55fd922c 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h
@@ -215,10 +215,11 @@ lp_setup_alloc_triangle(struct lp_scene *scene,
 unsigned *tri_size);
 
 boolean
-lp_setup_bin_triangle( struct lp_setup_context *setup,
-   struct lp_rast_triangle *tri,
-   const struct u_rect *bbox,
-   int nr_planes,
-   unsigned scissor_index );
+lp_setup_bin_triangle(struct lp_setup_context *setup,
+  struct lp_rast_triangle *tri,
+  const struct u_rect *bboxorig,
+  const struct u_rect *bbox,
+  int nr_planes,
+  unsigned scissor_index);
 
 #endif
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c 
b/src/gallium/drivers/llvmpipe/lp_setup_line.c
index 018130c319..d0bac5efb9 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_line.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c
@@ -288,7 +288,9 @@ try_setup_line( struct lp_setup_context *setup,
struct lp_rast_plane *plane;
struct lp_line_info info;
float width = MAX2(1.0, setup->line_width);
-   struct u_rect bbox;
+   const struct u_rect *scissor;
+   struct u_rect bbox, bboxpos;
+   boolean s_planes[4];
unsigned tri_bytes;
int x[4]; 
int y[4];
@@ -579,10 +581,12 @@ try_setup_line( struct lp_setup_context *setup,
   return TRUE;
}
 
+   bboxpos = bbox;
+
/* Can safely discard negative regions:
 */
-   bbox.x0 = MAX2(bbox.x0, 0);
-   bbox.y0 = MAX2(bbox.y0, 0);
+   bboxpos.x0 = MAX2(bboxpos.x0, 0);
+   bboxpos.y0 = MAX2(bboxpos.y0, 0);
 
nr_planes = 4;
/*
@@ -591,8 +595,8 @@ try_setup_line( struct lp_setup_context *setup,
 */
if (setup->scissor_test) {
   /* why not just use draw_regions */
-  boolean s_planes[4];
-  scissor_planes_needed(s_planes, &bbox, &setup->scissors[viewport_index]);
+  scissor = &setup->scissors[viewport_index];
+  scissor_planes_needed(s_planes, &bboxpos, scissor);
   nr_planes += s_planes[0] + s_planes[1] + s_planes[2] + s_planes[3];
}
 
@@ -718,11 +722,7 @@ try_setup_line( struct lp_setup_context *setup,
 * (easier to evaluate) to ordinary planes.)
 */
if (nr_planes > 4) {
-  /* why not just use draw_regions */
-  const struct u_rect *scissor = &setup->scissors[viewport_index];
   struct lp_rast_plane *plane_s = &plane[4];
-  boolean s_planes[4];
-  scissor_planes_needed(s_planes, &bbox, scissor);
 
   if (s_planes[0]) {
  plane_s->dcdx = -1 << 8;
@@ -755,7 +755,7 @@ try_setup_line( struct lp_setup_context *setup,
   assert(plane_s == &plane[nr_planes]);
}
 
-   return lp_setup_bin_triangle(setup, line, &bbox, nr_planes, viewport_index);
+   return lp_setup_bin_triangle(setup, line, &bbox, &bboxpos, nr_planes, 
viewport_index);
 }
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c 
b/src/gallium/drivers/llvmpipe/lp_setup_point.c
index ddb6f0e73b..8cb6b83f91 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_point.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c
@@ -513,7 +513,7 @@ try_setup_point( struct lp_setup_context *setup,
   plane[3].eo = 0;
}
 
-   return lp_setup_bin_triangle(setup, point, &bbox, nr_planes, 
viewport_index);
+   return lp_setup_bin_triangle(setup, point, &bbox, &bbox, nr_planes, 
viewport_index);
 }
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c 
b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index 324e93841f..39755d6b58 100644
--- a/src/gallium/drive

Mesa (master): gallium: fixed modulo zero crashes in tgsi interpreter (v2)

2017-06-10 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: f3c0bbe18ac65d22b2630f89fc1628bfe79695d4
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=f3c0bbe18ac65d22b2630f89fc1628bfe79695d4

Author: Marius Gräfe 
Date:   Fri Jun  9 15:39:00 2017 +0200

gallium: fixed modulo zero crashes in tgsi interpreter (v2)

softpipe throws integer division by zero exceptions on windows
when using % with integers in a geometry shader.

v2: Made error results consistent with existing div/mod zero handling in
tgsi. 64 bit signed integer division by zero returns zero like in
micro_idiv, unsigned returns ~0u like in micro_udiv.
Modulo operations always set all result bits to one (like in
micro_umod).

Reviewed-by: Roland Scheidegger 

---

 src/gallium/auxiliary/tgsi/tgsi_exec.c | 40 +-
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index c41954cbf7..97c75e999c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -846,40 +846,40 @@ static void
 micro_u64div(union tgsi_double_channel *dst,
  const union tgsi_double_channel *src)
 {
-   dst->u64[0] = src[0].u64[0] / src[1].u64[0];
-   dst->u64[1] = src[0].u64[1] / src[1].u64[1];
-   dst->u64[2] = src[0].u64[2] / src[1].u64[2];
-   dst->u64[3] = src[0].u64[3] / src[1].u64[3];
+   dst->u64[0] = src[1].u64[0] ? src[0].u64[0] / src[1].u64[0] : ~0ull;
+   dst->u64[1] = src[1].u64[1] ? src[0].u64[1] / src[1].u64[1] : ~0ull;
+   dst->u64[2] = src[1].u64[2] ? src[0].u64[2] / src[1].u64[2] : ~0ull;
+   dst->u64[3] = src[1].u64[3] ? src[0].u64[3] / src[1].u64[3] : ~0ull;
 }
 
 static void
 micro_i64div(union tgsi_double_channel *dst,
  const union tgsi_double_channel *src)
 {
-   dst->i64[0] = src[0].i64[0] / src[1].i64[0];
-   dst->i64[1] = src[0].i64[1] / src[1].i64[1];
-   dst->i64[2] = src[0].i64[2] / src[1].i64[2];
-   dst->i64[3] = src[0].i64[3] / src[1].i64[3];
+   dst->i64[0] = src[1].i64[0] ? src[0].i64[0] / src[1].i64[0] : 0;
+   dst->i64[1] = src[1].i64[1] ? src[0].i64[1] / src[1].i64[1] : 0;
+   dst->i64[2] = src[1].i64[2] ? src[0].i64[2] / src[1].i64[2] : 0;
+   dst->i64[3] = src[1].i64[3] ? src[0].i64[3] / src[1].i64[3] : 0;
 }
 
 static void
 micro_u64mod(union tgsi_double_channel *dst,
  const union tgsi_double_channel *src)
 {
-   dst->u64[0] = src[0].u64[0] % src[1].u64[0];
-   dst->u64[1] = src[0].u64[1] % src[1].u64[1];
-   dst->u64[2] = src[0].u64[2] % src[1].u64[2];
-   dst->u64[3] = src[0].u64[3] % src[1].u64[3];
+   dst->u64[0] = src[1].u64[0] ? src[0].u64[0] % src[1].u64[0] : ~0ull;
+   dst->u64[1] = src[1].u64[1] ? src[0].u64[1] % src[1].u64[1] : ~0ull;
+   dst->u64[2] = src[1].u64[2] ? src[0].u64[2] % src[1].u64[2] : ~0ull;
+   dst->u64[3] = src[1].u64[3] ? src[0].u64[3] % src[1].u64[3] : ~0ull;
 }
 
 static void
 micro_i64mod(union tgsi_double_channel *dst,
  const union tgsi_double_channel *src)
 {
-   dst->i64[0] = src[0].i64[0] % src[1].i64[0];
-   dst->i64[1] = src[0].i64[1] % src[1].i64[1];
-   dst->i64[2] = src[0].i64[2] % src[1].i64[2];
-   dst->i64[3] = src[0].i64[3] % src[1].i64[3];
+   dst->i64[0] = src[1].i64[0] ? src[0].i64[0] % src[1].i64[0] : ~0ll;
+   dst->i64[1] = src[1].i64[1] ? src[0].i64[1] % src[1].i64[1] : ~0ll;
+   dst->i64[2] = src[1].i64[2] ? src[0].i64[2] % src[1].i64[2] : ~0ll;
+   dst->i64[3] = src[1].i64[3] ? src[0].i64[3] % src[1].i64[3] : ~0ll;
 }
 
 static void
@@ -4653,10 +4653,10 @@ micro_mod(union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1)
 {
-   dst->i[0] = src0->i[0] % src1->i[0];
-   dst->i[1] = src0->i[1] % src1->i[1];
-   dst->i[2] = src0->i[2] % src1->i[2];
-   dst->i[3] = src0->i[3] % src1->i[3];
+   dst->i[0] = src1->i[0] ? src0->i[0] % src1->i[0] : ~0;
+   dst->i[1] = src1->i[1] ? src0->i[1] % src1->i[1] : ~0;
+   dst->i[2] = src1->i[2] ? src0->i[2] % src1->i[2] : ~0;
+   dst->i[3] = src1->i[3] ? src0->i[3] % src1->i[3] : ~0;
 }
 
 static void

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): llvmpipe: add LP_NEW_GS flag for updating vertex info

2017-05-27 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: d2724fe5bddb1ca9cb61c79ddfe78a09b92eebc5
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=d2724fe5bddb1ca9cb61c79ddfe78a09b92eebc5

Author: Roland Scheidegger 
Date:   Sat May 27 04:34:14 2017 +0200

llvmpipe: add LP_NEW_GS flag for updating vertex info

The vertex information we compute here is really dependent on the last
stage before FS. It just happened to work most of the time because new
GS tend to come with new VS and/or FS...
(The LP_NEW_GS flag was previously set but never used.)

Reviewed-by: Brian Paul 
Reviewed-by: Jose Fonseca 

---

 src/gallium/drivers/llvmpipe/lp_state_derived.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c 
b/src/gallium/drivers/llvmpipe/lp_state_derived.c
index fa9d4fb2fd..3e75d44dac 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_derived.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c
@@ -194,6 +194,7 @@ void llvmpipe_update_derived( struct llvmpipe_context 
*llvmpipe )
/* This needs LP_NEW_RASTERIZER because of draw_prepare_shader_outputs(). */
if (llvmpipe->dirty & (LP_NEW_RASTERIZER |
   LP_NEW_FS |
+  LP_NEW_GS |
   LP_NEW_VS))
   compute_vertex_info(llvmpipe);
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): gallivm: (trivial) remove duplicated line

2017-03-15 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: e1f9e9bafdb6da44c6bd6be8414913e481f8b031
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=e1f9e9bafdb6da44c6bd6be8414913e481f8b031

Author: Roland Scheidegger 
Date:   Thu Mar 16 04:01:41 2017 +0100

gallivm: (trivial) remove duplicated line

pointed out by clang (stored value never read)

---

 src/gallium/auxiliary/gallivm/lp_bld_format_soa.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c 
b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
index cd17040..98eb694 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
@@ -505,7 +505,6 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
* First, figure out fetch order.
*/
   fetch_width = util_next_power_of_two(format_desc->block.bits);
-  num_gather = fetch_width / type.width;
   /*
* fp64 are treated like fp32 except we fetch twice wide values
* (as we shuffle after trunc). The shuffles for that work out

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): draw: (trivial) remove an unnecessary lp_build_alloca()

2017-03-15 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 9d104dfd55afa4477fcc4037b992a8c99ac97431
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=9d104dfd55afa4477fcc4037b992a8c99ac97431

Author: Roland Scheidegger 
Date:   Thu Mar 16 03:59:52 2017 +0100

draw: (trivial) remove an unnecessary lp_build_alloca()

pointed out by clang (stored value never read)

---

 src/gallium/auxiliary/draw/draw_llvm.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_llvm.c 
b/src/gallium/auxiliary/draw/draw_llvm.c
index 104965b..bb08f66 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -1670,8 +1670,6 @@ draw_llvm_generate(struct draw_llvm *llvm, struct 
draw_llvm_variant *variant)
   ind_vec = LLVMBuildInsertElement(builder, ind_vec, index, index, "");
}
 
-   fetch_max = lp_build_alloca(gallivm, int32_type, "fetch_max");
-
have_elts = LLVMBuildICmp(builder, LLVMIntNE,
  LLVMConstPointerNull(arg_types[10]), fetch_elts, 
"");
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): gallium/util: (trivial) fix util_clear_render_target

2017-02-24 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: c3a94d9195bff3a870d5a78dd53bd69c26eb23af
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=c3a94d9195bff3a870d5a78dd53bd69c26eb23af

Author: Roland Scheidegger 
Date:   Fri Feb 24 18:40:40 2017 +0100

gallium/util: (trivial) fix util_clear_render_target

The format of the rt can be different from that of the texture, so we must
propagate the format explicitly to the helper. Broken since
3f9c5d62441eba38e8b1592aba965ed5db6fd89b (but unused by st/mesa).

---

 src/gallium/auxiliary/util/u_surface.c | 15 ---
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_surface.c 
b/src/gallium/auxiliary/util/u_surface.c
index f2a471d..5abf966 100644
--- a/src/gallium/auxiliary/util/u_surface.c
+++ b/src/gallium/auxiliary/util/u_surface.c
@@ -423,6 +423,7 @@ util_clear_color_texture_helper(struct pipe_transfer 
*dst_trans,
 static void
 util_clear_color_texture(struct pipe_context *pipe,
  struct pipe_resource *texture,
+ enum pipe_format format,
  const union pipe_color_union *color,
  unsigned level,
  unsigned dstx, unsigned dsty, unsigned dstz,
@@ -430,7 +431,6 @@ util_clear_color_texture(struct pipe_context *pipe,
 {
struct pipe_transfer *dst_trans;
ubyte *dst_map;
-   enum pipe_format format = texture->format;
 
dst_map = pipe_transfer_map_3d(pipe,
   texture,
@@ -491,16 +491,16 @@ util_clear_render_target(struct pipe_context *pipe,
   dx, 0, w, 1,
   &dst_trans);
   if (dst_map) {
- util_clear_color_texture_helper(dst_trans, dst_map, dst->format, 
color,
- width, height, 1);
+ util_clear_color_texture_helper(dst_trans, dst_map, dst->format,
+ color, width, height, 1);
  pipe->transfer_unmap(pipe, dst_trans);
   }
}
else {
   unsigned depth = dst->u.tex.last_layer - dst->u.tex.first_layer + 1;
-  util_clear_color_texture(pipe, dst->texture, color, dst->u.tex.level,
-   dstx, dsty, dst->u.tex.first_layer,
-   width, height, depth);
+  util_clear_color_texture(pipe, dst->texture, dst->format, color,
+   dst->u.tex.level, dstx, dsty,
+   dst->u.tex.first_layer, width, height, depth);
}
 }
 
@@ -674,7 +674,8 @@ util_clear_texture(struct pipe_context *pipe,
   else
  desc->unpack_rgba_float(color.f, 0, data, 0, 1, 1);
 
-  util_clear_color_texture(pipe, tex, &color, level, box->x, box->y, 
box->z,
+  util_clear_color_texture(pipe, tex, tex->format, &color, level,
+   box->x, box->y, box->z,
box->width, box->height, box->depth);
}
 }

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): gallium: implement util_clear_texture

2017-02-24 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 3f9c5d62441eba38e8b1592aba965ed5db6fd89b
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=3f9c5d62441eba38e8b1592aba965ed5db6fd89b

Author: Lars Hamre 
Date:   Wed Feb 22 10:56:41 2017 -0500

gallium: implement util_clear_texture

v3: have util_clear_texture mirror the pipe function (Roland Scheidegger)
v2: rework util clear functions such that they operate on a resource
instead of a surface (Roland Scheidegger)

Creates a util_clear_texture function for implementing GL_ARB_clear_texture
in softpipe and llvmpipe.

Signed-off-by: Lars Hamre 
Reviewed-by: Roland Scheidegger 
Reviewed-by: Edward O'Callaghan 

---

 src/gallium/auxiliary/util/u_surface.c | 386 -
 src/gallium/auxiliary/util/u_surface.h |   7 +
 2 files changed, 248 insertions(+), 145 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_surface.c 
b/src/gallium/auxiliary/util/u_surface.c
index a9ed006..f2a471d 100644
--- a/src/gallium/auxiliary/util/u_surface.c
+++ b/src/gallium/auxiliary/util/u_surface.c
@@ -388,6 +388,66 @@ no_src_map:
;
 }
 
+static void
+util_clear_color_texture_helper(struct pipe_transfer *dst_trans,
+ubyte *dst_map,
+enum pipe_format format,
+const union pipe_color_union *color,
+unsigned width, unsigned height, unsigned depth)
+{
+   union util_color uc;
+
+   assert(dst_trans->stride > 0);
+
+   if (util_format_is_pure_integer(format)) {
+  /*
+   * We expect int/uint clear values here, though some APIs
+   * might disagree (but in any case util_pack_color()
+   * couldn't handle it)...
+   */
+  if (util_format_is_pure_sint(format)) {
+ util_format_write_4i(format, color->i, 0, &uc, 0, 0, 0, 1, 1);
+  } else {
+ assert(util_format_is_pure_uint(format));
+ util_format_write_4ui(format, color->ui, 0, &uc, 0, 0, 0, 1, 1);
+  }
+   } else {
+  util_pack_color(color->f, format, &uc);
+   }
+
+   util_fill_box(dst_map, format,
+ dst_trans->stride, dst_trans->layer_stride,
+ 0, 0, 0, width, height, depth, &uc);
+}
+
+static void
+util_clear_color_texture(struct pipe_context *pipe,
+ struct pipe_resource *texture,
+ const union pipe_color_union *color,
+ unsigned level,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ unsigned width, unsigned height, unsigned depth)
+{
+   struct pipe_transfer *dst_trans;
+   ubyte *dst_map;
+   enum pipe_format format = texture->format;
+
+   dst_map = pipe_transfer_map_3d(pipe,
+  texture,
+  level,
+  PIPE_TRANSFER_WRITE,
+  dstx, dsty, dstz,
+  width, height, depth,
+  &dst_trans);
+   if (!dst_map)
+  return;
+
+   if (dst_trans->stride > 0) {
+  util_clear_color_texture_helper(dst_trans, dst_map, format, color,
+  width, height, depth);
+   }
+   pipe->transfer_unmap(pipe, dst_trans);
+}
 
 
 #define UBYTE_TO_USHORT(B) ((B) | ((B) << 8))
@@ -410,8 +470,6 @@ util_clear_render_target(struct pipe_context *pipe,
 {
struct pipe_transfer *dst_trans;
ubyte *dst_map;
-   union util_color uc;
-   unsigned max_layer;
 
assert(dst->texture);
if (!dst->texture)
@@ -426,56 +484,202 @@ util_clear_render_target(struct pipe_context *pipe,
   unsigned pixstride = util_format_get_blocksize(dst->format);
   dx = (dst->u.buf.first_element + dstx) * pixstride;
   w = width * pixstride;
-  max_layer = 0;
   dst_map = pipe_transfer_map(pipe,
   dst->texture,
   0, 0,
   PIPE_TRANSFER_WRITE,
   dx, 0, w, 1,
   &dst_trans);
+  if (dst_map) {
+ util_clear_color_texture_helper(dst_trans, dst_map, dst->format, color,
+ width, height, 1);
+ pipe->transfer_unmap(pipe, dst_trans);
+  }
}
else {
-  max_layer = dst->u.tex.last_layer - dst->u.tex.first_layer;
-  dst_map = pipe_transfer_map_3d(pipe,
- dst->texture,
- dst->u.tex.level,
- PIPE_TRANSFER_WRITE,
- dstx, dsty, dst->u.tex.first_layer,
- width, height, max_layer + 1, &dst_trans);
+  unsigned depth = dst->u.tex.

Mesa (master): docs: update features.txt for GL_ARB_clear_texture with llvmpipe and softpipe

2017-02-24 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: caf4252a01dc95fbc47e8a50988040f67396b278
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=caf4252a01dc95fbc47e8a50988040f67396b278

Author: Lars Hamre 
Date:   Wed Feb 22 10:56:44 2017 -0500

docs: update features.txt for GL_ARB_clear_texture with llvmpipe and softpipe

Signed-off-by: Lars Hamre 
Reviewed-by: Roland Scheidegger 
Reviewed-by: Edward O'Callaghan 

---

 docs/features.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/features.txt b/docs/features.txt
index 346ba28..d9528e9 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -192,7 +192,7 @@ GL 4.4, GLSL 4.40 -- all DONE: i965/gen8+, nvc0, radeonsi
 
   GL_MAX_VERTEX_ATTRIB_STRIDE   DONE (all drivers)
   GL_ARB_buffer_storage DONE (i965, nv50, r600)
-  GL_ARB_clear_texture  DONE (i965, nv50, r600)
+  GL_ARB_clear_texture  DONE (i965, nv50, r600, llvmpipe, softpipe)
  GL_ARB_enhanced_layouts   DONE (i965, nv50, llvmpipe, softpipe)
   - compile-time constant expressions   DONE
   - explicit byte offsets for blocksDONE

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): llvmpipe: enable clear_texture with util_clear_texture

2017-02-24 Thread Roland Scheidegger
Module: Mesa
Branch: master
Commit: 12f2058b47c51f1357b622e77c703d5eb05bce50
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=12f2058b47c51f1357b622e77c703d5eb05bce50

Author: Lars Hamre 
Date:   Wed Feb 22 10:56:42 2017 -0500

llvmpipe: enable clear_texture with util_clear_texture

Passes all corresponding piglit tests.

Signed-off-by: Lars Hamre 
Reviewed-by: Roland Scheidegger 
Reviewed-by: Edward O'Callaghan 

---

 src/gallium/drivers/llvmpipe/lp_screen.c  | 3 ++-
 src/gallium/drivers/llvmpipe/lp_surface.c | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 76a30a6..2633b0c 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -307,6 +307,8 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
   return 1;
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
   return 1;
+   case PIPE_CAP_CLEAR_TEXTURE:
+  return 1;
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
@@ -315,7 +317,6 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
-   case PIPE_CAP_CLEAR_TEXTURE:
case PIPE_CAP_DRAW_PARAMETERS:
case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
case PIPE_CAP_MULTI_DRAW_INDIRECT:
diff --git a/src/gallium/drivers/llvmpipe/lp_surface.c b/src/gallium/drivers/llvmpipe/lp_surface.c
index 784db7f..953b26e 100644
--- a/src/gallium/drivers/llvmpipe/lp_surface.c
+++ b/src/gallium/drivers/llvmpipe/lp_surface.c
@@ -231,7 +231,8 @@ llvmpipe_init_surface_functions(struct llvmpipe_context *lp)
lp->pipe.clear_depth_stencil = llvmpipe_clear_depth_stencil;
lp->pipe.create_surface = llvmpipe_create_surface;
lp->pipe.surface_destroy = llvmpipe_surface_destroy;
-   /* These two are not actually functions dealing with surfaces */
+   /* These are not actually functions dealing with surfaces */
+   lp->pipe.clear_texture = util_clear_texture;
lp->pipe.resource_copy_region = lp_resource_copy;
lp->pipe.blit = lp_blit;
lp->pipe.flush_resource = lp_flush_resource;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


  1   2   3   4   5   6   7   8   9   10   >