[Mesa-dev] [PATCH 3/9] nir/opt_peephole_select: Don't peephole_select expensive math instructions

2018-08-29 Thread Ian Romanick
From: Ian Romanick 

On some GPUs, especially older Intel GPUs, some math instructions are
very expensive.  On those architectures, don't reduce flow control to a
csel if one of the branches contains one of these expensive math
instructions.

This prevents a bunch of cycle count regressions on pre-Gen6 platforms
with a later patch (intel/compiler: More peephole select for pre-Gen6).

Signed-off-by: Ian Romanick 
---
 src/amd/vulkan/radv_shader.c |  2 +-
 src/broadcom/compiler/nir_to_vir.c   |  2 +-
 src/compiler/nir/nir.h   |  2 +-
 src/compiler/nir/nir_opt_peephole_select.c   | 46 +++-
 src/gallium/drivers/freedreno/ir3/ir3_nir.c  |  2 +-
 src/gallium/drivers/radeonsi/si_shader_nir.c |  2 +-
 src/gallium/drivers/vc4/vc4_program.c|  2 +-
 src/intel/compiler/brw_nir.c |  4 +--
 src/mesa/state_tracker/st_glsl_to_nir.cpp|  2 +-
 9 files changed, 47 insertions(+), 17 deletions(-)

diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 632512db09b..c8d502a9e3a 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -143,7 +143,7 @@ radv_optimize_nir(struct nir_shader *shader, bool 
optimize_conservatively)
 NIR_PASS(progress, shader, nir_opt_if);
 NIR_PASS(progress, shader, nir_opt_dead_cf);
 NIR_PASS(progress, shader, nir_opt_cse);
-NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true);
+NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true, 
true);
 NIR_PASS(progress, shader, nir_opt_algebraic);
 NIR_PASS(progress, shader, nir_opt_constant_folding);
 NIR_PASS(progress, shader, nir_opt_undef);
diff --git a/src/broadcom/compiler/nir_to_vir.c 
b/src/broadcom/compiler/nir_to_vir.c
index 0d23cea4d5b..ec0ff4b907a 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -1210,7 +1210,7 @@ v3d_optimize_nir(struct nir_shader *s)
 NIR_PASS(progress, s, nir_opt_dce);
 NIR_PASS(progress, s, nir_opt_dead_cf);
 NIR_PASS(progress, s, nir_opt_cse);
-NIR_PASS(progress, s, nir_opt_peephole_select, 8, true);
+NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
 NIR_PASS(progress, s, nir_opt_algebraic);
 NIR_PASS(progress, s, nir_opt_constant_folding);
 NIR_PASS(progress, s, nir_opt_undef);
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 67fa46d5557..feb69be6b59 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3003,7 +3003,7 @@ bool nir_opt_move_comparisons(nir_shader *shader);
 bool nir_opt_move_load_ubo(nir_shader *shader);
 
 bool nir_opt_peephole_select(nir_shader *shader, unsigned limit,
- bool indirect_load_ok);
+ bool indirect_load_ok, bool expensive_alu_ok);
 
 bool nir_opt_remove_phis_impl(nir_function_impl *impl);
 bool nir_opt_remove_phis(nir_shader *shader);
diff --git a/src/compiler/nir/nir_opt_peephole_select.c 
b/src/compiler/nir/nir_opt_peephole_select.c
index 6808d3eda6c..09b55f3739e 100644
--- a/src/compiler/nir/nir_opt_peephole_select.c
+++ b/src/compiler/nir/nir_opt_peephole_select.c
@@ -59,7 +59,8 @@
 
 static bool
 block_check_for_allowed_instrs(nir_block *block, unsigned *count,
-   bool alu_ok, bool indirect_load_ok)
+   bool alu_ok, bool indirect_load_ok,
+   bool expensive_alu_ok)
 {
nir_foreach_instr(instr, block) {
   switch (instr->type) {
@@ -117,6 +118,25 @@ block_check_for_allowed_instrs(nir_block *block, unsigned 
*count,
  case nir_op_vec3:
  case nir_op_vec4:
 break;
+
+ case nir_op_fcos:
+ case nir_op_fdiv:
+ case nir_op_fexp2:
+ case nir_op_flog2:
+ case nir_op_fmod:
+ case nir_op_fpow:
+ case nir_op_frcp:
+ case nir_op_frem:
+ case nir_op_frsq:
+ case nir_op_fsin:
+ case nir_op_idiv:
+ case nir_op_irem:
+ case nir_op_udiv:
+if (!alu_ok || !expensive_alu_ok)
+   return false;
+
+break;
+
  default:
 if (!alu_ok) {
/* It must be a move-like operation. */
@@ -160,7 +180,8 @@ block_check_for_allowed_instrs(nir_block *block, unsigned 
*count,
 
 static bool
 nir_opt_peephole_select_block(nir_block *block, nir_shader *shader,
-  unsigned limit, bool indirect_load_ok)
+  unsigned limit, bool indirect_load_ok,
+  bool expensive_alu_ok)
 {
if (nir_cf_node_is_first(&block->cf_node))
   return false;
@@ -180,10 +201,17 @@ nir_opt_peephole_select_block(nir_block *block, 
nir_shader 

[Mesa-dev] [PATCH 8/9 v2] nir: Add partial redundancy elimination for compares

2018-08-29 Thread Ian Romanick
From: Ian Romanick 

This pass attempts to detect code sequences like

if (x < y) {
z = y - x;
...
}

and replace them with sequences like

t = x - y;
if (t < 0) {
z = t;
...
}

On architectures where the subtract can generate the flags used by the
if-statement, this saves an instruction.  It's also possible that moving
an instruction out of the if-statement will allow
nir_opt_peephole_select to convert the whole thing to a bcsel.

Currently only floating point compares and adds are supported.  Adding
support for integer will be a challenge due to integer overflow.  There
are a couple possible solutions, but they may not apply to all
architectures.

v2: Fix a typo in the commit message and a couple typos in comments.
Fix possible NULL pointer deref from result of push_block().  Add
missing (-A + B) case.  Suggested by Caio.

Signed-off-by: Ian Romanick 
---
 src/compiler/Makefile.sources |   1 +
 src/compiler/nir/meson.build  |   1 +
 src/compiler/nir/nir.h|   2 +
 src/compiler/nir/nir_opt_comparison_pre.c | 360 ++
 src/compiler/nir/nir_search_helpers.h |  29 +++
 5 files changed, 393 insertions(+)
 create mode 100644 src/compiler/nir/nir_opt_comparison_pre.c

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index d3b06564832..9fe8d5b8904 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -267,6 +267,7 @@ NIR_FILES = \
nir/nir_move_load_const.c \
nir/nir_move_vec_src_uses_to_dest.c \
nir/nir_normalize_cubemap_coords.c \
+   nir/nir_opt_comparison_pre.c \
nir/nir_opt_conditional_discard.c \
nir/nir_opt_constant_folding.c \
nir/nir_opt_copy_prop_vars.c \
diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index 5438c17a8f8..2bcc854829e 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -151,6 +151,7 @@ files_libnir = files(
   'nir_move_load_const.c',
   'nir_move_vec_src_uses_to_dest.c',
   'nir_normalize_cubemap_coords.c',
+  'nir_opt_comparison_pre.c',
   'nir_opt_conditional_discard.c',
   'nir_opt_constant_folding.c',
   'nir_opt_copy_prop_vars.c',
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index ddbcb3c647e..c78387d0acf 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3000,6 +3000,8 @@ bool nir_convert_from_ssa(nir_shader *shader, bool 
phi_webs_only);
 bool nir_lower_phis_to_regs_block(nir_block *block);
 bool nir_lower_ssa_defs_to_regs_block(nir_block *block);
 
+bool nir_opt_comparison_pre(nir_shader *shader);
+
 bool nir_opt_algebraic(nir_shader *shader);
 bool nir_opt_algebraic_before_ffma(nir_shader *shader);
 bool nir_opt_algebraic_late(nir_shader *shader);
diff --git a/src/compiler/nir/nir_opt_comparison_pre.c 
b/src/compiler/nir/nir_opt_comparison_pre.c
new file mode 100644
index 00000000000..b2827c21816
--- /dev/null
+++ b/src/compiler/nir/nir_opt_comparison_pre.c
@@ -0,0 +1,360 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir_instr_set.h"
+#include "nir_search_helpers.h"
+#include "nir_builder.h"
+#include "util/u_vector.h"
+
+/* Partial redundancy elimination of compares
+ *
+ * Searches for comparisons of the form 'a cmp b' that dominate arithmetic
+ * instructions like 'b - a'.  The comparison is replaced by the arithmetic
+ * instruction, and the result is compared with zero.  For example,
+ *
+ *   vec1 32 ssa_111 = flt 0.37, ssa_110.w
+ *   if ssa_111 {
+ *   block block_1:
+ *  vec1 32 ssa_112 = fadd ssa_110.w, -0.37
+ *  ...
+ *
+ * becomes
+ *
+ *   vec1 32 ssa_111 = fadd ssa_110.w, -0.37
+ *   vec1 32 ssa_112 = flt 0.0, ssa_111
+ *   if ssa_112 {
+ *   block block_1:
+ 

[Mesa-dev] [PATCH 6/9] nir: Add nir_const_value_negative_equal

2018-08-29 Thread Ian Romanick
From: Ian Romanick 

Signed-off-by: Ian Romanick 
---
 src/compiler/nir/meson.build|  12 +
 src/compiler/nir/nir.h  |   6 +
 src/compiler/nir/nir_instr_set.c|  98 +
 src/compiler/nir/tests/negative_equal_tests.cpp | 278 
 4 files changed, 394 insertions(+)
 create mode 100644 src/compiler/nir/tests/negative_equal_tests.cpp

diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index 090aa7a628f..5438c17a8f8 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -245,4 +245,16 @@ if with_tests
   link_with : libmesa_util,
 )
   )
+
+  test(
+'negative_equal',
+executable(
+  'negative_equal',
+  files('tests/negative_equal_tests.cpp'),
+  c_args : [c_vis_args, c_msvc_compat_args, no_override_init_args],
+  include_directories : [inc_common],
+  dependencies : [dep_thread, idep_gtest, idep_nir],
+  link_with : libmesa_util,
+)
+  )
 endif
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 9bca6d487e9..f94538e0782 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -955,6 +955,12 @@ nir_ssa_alu_instr_src_components(const nir_alu_instr 
*instr, unsigned src)
return instr->dest.dest.ssa.num_components;
 }
 
+bool nir_const_value_negative_equal(const nir_const_value *c1,
+const nir_const_value *c2,
+unsigned components,
+nir_alu_type base_type,
+unsigned bits);
+
 bool nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2,
 unsigned src1, unsigned src2);
 
diff --git a/src/compiler/nir/nir_instr_set.c b/src/compiler/nir/nir_instr_set.c
index 19771fcd9dd..009d9661e60 100644
--- a/src/compiler/nir/nir_instr_set.c
+++ b/src/compiler/nir/nir_instr_set.c
@@ -23,6 +23,7 @@
 
 #include "nir_instr_set.h"
 #include "nir_vla.h"
+#include "util/half_float.h"
 
 #define HASH(hash, data) _mesa_fnv32_1a_accumulate((hash), (data))
 
@@ -261,6 +262,103 @@ nir_srcs_equal(nir_src src1, nir_src src2)
}
 }
 
+bool
+nir_const_value_negative_equal(const nir_const_value *c1,
+   const nir_const_value *c2,
+   unsigned components,
+   nir_alu_type base_type,
+   unsigned bits)
+{
+   assert(base_type == nir_alu_type_get_base_type(base_type));
+   assert(base_type != nir_type_invalid);
+
+   switch (base_type) {
+   case nir_type_float:
+  switch (bits) {
+  case 16:
+ for (unsigned i = 0; i < components; i++) {
+if (_mesa_half_to_float(c1->u16[i]) !=
+-_mesa_half_to_float(c2->u16[i])) {
+   return false;
+}
+ }
+
+ return true;
+
+  case 32:
+ for (unsigned i = 0; i < components; i++) {
+if (c1->f32[i] != -c2->f32[i])
+   return false;
+ }
+
+ return true;
+
+  case 64:
+ for (unsigned i = 0; i < components; i++) {
+if (c1->f64[i] != -c2->f64[i])
+   return false;
+ }
+
+ return true;
+
+  default:
+ unreachable("unknown bit size");
+  }
+
+  break;
+
+   case nir_type_int:
+   case nir_type_uint:
+  switch (bits) {
+  case 8:
+ for (unsigned i = 0; i < components; i++) {
+if (c1->i8[i] != -c2->i8[i])
+   return false;
+ }
+
+ return true;
+
+  case 16:
+ for (unsigned i = 0; i < components; i++) {
+if (c1->i16[i] != -c2->i16[i])
+   return false;
+ }
+
+ return true;
+ break;
+
+  case 32:
+ for (unsigned i = 0; i < components; i++) {
+if (c1->i32[i] != -c2->i32[i])
+   return false;
+ }
+
+ return true;
+
+  case 64:
+ for (unsigned i = 0; i < components; i++) {
+if (c1->i64[i] != -c2->i64[i])
+   return false;
+ }
+
+ return true;
+
+  default:
+ unreachable("unknown bit size");
+  }
+
+  break;
+
+   case nir_type_bool:
+  return false;
+
+   default:
+  break;
+   }
+
+   return false;
+}
+
 bool
 nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2,
unsigned src1, unsigned src2)
diff --git a/src/compiler/nir/tests/negative_equal_tests.cpp 
b/src/compiler/nir/tests/negative_equal_tests.cpp
new file mode 100644
index 00000000000..e450a8172db
--- /dev/null
+++ b/src/compiler/nir/tests/negative_equal_tests.cpp
@@ -0,0 +1,278 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal 

[Mesa-dev] [PATCH 5/9] nir: Add helper functions to get the instruction that generated a nir_src

2018-08-29 Thread Ian Romanick
From: Ian Romanick 

Signed-off-by: Ian Romanick 
---
 src/compiler/nir/nir.h | 23 +++
 1 file changed, 23 insertions(+)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index feb69be6b59..9bca6d487e9 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2474,6 +2474,29 @@ bool nir_foreach_dest(nir_instr *instr, 
nir_foreach_dest_cb cb, void *state);
 bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state);
 
 nir_const_value *nir_src_as_const_value(nir_src src);
+
+static inline struct nir_instr *
+nir_src_instr(const struct nir_src *src)
+{
+   return src->is_ssa ? src->ssa->parent_instr : NULL;
+}
+
+#define NIR_SRC_AS_(name, c_type, type_enum, cast_macro)\
+static inline c_type *  \
+nir_src_as_ ## name (struct nir_src *src)   \
+{   \
+return src->is_ssa && src->ssa->parent_instr->type == type_enum \
+   ? cast_macro(src->ssa->parent_instr) : NULL; \
+}   \
+static inline const c_type *\
+nir_src_as_ ## name ## _const(const struct nir_src *src)\
+{   \
+return src->is_ssa && src->ssa->parent_instr->type == type_enum \
+   ? cast_macro(src->ssa->parent_instr) : NULL; \
+}
+
+NIR_SRC_AS_(alu_instr, nir_alu_instr, nir_instr_type_alu, nir_instr_as_alu)
+
 bool nir_src_is_dynamically_uniform(nir_src src);
 bool nir_srcs_equal(nir_src src1, nir_src src2);
 void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src);
-- 
2.14.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RESEND PATCH 0/9] Partial redundancy elimination for compares

2018-08-29 Thread Ian Romanick
This is mostly a resend of this series.  Several patches, noted with
"v#", have been updated.

Patches 3 and 4 are new.  Right before sending the series, I decided to
update the shader-db results AND update shader-db.  The update to
shader-db added a bunch of new shaders hurt by patch 4.  It may be
reasonable to drop 3 and 4 for now.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/9 v3] nir/opt_peephole_select: Don't try to remove flow control around indirect loads

2018-08-29 Thread Ian Romanick
From: Ian Romanick 

That flow control may be trying to avoid invalid loads.  On at least
some platforms, those loads can also be expensive.

No shader-db changes on any Intel platform (even with the later patch
"intel/compiler: More peephole select").

NOTE: I've tried to CC everyone whose driver is affected by this change.

v2: Add an 'indirect_load_ok' flag to nir_opt_peephole_select.  Suggested
by Rob.  See also the big comment in src/intel/compiler/brw_nir.c.

v3: Use nir_deref_instr_has_indirect instead of deref_has_indirect (from
nir_lower_io_arrays_to_elements.c).

Signed-off-by: Ian Romanick 
Cc: Eric Anholt 
Cc: Rob Clark 
Cc: Marek Olšák 
---
 src/amd/vulkan/radv_shader.c |  2 +-
 src/broadcom/compiler/nir_to_vir.c   |  2 +-
 src/compiler/nir/nir.h   |  3 ++-
 src/compiler/nir/nir_opt_peephole_select.c   | 37 
 src/gallium/drivers/freedreno/ir3/ir3_nir.c  |  2 +-
 src/gallium/drivers/radeonsi/si_shader_nir.c |  2 +-
 src/gallium/drivers/vc4/vc4_program.c|  2 +-
 src/intel/compiler/brw_nir.c | 13 +-
 src/mesa/state_tracker/st_glsl_to_nir.cpp|  2 +-
 9 files changed, 47 insertions(+), 18 deletions(-)

diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 207e5b050eb..632512db09b 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -143,7 +143,7 @@ radv_optimize_nir(struct nir_shader *shader, bool 
optimize_conservatively)
 NIR_PASS(progress, shader, nir_opt_if);
 NIR_PASS(progress, shader, nir_opt_dead_cf);
 NIR_PASS(progress, shader, nir_opt_cse);
-NIR_PASS(progress, shader, nir_opt_peephole_select, 8);
+NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true);
 NIR_PASS(progress, shader, nir_opt_algebraic);
 NIR_PASS(progress, shader, nir_opt_constant_folding);
 NIR_PASS(progress, shader, nir_opt_undef);
diff --git a/src/broadcom/compiler/nir_to_vir.c 
b/src/broadcom/compiler/nir_to_vir.c
index 158c1c3e9f3..0d23cea4d5b 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -1210,7 +1210,7 @@ v3d_optimize_nir(struct nir_shader *s)
 NIR_PASS(progress, s, nir_opt_dce);
 NIR_PASS(progress, s, nir_opt_dead_cf);
 NIR_PASS(progress, s, nir_opt_cse);
-NIR_PASS(progress, s, nir_opt_peephole_select, 8);
+NIR_PASS(progress, s, nir_opt_peephole_select, 8, true);
 NIR_PASS(progress, s, nir_opt_algebraic);
 NIR_PASS(progress, s, nir_opt_constant_folding);
 NIR_PASS(progress, s, nir_opt_undef);
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 12cad6029cd..67fa46d5557 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3002,7 +3002,8 @@ bool nir_opt_move_comparisons(nir_shader *shader);
 
 bool nir_opt_move_load_ubo(nir_shader *shader);
 
-bool nir_opt_peephole_select(nir_shader *shader, unsigned limit);
+bool nir_opt_peephole_select(nir_shader *shader, unsigned limit,
+ bool indirect_load_ok);
 
 bool nir_opt_remove_phis_impl(nir_function_impl *impl);
 bool nir_opt_remove_phis(nir_shader *shader);
diff --git a/src/compiler/nir/nir_opt_peephole_select.c 
b/src/compiler/nir/nir_opt_peephole_select.c
index ad9d0abec03..6808d3eda6c 100644
--- a/src/compiler/nir/nir_opt_peephole_select.c
+++ b/src/compiler/nir/nir_opt_peephole_select.c
@@ -58,7 +58,8 @@
  */
 
 static bool
-block_check_for_allowed_instrs(nir_block *block, unsigned *count, bool alu_ok)
+block_check_for_allowed_instrs(nir_block *block, unsigned *count,
+   bool alu_ok, bool indirect_load_ok)
 {
nir_foreach_instr(instr, block) {
   switch (instr->type) {
@@ -66,16 +67,26 @@ block_check_for_allowed_instrs(nir_block *block, unsigned 
*count, bool alu_ok)
  nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
 
  switch (intrin->intrinsic) {
- case nir_intrinsic_load_deref:
-switch (nir_src_as_deref(intrin->src[0])->mode) {
+ case nir_intrinsic_load_deref: {
+nir_deref_instr *const deref = nir_src_as_deref(intrin->src[0]);
+
+switch (deref->mode) {
 case nir_var_shader_in:
 case nir_var_uniform:
+   /* Don't try to remove flow control around an indirect load
+* because that flow control may be trying to avoid invalid
+* loads.
+*/
+   if (!indirect_load_ok && nir_deref_instr_has_indirect(deref))
+  return false;
+
break;
 
 default:
return false;
 }
 break;
+ }
 
  case nir_intrinsic_load_uniform:
 if (!alu_ok)
@@ -149,7 +160,7 @@ 

[Mesa-dev] [PATCH 7/9 v2] nir: Add nir_alu_srcs_negative_equal

2018-08-29 Thread Ian Romanick
From: Ian Romanick 

v2: Move bug fix in get_neg_instr from the next patch to this patch
(where it was intended to be in the first place).  Noticed by Caio.

Signed-off-by: Ian Romanick 
---
 src/compiler/nir/nir.h  |   4 +
 src/compiler/nir/nir_instr_set.c| 104 
 src/compiler/nir/tests/negative_equal_tests.cpp |  84 +++
 3 files changed, 192 insertions(+)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index f94538e0782..ddbcb3c647e 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -964,6 +964,10 @@ bool nir_const_value_negative_equal(const nir_const_value 
*c1,
 bool nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2,
 unsigned src1, unsigned src2);
 
+bool nir_alu_srcs_negative_equal(const nir_alu_instr *alu1,
+ const nir_alu_instr *alu2,
+ unsigned src1, unsigned src2);
+
 typedef enum {
nir_deref_type_var,
nir_deref_type_array,
diff --git a/src/compiler/nir/nir_instr_set.c b/src/compiler/nir/nir_instr_set.c
index 009d9661e60..7dbb7fc3097 100644
--- a/src/compiler/nir/nir_instr_set.c
+++ b/src/compiler/nir/nir_instr_set.c
@@ -262,6 +262,20 @@ nir_srcs_equal(nir_src src1, nir_src src2)
}
 }
 
+/**
+ * If the \p s is an SSA value that was generated by a negation instruction,
+ * that instruction is returned as a \c nir_alu_instr.  Otherwise \c NULL is
+ * returned.
+ */
+static const struct nir_alu_instr *
+get_neg_instr(const nir_src *s)
+{
+   const struct nir_alu_instr *const alu = nir_src_as_alu_instr_const(s);
+
+   return alu != NULL && (alu->op == nir_op_fneg || alu->op == nir_op_ineg)
+  ? alu : NULL;
+}
+
 bool
 nir_const_value_negative_equal(const nir_const_value *c1,
const nir_const_value *c2,
@@ -359,6 +373,96 @@ nir_const_value_negative_equal(const nir_const_value *c1,
return false;
 }
 
+/**
+ * Shallow compare of ALU srcs to determine if one is the negation of the other
+ *
+ * This function detects cases where \p alu1 is a constant and \p alu2 is a
+ * constant that is its negation.  It will also detect cases where \p alu2 is
+ * an SSA value that is a \c nir_op_fneg applied to \p alu1 (and vice versa).
+ *
+ * This function does not detect the general case when \p alu1 and \p alu2 are
+ * SSA values that are the negations of each other (e.g., \p alu1 represents
+ * (a * b) and \p alu2 represents (-a * b)).
+ */
+bool
+nir_alu_srcs_negative_equal(const nir_alu_instr *alu1,
+const nir_alu_instr *alu2,
+unsigned src1, unsigned src2)
+{
+   if (alu1->src[src1].abs != alu2->src[src2].abs)
+  return false;
+
+   bool parity = alu1->src[src1].negate != alu2->src[src2].negate;
+
+   /* Handling load_const instructions is tricky. */
+
+   const nir_const_value *const const1 =
+  nir_src_as_const_value(alu1->src[src1].src);
+
+   if (const1 != NULL) {
+  /* Assume that constant folding will eliminate source mods and unary
+   * ops.
+   */
+  if (parity)
+ return false;
+
+  const nir_const_value *const const2 =
+ nir_src_as_const_value(alu2->src[src2].src);
+
+  if (const2 == NULL)
+ return false;
+
+  /* FINISHME: Apply the swizzle? */
+  return nir_const_value_negative_equal(const1,
+const2,
+
nir_ssa_alu_instr_src_components(alu1, src1),
+
nir_op_infos[alu1->op].input_types[src1],
+alu1->dest.dest.ssa.bit_size);
+   }
+
+   uint8_t alu1_swizzle[4] = {};
+   nir_src alu1_actual_src;
+   const struct nir_alu_instr *const neg1 = 
get_neg_instr(&alu1->src[src1].src);
+
+   if (neg1) {
+  parity = !parity;
+  alu1_actual_src = neg1->src[0].src;
+
+  for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(neg1, 0); i++)
+ alu1_swizzle[i] = neg1->src[0].swizzle[i];
+   } else {
+  alu1_actual_src = alu1->src[src1].src;
+
+  for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu1, src1); 
i++)
+ alu1_swizzle[i] = i;
+   }
+
+   uint8_t alu2_swizzle[4] = {};
+   nir_src alu2_actual_src;
+   const struct nir_alu_instr *const neg2 = 
get_neg_instr(&alu2->src[src2].src);
+
+   if (neg2) {
+  parity = !parity;
+  alu2_actual_src = neg2->src[0].src;
+
+  for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(neg2, 0); i++)
+ alu2_swizzle[i] = neg2->src[0].swizzle[i];
+   } else {
+  alu2_actual_src = alu2->src[src2].src;
+
+  for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu2, src2); 
i++)
+ alu2_swizzle[i] = i;
+   }
+
+   for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu1, src1); i++) 
{
+  if (alu1_swizzle[alu1->src[src1].swizzle[i]] !=
+  

[Mesa-dev] [PATCH 4/9] intel/compiler: More peephole_select for pre-Gen6

2018-08-29 Thread Ian Romanick
From: Ian Romanick 

No shader-db changes on any Gen6+ platform.

All of the shaders with cycles hurt by more than ~2% are from Master of
Orion.  All of the shaders have instructions helped.  It looks like the
pass enables some control flow to be converted to bcsels, then the
scheduler does dumb things.  These are new shaders (just added before
doing this shader-db run), so there's probably some low-hanging fruit.

Iron Lake
total instructions in shared programs: 8207949 -> 8207310 (<.01%)
instructions in affected programs: 84465 -> 83826 (-0.76%)
helped: 114
HURT: 26
helped stats (abs) min: 2 max: 18 x̄: 7.71 x̃: 9
helped stats (rel) min: 0.17% max: 13.73% x̄: 2.41% x̃: 1.05%
HURT stats (abs)   min: 2 max: 20 x̄: 9.23 x̃: 8
HURT stats (rel)   min: 0.70% max: 2.48% x̄: 1.66% x̃: 1.61%
95% mean confidence interval for instructions value: -5.84 -3.29
95% mean confidence interval for instructions %-change: -2.19% -1.12%
Instructions are helped.

total cycles in shared programs: 187478874 -> 187491364 (<.01%)
cycles in affected programs: 506734 -> 519224 (2.46%)
helped: 100
HURT: 40
helped stats (abs) min: 2 max: 76 x̄: 22.68 x̃: 16
helped stats (rel) min: 0.02% max: 6.16% x̄: 0.87% x̃: 0.63%
HURT stats (abs)   min: 4 max: 1402 x̄: 368.95 x̃: 30
HURT stats (rel)   min: 0.33% max: 23.12% x̄: 5.36% x̃: 1.43%
95% mean confidence interval for cycles value: 28.54 149.89
95% mean confidence interval for cycles %-change: 0.09% 1.74%
Cycles are HURT.

GM45
total instructions in shared programs: 5047454 -> 5047096 (<.01%)
instructions in affected programs: 46751 -> 46393 (-0.77%)
helped: 63
HURT: 13
helped stats (abs) min: 2 max: 29 x̄: 7.59 x̃: 9
helped stats (rel) min: 0.17% max: 13.73% x̄: 2.74% x̃: 1.04%
HURT stats (abs)   min: 2 max: 20 x̄: 9.23 x̃: 8
HURT stats (rel)   min: 0.66% max: 2.35% x̄: 1.58% x̃: 1.52%
95% mean confidence interval for instructions value: -6.49 -2.93
95% mean confidence interval for instructions %-change: -2.80% -1.19%
Instructions are helped.

total cycles in shared programs: 128068304 -> 128075484 (<.01%)
cycles in affected programs: 324560 -> 331740 (2.21%)
helped: 53
HURT: 23
helped stats (abs) min: 6 max: 76 x̄: 32.57 x̃: 32
helped stats (rel) min: 0.08% max: 4.74% x̄: 1.04% x̃: 0.79%
HURT stats (abs)   min: 4 max: 1400 x̄: 387.22 x̃: 40
HURT stats (rel)   min: 0.56% max: 19.94% x̄: 5.05% x̃: 1.49%
95% mean confidence interval for cycles value: 7.43 181.52
95% mean confidence interval for cycles %-change: -0.23% 1.83%
Inconclusive result (%-change mean confidence interval includes 0).

Signed-off-by: Ian Romanick 
---
 src/intel/compiler/brw_nir.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index 04d399fd0b4..2eba6a42266 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -588,8 +588,8 @@ brw_nir_optimize(nir_shader *nir, const struct brw_compiler 
*compiler,
  (nir->info.stage == MESA_SHADER_TESS_CTRL ||
   nir->info.stage == MESA_SHADER_TESS_EVAL);
   OPT(nir_opt_peephole_select, 0, is_vec4_tessellation, false);
-  if (compiler->devinfo->gen >= 6)
- OPT(nir_opt_peephole_select, 1, is_vec4_tessellation, true);
+  OPT(nir_opt_peephole_select, 1, is_vec4_tessellation,
+  compiler->devinfo->gen >= 6);
 
   OPT(nir_opt_intrinsics);
   OPT(nir_opt_algebraic);
-- 
2.14.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/9 v2] intel/compiler: More peephole select

2018-08-29 Thread Ian Romanick
From: Ian Romanick 

Shader-db results:

The one shader hurt for instructions is a compute shader that had both
spills and fills hurt.

v2: Fix typo in comment noticed by Caio.

Skylake, Broadwell, and Haswell had similar results. (Skylake shown)
total instructions in shared programs: 15108590 -> 15083798 (-0.16%)
instructions in affected programs: 893759 -> 868967 (-2.77%)
helped: 3616
HURT: 1
helped stats (abs) min: 1 max: 181 x̄: 6.88 x̃: 4
helped stats (rel) min: 0.10% max: 25.00% x̄: 3.93% x̃: 3.20%
HURT stats (abs)   min: 92 max: 92 x̄: 92.00 x̃: 92
HURT stats (rel)   min: 1.92% max: 1.92% x̄: 1.92% x̃: 1.92%
95% mean confidence interval for instructions value: -7.09 -6.62
95% mean confidence interval for instructions %-change: -4.03% -3.82%
Instructions are helped.

total cycles in shared programs: 566165228 -> 565911206 (-0.04%)
cycles in affected programs: 69290937 -> 69036915 (-0.37%)
helped: 2600
HURT: 1050
helped stats (abs) min: 1 max: 4980 x̄: 180.20 x̃: 77
helped stats (rel) min: <.01% max: 71.30% x̄: 9.17% x̃: 5.60%
HURT stats (abs)   min: 1 max: 66666 x̄: 204.27 x̃: 20
HURT stats (rel)   min: <.01% max: 47.61% x̄: 2.95% x̃: 1.43%
95% mean confidence interval for cycles value: -106.62 -32.57
95% mean confidence interval for cycles %-change: -6.04% -5.33%
Cycles are helped.

total spills in shared programs: 0 -> 1 (<.01%)
spills in affected programs: 166 -> 167 (0.60%)
helped: 1
HURT: 1

total fills in shared programs: 23168 -> 23182 (0.06%)
fills in affected programs: 438 -> 452 (3.20%)
helped: 1
HURT: 1

Ivy Bridge
total instructions in shared programs: 12030850 -> 11999872 (-0.26%)
instructions in affected programs: 911114 -> 880136 (-3.40%)
helped: 3338
HURT: 18
helped stats (abs) min: 1 max: 99 x̄: 9.32 x̃: 6
helped stats (rel) min: 0.11% max: 31.18% x̄: 5.20% x̃: 3.32%
HURT stats (abs)   min: 2 max: 20 x̄: 7.89 x̃: 6
HURT stats (rel)   min: 0.70% max: 2.59% x̄: 1.63% x̃: 1.70%
95% mean confidence interval for instructions value: -9.52 -8.94
95% mean confidence interval for instructions %-change: -5.33% -4.99%
Instructions are helped.

total cycles in shared programs: 256248948 -> 255778729 (-0.18%)
cycles in affected programs: 70148230 -> 69678011 (-0.67%)
helped: 2745
HURT: 628
helped stats (abs) min: 1 max: 6100 x̄: 210.19 x̃: 90
helped stats (rel) min: <.01% max: 75.90% x̄: 9.68% x̃: 6.31%
HURT stats (abs)   min: 1 max: 31166 x̄: 170.00 x̃: 10
HURT stats (rel)   min: <.01% max: 36.36% x̄: 2.80% x̃: 0.57%
95% mean confidence interval for cycles value: -162.72 -116.09
95% mean confidence interval for cycles %-change: -7.72% -7.00%
Cycles are helped.

total spills in shared programs: 4570 -> 4558 (-0.26%)
spills in affected programs: 173 -> 161 (-6.94%)
helped: 3
HURT: 0

total fills in shared programs: 4823 -> 4814 (-0.19%)
fills in affected programs: 250 -> 241 (-3.60%)
helped: 3
HURT: 0

Sandy Bridge
total instructions in shared programs: 10831562 -> 10822747 (-0.08%)
instructions in affected programs: 235807 -> 226992 (-3.74%)
helped: 800
HURT: 0
helped stats (abs) min: 1 max: 88 x̄: 11.02 x̃: 8
helped stats (rel) min: 0.11% max: 23.08% x̄: 4.70% x̃: 3.36%
95% mean confidence interval for instructions value: -11.93 -10.10
95% mean confidence interval for instructions %-change: -5.00% -4.40%
Instructions are helped.

total cycles in shared programs: 154501635 -> 154382369 (-0.08%)
cycles in affected programs: 4031486 -> 3912220 (-2.96%)
helped: 582
HURT: 270
helped stats (abs) min: 1 max: 2556 x̄: 231.18 x̃: 58
helped stats (rel) min: 0.03% max: 39.24% x̄: 4.25% x̃: 1.75%
HURT stats (abs)   min: 1 max: 1966 x̄: 56.59 x̃: 12
HURT stats (rel)   min: 0.02% max: 67.10% x̄: 3.05% x̃: 0.70%
95% mean confidence interval for cycles value: -167.32 -112.65
95% mean confidence interval for cycles %-change: -2.40% -1.47%
Cycles are helped.

No change on Iron Lake or GM45.

Signed-off-by: Ian Romanick 
---
 src/intel/compiler/brw_nir.c | 15 ++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index 6ce8325a4dd..1d65107a93d 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -567,7 +567,18 @@ brw_nir_optimize(nir_shader *nir, const struct 
brw_compiler *compiler,
   OPT(nir_opt_dce);
   OPT(nir_opt_cse);
 
-  /* For indirect loads of uniforms (push constants), we assume that array
+  /* Passing 0 to the peephole select pass causes it to convert
+   * if-statements that contain only move instructions in the branches
+   * regardless of the count.
+   *
+   * Passing 1 to the peephole select pass causes it to convert
+   * if-statements that contain at most a single ALU instruction (total)
+   * in both branches.  Before Gen6, some math instructions were
+   * prohibitively expensive and the results of compare operations need an
+   * extra resolve step.  For these reasons, this pass is more harmful
+   * than good on those 

[Mesa-dev] [PATCH 9/9] intel/compiler: Use partial redundancy elimination for compares

2018-08-29 Thread Ian Romanick
From: Ian Romanick 

Almost all of the hurt shaders are repeated instances of the same shader
in synmark's compilation speed tests.

shader-db results:

All Gen6+ platforms had similar results. (Skylake shown)
total instructions in shared programs: 15083798 -> 15083033 (<.01%)
instructions in affected programs: 81932 -> 81167 (-0.93%)
helped: 395
HURT: 0
helped stats (abs) min: 1 max: 12 x̄: 1.94 x̃: 1
helped stats (rel) min: 0.06% max: 26.67% x̄: 1.81% x̃: 0.80%
95% mean confidence interval for instructions value: -2.09 -1.78
95% mean confidence interval for instructions %-change: -2.17% -1.46%
Instructions are helped.

total cycles in shared programs: 565911206 -> 565894509 (<.01%)
cycles in affected programs: 1101634 -> 1084937 (-1.52%)
helped: 333
HURT: 51
helped stats (abs) min: 1 max: 366 x̄: 54.49 x̃: 8
helped stats (rel) min: 0.02% max: 21.45% x̄: 3.44% x̃: 0.82%
HURT stats (abs)   min: 2 max: 130 x̄: 28.39 x̃: 33
HURT stats (rel)   min: <.01% max: 12.31% x̄: 0.85% x̃: 0.63%
95% mean confidence interval for cycles value: -53.40 -33.56
95% mean confidence interval for cycles %-change: -3.48% -2.27%
Cycles are helped.

Iron Lake
total instructions in shared programs: 8207310 -> 8206971 (<.01%)
instructions in affected programs: 42319 -> 41980 (-0.80%)
helped: 136
HURT: 0
helped stats (abs) min: 1 max: 9 x̄: 2.49 x̃: 2
helped stats (rel) min: 0.27% max: 14.29% x̄: 1.86% x̃: 0.66%
95% mean confidence interval for instructions value: -2.73 -2.25
95% mean confidence interval for instructions %-change: -2.40% -1.31%
Instructions are helped.

total cycles in shared programs: 187491364 -> 187489998 (<.01%)
cycles in affected programs: 665008 -> 663642 (-0.21%)
helped: 125
HURT: 2
helped stats (abs) min: 2 max: 36 x̄: 10.96 x̃: 12
helped stats (rel) min: 0.04% max: 3.85% x̄: 0.50% x̃: 0.23%
HURT stats (abs)   min: 2 max: 2 x̄: 2.00 x̃: 2
HURT stats (rel)   min: 0.04% max: 0.09% x̄: 0.06% x̃: 0.06%
95% mean confidence interval for cycles value: -11.76 -9.75
95% mean confidence interval for cycles %-change: -0.61% -0.37%
Cycles are helped.

GM45
total instructions in shared programs: 5047096 -> 5046872 (<.01%)
instructions in affected programs: 26103 -> 25879 (-0.86%)
helped: 82
HURT: 0
helped stats (abs) min: 1 max: 9 x̄: 2.73 x̃: 2
helped stats (rel) min: 0.27% max: 13.33% x̄: 1.71% x̃: 0.94%
95% mean confidence interval for instructions value: -3.04 -2.42
95% mean confidence interval for instructions %-change: -2.33% -1.09%
Instructions are helped.

total cycles in shared programs: 128075484 -> 128074444 (<.01%)
cycles in affected programs: 416730 -> 415690 (-0.25%)
helped: 81
HURT: 1
helped stats (abs) min: 2 max: 36 x̄: 12.86 x̃: 12
helped stats (rel) min: 0.06% max: 3.85% x̄: 0.58% x̃: 0.30%
HURT stats (abs)   min: 2 max: 2 x̄: 2.00 x̃: 2
HURT stats (rel)   min: 0.09% max: 0.09% x̄: 0.09% x̃: 0.09%
95% mean confidence interval for cycles value: -14.00 -11.36
95% mean confidence interval for cycles %-change: -0.74% -0.40%
Cycles are helped.

Signed-off-by: Ian Romanick 
---
 src/intel/compiler/brw_nir.c | 20 
 1 file changed, 20 insertions(+)

diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index 2eba6a42266..38d14ca960d 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -810,6 +810,26 @@ brw_postprocess_nir(nir_shader *nir, const struct 
brw_compiler *compiler,
   OPT(brw_nir_opt_peephole_ffma);
}
 
+   if (OPT(nir_opt_comparison_pre)) {
+  OPT(nir_copy_prop);
+  OPT(nir_opt_dce);
+  OPT(nir_opt_cse);
+
+  /* Do the select peephole again.  nir_opt_comparison_pre (combined with
+   * the other optimization passes) will have removed at least one
+   * instruction from one of the branches of the if-statement, so now it
+   * might be under the threshold of conversion to bcsel.
+   *
+   * See brw_nir_optimize for the explanation of is_vec4_tessellation.
+   */
+  const bool is_vec4_tessellation = !is_scalar &&
+ (nir->info.stage == MESA_SHADER_TESS_CTRL ||
+  nir->info.stage == MESA_SHADER_TESS_EVAL);
+  OPT(nir_opt_peephole_select, 0, is_vec4_tessellation, false);
+  OPT(nir_opt_peephole_select, 1, is_vec4_tessellation,
+  compiler->devinfo->gen >= 6);
+   }
+
OPT(nir_opt_algebraic_late);
 
OPT(nir_lower_to_source_mods);
-- 
2.14.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH] mesa: allow GL_UNSIGNED_BYTE type for SNORM reads

2018-08-29 Thread Tapani Pälli



On 08/30/2018 01:04 AM, Andres Gomez wrote:

On Wed, 2018-08-29 at 08:22 -0700, Dylan Baker wrote:

Quoting Tapani Pälli (2018-08-27 04:46:37)

OpenGL ES spec states:
"For normalized fixed-point rendering surfaces, the combination format
 RGBA and type UNSIGNED_BYTE is accepted."

This fixes following failing VK-GL-CTS tests:

KHR-GLES3.packed_pixels.pbo_rectangle.rgba8_snorm
KHR-GLES3.packed_pixels.rectangle.rgba8_snorm
KHR-GLES3.packed_pixels.varied_rectangle.rgba8_snorm

Signed-off-by: Tapani Pälli 
https://bugs.freedesktop.org/show_bug.cgi?id=107658
Cc: mesa-sta...@lists.freedesktop.org
---

This is a partial fix to the bug. I believe there are 2 separate
issues within reported bug and this fixes the first one.

  src/mesa/main/readpix.c | 9 +
  1 file changed, 9 insertions(+)

diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c
index 2cbb578a37f..556c860d393 100644
--- a/src/mesa/main/readpix.c
+++ b/src/mesa/main/readpix.c
@@ -958,6 +958,15 @@ read_pixels_es3_error_check(struct gl_context *ctx, GLenum 
format, GLenum type,
 return GL_NO_ERROR;
   }
}
+  if (type == GL_UNSIGNED_BYTE) {
+ switch (internalFormat) {
+ case GL_R8_SNORM:
+ case GL_RG8_SNORM:
+ case GL_RGBA8_SNORM:
+if (_mesa_has_EXT_render_snorm(ctx))
+   return GL_NO_ERROR;
+ }
+  }
break;
 case GL_BGRA:
/* GL_EXT_read_format_bgra */
--
2.14.4



Hi Tapani,

This doesn't apply cleanly to 18.1 because "mesa: enable EXT_render_snorm
extension" isn't present on the branch. Does it still make sense to pull this
into 18.1?


Likewise for 18.2.

The offending commit that caused the regression didn't make it for any
of the current stable queues so I think it is safe just to ignore this
patch. Can you confirm?



Yes, please ignore. I thought it made it to some branch but it seems not :)

// Tapani
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH] mesa: allow GL_UNSIGNED_BYTE type for SNORM reads

2018-08-29 Thread Tapani Pälli



On 08/29/2018 06:22 PM, Dylan Baker wrote:

Quoting Tapani Pälli (2018-08-27 04:46:37)

OpenGL ES spec states:
"For normalized fixed-point rendering surfaces, the combination format
 RGBA and type UNSIGNED_BYTE is accepted."

This fixes following failing VK-GL-CTS tests:

KHR-GLES3.packed_pixels.pbo_rectangle.rgba8_snorm
KHR-GLES3.packed_pixels.rectangle.rgba8_snorm
KHR-GLES3.packed_pixels.varied_rectangle.rgba8_snorm

Signed-off-by: Tapani Pälli 
https://bugs.freedesktop.org/show_bug.cgi?id=107658
Cc: mesa-sta...@lists.freedesktop.org
---

This is a partial fix to the bug. I believe there are 2 separate
issues within reported bug and this fixes the first one.

  src/mesa/main/readpix.c | 9 +
  1 file changed, 9 insertions(+)

diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c
index 2cbb578a37f..556c860d393 100644
--- a/src/mesa/main/readpix.c
+++ b/src/mesa/main/readpix.c
@@ -958,6 +958,15 @@ read_pixels_es3_error_check(struct gl_context *ctx, GLenum 
format, GLenum type,
 return GL_NO_ERROR;
   }
}
+  if (type == GL_UNSIGNED_BYTE) {
+ switch (internalFormat) {
+ case GL_R8_SNORM:
+ case GL_RG8_SNORM:
+ case GL_RGBA8_SNORM:
+if (_mesa_has_EXT_render_snorm(ctx))
+   return GL_NO_ERROR;
+ }
+  }
break;
 case GL_BGRA:
/* GL_EXT_read_format_bgra */
--
2.14.4



Hi Tapani,

This doesn't apply cleanly to 18.1 because "mesa: enable EXT_render_snorm
extension" isn't present on the branch. Does it still make sense to pull this
into 18.1?



Ah nope, patch makes sense only with EXT_render_snorm.

// Tapani
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallivm: Detect VSX separately from Altivec

2018-08-29 Thread Roland Scheidegger
Am 24.08.2018 um 18:22 schrieb Vicki Pfau:
> Is there anything else I need to do on this? This is my first mesa patch
> so I'm not entirely clear what next steps are for getting it committed.
No that's all needed, pushed, sorry for the delay.
Thanks!

Roland


> 
> 
> On 08/20/2018 02:44 PM, Roland Scheidegger wrote:
>> Alright, I guess it's ok then.
>> In theory the u_cpu_detect bits could be used in different places, for
>> instance the translate code emits its own sse code, and as long as a
>> feature was detected properly it may make sense to disable it only for
>> some users. Albeit llvm setup and the gallivm code need to agree
>> generally, and there's no good way to deal with this right now (I
>> suppose gallivm actually should use its own copy of the u_cpu bits). The
>> fiddling we do in lp_bld_init() wrt SSE (LP_FORCE_SSE2 and also avx
>> disabling) isn't a clean way either.
>> So this looks like as good a solution as others.
>>
>> Reviewed-by: Roland Scheidegger 
>>
>> Am 20.08.2018 um 22:15 schrieb Vicki Pfau:
>>> I was mostly following what was done earlier in the file for Altivec. I
>>> can move it but then ideally the Altivec check should also be moved.
>>>
>>>
>>> Vicki
>>>
>>>
>>> On 08/20/2018 08:53 AM, Roland Scheidegger wrote:
 u_cpu_detect should detect what's really available, not what is used
 (though indeed we actually disable u_cpu bits explicitly in gallivm for
 some sse features, but this is a hack).
 So I think it would be better if u_cpu_detect sets the has_vsx bit
 regardless what the env var is and then enable it based on this bit and
 the env var.
 Otherwise looks good to me.

 Roland

 Am 19.08.2018 um 23:17 schrieb Vicki Pfau:
> Previously gallivm would attempt to use VSX instructions on all
> systems
> where it detected that Altivec is supported; however, VSX was added to
> POWER long after Altivec, causing lots of crashes on older POWER/PPC
> hardware, e.g. PPC Macs. By detecting VSX separately from Altivec
> we can
> automatically disable it on hardware that supports Altivec but not VSX
>
> Signed-off-by: Vicki Pfau 
> ---
>    src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 21
> +++
>    src/gallium/auxiliary/util/u_cpu_detect.c | 14 -
>    src/gallium/auxiliary/util/u_cpu_detect.h |  1 +
>    3 files changed, 17 insertions(+), 19 deletions(-)
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
> b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
> index 79dbedbb56..fcbdd5050f 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
> @@ -650,26 +650,11 @@
> lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef
> *OutJIT,
>    * which are fixed in LLVM 4.0.
>    *
>    * With LLVM 4.0 or higher:
> -    * Make sure VSX instructions are ENABLED, unless
> -    * a) the entire -mattr option is overridden via
> GALLIVM_MATTRS, or
> -    * b) VSX instructions are explicitly enabled/disabled via
> GALLIVM_VSX=1 or 0.
> +    * Make sure VSX instructions are ENABLED (if supported), unless
> +    * VSX instructions are explicitly enabled/disabled via
> GALLIVM_VSX=1 or 0.
>    */
>   if (util_cpu_caps.has_altivec) {
> -  char *env_mattrs = getenv("GALLIVM_MATTRS");
> -  if (env_mattrs) {
> - MAttrs.push_back(env_mattrs);
> -  }
> -  else {
> - boolean enable_vsx = true;
> - char *env_vsx = getenv("GALLIVM_VSX");
> - if (env_vsx && env_vsx[0] == '0') {
> -    enable_vsx = false;
> - }
> - if (enable_vsx)
> -    MAttrs.push_back("+vsx");
> - else
> -    MAttrs.push_back("-vsx");
> -  }
> +  MAttrs.push_back(util_cpu_caps.has_vsx ? "+vsx" : "-vsx");
>   }
>    #endif
>    #endif
> diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c
> b/src/gallium/auxiliary/util/u_cpu_detect.c
> index 3c6ae4ea1a..14003aa769 100644
> --- a/src/gallium/auxiliary/util/u_cpu_detect.c
> +++ b/src/gallium/auxiliary/util/u_cpu_detect.c
> @@ -133,6 +133,7 @@ check_os_altivec_support(void)
>  signal(SIGILL, SIG_DFL);
>   } else {
>  boolean enable_altivec = TRUE;    /* Default: enable  if
> available, and if not overridden */
> +  boolean enable_vsx = TRUE;
>    #ifdef DEBUG
>  /* Disabling Altivec code generation is not the same as
> disabling VSX code generation,
>   * which can be done simply by passing -mattr=-vsx to the
> LLVM compiler; cf.
> @@ -144,6 +145,11 @@ check_os_altivec_support(void)
>     enable_altivec = FALSE;
>  }
>    #endif
> +  /* VSX 

Re: [Mesa-dev] [PATCH v4 1/7] nir: evaluate if condition uses inside the if branches

2018-08-29 Thread Timothy Arceri

On 30/08/18 12:56, Jason Ekstrand wrote:
On Wed, Aug 29, 2018 at 9:45 PM Timothy Arceri wrote:


On 30/08/18 10:57, Ian Romanick wrote:
 > On 08/27/2018 02:08 AM, Timothy Arceri wrote:
 >> Since we know what side of the branch we ended up on we can just
 >> replace the use with a constant.
 >>
 >> All the spill changes in shader-db are from Dolphin uber shaders,
 >> despite some small regressions the change is clearly positive.
 >>
 >> V2: insert new constant after any phis in the
 >>      use->parent_instr->type == nir_instr_type_phi path.
 >>
 >> v3:
 >>   - use nir_after_block_before_jump() for inserting const
 >>   - check dominance of phi uses correctly
 >>
 >> v4:
 >>   - create some helpers as suggested by Jason.
 >>
 >> shader-db results IVB:
 >>
 >> total instructions in shared programs: 9999201 -> 9993483 (-0.06%)
 >> instructions in affected programs: 163235 -> 157517 (-3.50%)
 >> helped: 132
 >> HURT: 2
 >>
 >> total cycles in shared programs: 231670754 -> 219476091 (-5.26%)
 >> cycles in affected programs: 143424120 -> 131229457 (-8.50%)
 >> helped: 115
 >> HURT: 24
 >>
 >> total spills in shared programs: 4383 -> 4370 (-0.30%)
 >> spills in affected programs: 1656 -> 1643 (-0.79%)
 >> helped: 9
 >> HURT: 18
 >>
 >> total fills in shared programs: 4610 -> 4581 (-0.63%)
 >> fills in affected programs: 374 -> 345 (-7.75%)
 >> helped: 6
 >> HURT: 0
 >> ---
 >>   src/compiler/nir/nir.h        |  22 +++
 >>   src/compiler/nir/nir_opt_if.c | 113
++
 >>   2 files changed, 135 insertions(+)
 >>
 >> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
 >> index 009a6d60371..0caacd30173 100644
 >> --- a/src/compiler/nir/nir.h
 >> +++ b/src/compiler/nir/nir.h
 >> @@ -2331,6 +2331,28 @@ nir_after_block_before_jump(nir_block *block)
 >>      }
 >>   }
 >>
 >> +static inline nir_cursor
 >> +nir_before_src(nir_src *src, bool is_if_condition)
 >> +{
 >> +   if (is_if_condition) {
 >> +      nir_block *prev_block =
 >> +   
  nir_cf_node_as_block(nir_cf_node_prev(&src->parent_if->cf_node));

 >> +      assert(!nir_block_ends_in_jump(prev_block));
 >> +      return nir_after_block(prev_block);
 >> +   } else if (src->parent_instr->type == nir_instr_type_phi) {
 >> +      nir_phi_instr *cond_phi =
nir_instr_as_phi(src->parent_instr);
 >> +      nir_foreach_phi_src(phi_src, cond_phi) {
 >> +         if (phi_src->src.ssa == src->ssa) {
 >> +            return nir_after_block_before_jump(phi_src->pred);
 >> +         }
 >> +      }
 >> +
 >> +      unreachable("failed to find phi src");
 >> +   } else {
 >> +      return nir_before_instr(src->parent_instr);
 >> +   }
 >> +}
 >> +
 >>   static inline nir_cursor
 >>   nir_before_cf_node(nir_cf_node *node)
 >>   {
 >> diff --git a/src/compiler/nir/nir_opt_if.c
b/src/compiler/nir/nir_opt_if.c
 >> index dacf2d6c667..11c6693d302 100644
 >> --- a/src/compiler/nir/nir_opt_if.c
 >> +++ b/src/compiler/nir/nir_opt_if.c
 >> @@ -369,6 +369,76 @@ opt_if_loop_terminator(nir_if *nif)
 >>      return true;
 >>   }
 >>
 >> +static void
 >> +replace_if_condition_use_with_const(nir_src *use, void *mem_ctx,
 >> +                                    nir_cursor cursor, unsigned
nir_boolean,
 >> +                                    bool if_condition)
 >> +{
 >> +   /* Create const */
 >> +   nir_load_const_instr *load =
nir_load_const_instr_create(mem_ctx, 1, 32);


The one actual use of mem_ctx is right here.

 >> +   load->value.u32[0] = nir_boolean;
 >> +   nir_instr_insert(cursor, &load->instr);
 >> +
 >> +   /* Rewrite use to use const */
 >> +   nir_src new_src = nir_src_for_ssa(&load->def);
 >> +
 >> +   if (if_condition)
 >> +      nir_if_rewrite_condition(use->parent_if, new_src);
 >> +   else
 >> +      nir_instr_rewrite_src(use->parent_instr, use, new_src);
 >> +}
 >> +
 >> +static bool
 >> +evaluate_if_condition(nir_if *nif, nir_cursor cursor, uint32_t
*value)
 >> +{
 >> +   nir_block *use_block = nir_cursor_current_block(cursor);
 >> +   if (nir_block_dominates(nir_if_first_then_block(nif),
use_block)) {
 >> +      *value = NIR_TRUE;
 >> +      return true;
 >> +   } else if (nir_block_dominates(nir_if_first_else_block(nif),
use_block)) {
 >> +      *value = NIR_FALSE;
 >> +      return true;
 >> +   } else {
 >> +      return false;
 >> +   }
 >> +}
 >> +
 >> +static bool
 >> +evaluate_condition_use(nir_if *nif, nir_src *use_src, void
*mem_ctx,
 >> +                       bool is_if_condition)
 

[Mesa-dev] [Bug 107477] [DXVK] Setting high shader quality in GTA V results in LLVM error

2018-08-29 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107477

--- Comment #18 from Clément Guérin  ---
I confirm it works. I'm able to run the game at Ultra settings without any
visible glitches. Thanks for your hard work!

There's another small issue with inverted reflections, it's still unclear if
it's a game bug, DXVK bug or driver bug. I'll open up another bug report if
necessary.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/2] freedreno: fix crashes of deqp gles31

2018-08-29 Thread Hyunjun Ko
This series fixes some crashes of tests from deqp gles31 on freedreno.
Thanks for review.

Hyunjun Ko (2):
  freedreno/ir3: make immediates array dynamic
  freedreno/ir3: insert mov if same instruction in the outputs.

 .../drivers/freedreno/ir3/ir3_compiler_nir.c   | 14 ++
 src/gallium/drivers/freedreno/ir3/ir3_cp.c |  7 +++
 src/gallium/drivers/freedreno/ir3/ir3_shader.c |  2 ++
 src/gallium/drivers/freedreno/ir3/ir3_shader.h |  3 ++-
 4 files changed, 25 insertions(+), 1 deletion(-)

-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] freedreno/ir3: make immediates array dynamic

2018-08-29 Thread Hyunjun Ko
Since most shaders wouldn't need that large array of immediates, making
the array dynamic could save unnecessary spaces.

In addition, sometimes we can potentially have a much larger array
of immediates to be lowered, which might be more than 64.
---
 src/gallium/drivers/freedreno/ir3/ir3_cp.c | 7 +++
 src/gallium/drivers/freedreno/ir3/ir3_shader.c | 2 ++
 src/gallium/drivers/freedreno/ir3/ir3_shader.h | 3 ++-
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cp.c 
b/src/gallium/drivers/freedreno/ir3/ir3_cp.c
index 0ee8ea2e0e..ea92f6b857 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_cp.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_cp.c
@@ -286,6 +286,13 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register 
*reg, unsigned new_flags
new_flags &= ~IR3_REG_FNEG;
}
 
+   /* Reallocate for 4 more elements whenever it's necessary */
+   if (ctx->immediate_idx == ctx->so->immediates_size * 4) {
+   ctx->so->immediates_size += 4;
+   ctx->so->immediates = realloc (ctx->so->immediates,
+   ctx->so->immediates_size * sizeof 
(ctx->so->immediates[0]));
+   }
+
for (i = 0; i < ctx->immediate_idx; i++) {
swiz = i % 4;
idx  = i / 4;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c 
b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
index 7bb4263b17..125bf3b983 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
@@ -54,6 +54,8 @@ delete_variant(struct ir3_shader_variant *v)
ir3_destroy(v->ir);
if (v->bo)
fd_bo_del(v->bo);
+   if (v->immediates)
+   free(v->immediates);
free(v);
 }
 
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h 
b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
index 288e9fa4e7..456701be7d 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
@@ -325,9 +325,10 @@ struct ir3_shader_variant {
} constbase;
 
unsigned immediates_count;
+   unsigned immediates_size;
struct {
uint32_t val[4];
-   } immediates[64];
+   } *immediates;
 
/* for astc srgb workaround, the number/base of additional
 * alpha tex states we need, and index of original tex states
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] freedreno/ir3: insert mov if same instruction in the outputs.

2018-08-29 Thread Hyunjun Ko
For example,

result0 = texture(sampler[indexBase + 5], coords);
result1 = texture(sampler[indexBase + 0], coords);
result2 = texture(sampler[indexBase + 0], coords);
out_result0 = result0;
out_result1 = result1;
out_result2 = result2;

In this kind of case we need to insert an extra mov to the outputs
so that the result could be assigned to each register respectively.
---
 .../drivers/freedreno/ir3/ir3_compiler_nir.c   | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c 
b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index e4979a60a0..9f29a8afea 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -3637,6 +3637,20 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
 
ir3_cp(ir, so);
 
+   /* Insert mov if there's same instruction for each output.
+* eg. 
dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.const_expression.vertex.sampler2dshadow
+*/
+   for (int i = ir->noutputs - 1; i >= 0; i--) {
+   if (!ir->outputs[i])
+   continue;
+   for (unsigned j = 0; j < i; j++) {
+   if (ir->outputs[i] == ir->outputs[j]) {
+   ir->outputs[i] =
+   ir3_MOV(ir->outputs[i]->block, 
ir->outputs[i], TYPE_F32);
+   }
+   }
+   }
+
if (fd_mesa_debug & FD_DBG_OPTMSGS) {
printf("BEFORE GROUPING:\n");
ir3_print(ir);
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v4 1/7] nir: evaluate if condition uses inside the if branches

2018-08-29 Thread Jason Ekstrand
On Wed, Aug 29, 2018 at 9:45 PM Timothy Arceri 
wrote:

> On 30/08/18 10:57, Ian Romanick wrote:
> > On 08/27/2018 02:08 AM, Timothy Arceri wrote:
> >> Since we know what side of the branch we ended up on we can just
> >> replace the use with a constant.
> >>
> >> All the spill changes in shader-db are from Dolphin uber shaders,
> >> despite some small regressions the change is clearly positive.
> >>
> >> V2: insert new constant after any phis in the
> >>  use->parent_instr->type == nir_instr_type_phi path.
> >>
> >> v3:
> >>   - use nir_after_block_before_jump() for inserting const
> >>   - check dominance of phi uses correctly
> >>
> >> v4:
> >>   - create some helpers as suggested by Jason.
> >>
> >> shader-db results IVB:
> >>
> >> total instructions in shared programs: 9999201 -> 9993483 (-0.06%)
> >> instructions in affected programs: 163235 -> 157517 (-3.50%)
> >> helped: 132
> >> HURT: 2
> >>
> >> total cycles in shared programs: 231670754 -> 219476091 (-5.26%)
> >> cycles in affected programs: 143424120 -> 131229457 (-8.50%)
> >> helped: 115
> >> HURT: 24
> >>
> >> total spills in shared programs: 4383 -> 4370 (-0.30%)
> >> spills in affected programs: 1656 -> 1643 (-0.79%)
> >> helped: 9
> >> HURT: 18
> >>
> >> total fills in shared programs: 4610 -> 4581 (-0.63%)
> >> fills in affected programs: 374 -> 345 (-7.75%)
> >> helped: 6
> >> HURT: 0
> >> ---
> >>   src/compiler/nir/nir.h|  22 +++
> >>   src/compiler/nir/nir_opt_if.c | 113 ++
> >>   2 files changed, 135 insertions(+)
> >>
> >> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> >> index 009a6d60371..0caacd30173 100644
> >> --- a/src/compiler/nir/nir.h
> >> +++ b/src/compiler/nir/nir.h
> >> @@ -2331,6 +2331,28 @@ nir_after_block_before_jump(nir_block *block)
> >>  }
> >>   }
> >>
> >> +static inline nir_cursor
> >> +nir_before_src(nir_src *src, bool is_if_condition)
> >> +{
> >> +   if (is_if_condition) {
> >> +  nir_block *prev_block =
> >> +
>  nir_cf_node_as_block(nir_cf_node_prev(&src->parent_if->cf_node));
> >> +  assert(!nir_block_ends_in_jump(prev_block));
> >> +  return nir_after_block(prev_block);
> >> +   } else if (src->parent_instr->type == nir_instr_type_phi) {
> >> +  nir_phi_instr *cond_phi = nir_instr_as_phi(src->parent_instr);
> >> +  nir_foreach_phi_src(phi_src, cond_phi) {
> >> + if (phi_src->src.ssa == src->ssa) {
> >> +return nir_after_block_before_jump(phi_src->pred);
> >> + }
> >> +  }
> >> +
> >> +  unreachable("failed to find phi src");
> >> +   } else {
> >> +  return nir_before_instr(src->parent_instr);
> >> +   }
> >> +}
> >> +
> >>   static inline nir_cursor
> >>   nir_before_cf_node(nir_cf_node *node)
> >>   {
> >> diff --git a/src/compiler/nir/nir_opt_if.c
> b/src/compiler/nir/nir_opt_if.c
> >> index dacf2d6c667..11c6693d302 100644
> >> --- a/src/compiler/nir/nir_opt_if.c
> >> +++ b/src/compiler/nir/nir_opt_if.c
> >> @@ -369,6 +369,76 @@ opt_if_loop_terminator(nir_if *nif)
> >>  return true;
> >>   }
> >>
> >> +static void
> >> +replace_if_condition_use_with_const(nir_src *use, void *mem_ctx,
> >> +nir_cursor cursor, unsigned
> nir_boolean,
> >> +bool if_condition)
> >> +{
> >> +   /* Create const */
> >> +   nir_load_const_instr *load = nir_load_const_instr_create(mem_ctx,
> 1, 32);
>

The one actual use of mem_ctx is right here.


> >> +   load->value.u32[0] = nir_boolean;
> >> +   nir_instr_insert(cursor, &load->instr);
> >> +
> >> +   /* Rewrite use to use const */
> >> +   nir_src new_src = nir_src_for_ssa(&load->def);
> >> +
> >> +   if (if_condition)
> >> +  nir_if_rewrite_condition(use->parent_if, new_src);
> >> +   else
> >> +  nir_instr_rewrite_src(use->parent_instr, use, new_src);
> >> +}
> >> +
> >> +static bool
> >> +evaluate_if_condition(nir_if *nif, nir_cursor cursor, uint32_t *value)
> >> +{
> >> +   nir_block *use_block = nir_cursor_current_block(cursor);
> >> +   if (nir_block_dominates(nir_if_first_then_block(nif), use_block)) {
> >> +  *value = NIR_TRUE;
> >> +  return true;
> >> +   } else if (nir_block_dominates(nir_if_first_else_block(nif),
> use_block)) {
> >> +  *value = NIR_FALSE;
> >> +  return true;
> >> +   } else {
> >> +  return false;
> >> +   }
> >> +}
> >> +
> >> +static bool
> >> +evaluate_condition_use(nir_if *nif, nir_src *use_src, void *mem_ctx,
> >> +   bool is_if_condition)
> >> +{
> >> +   bool progress = false;
> >> +
> >> +   uint32_t value;
> >> +   nir_cursor cursor = nir_before_src(use_src, is_if_condition);
> >> +   if (evaluate_if_condition(nif, cursor, &value)) {
> >> +  replace_if_condition_use_with_const(use_src, mem_ctx, cursor,
> value,
> >> +  is_if_condition);
> >> +  progress = true;
> >> +   }
> >> +
> >> +   return progress;
> >> +}
> >> +
> >> +static bool
> >> 

Re: [Mesa-dev] [PATCH v3] nir: propagates if condition evaluation down some alu chains

2018-08-29 Thread Timothy Arceri

On 30/08/18 11:05, Ian Romanick wrote:

I feel like this would be a good candidate for some unit tests.

As a follow up, you might also consider adding bcsel.  I have seen quite
a few cases of bcsel(bool, bool, bool).


Ah yes that is probably a good one to try. I'll give it a go.



On 08/29/2018 04:48 PM, Timothy Arceri wrote:

v2:
  - only allow nir_op_inot or nir_op_b2i when alu input is 1.
  - use some helpers as suggested by Jason.

v3:
  - evaluate alu op for single input alu ops
  - add helper function to decide if to propagate through alu
  - make use of nir_before_src in another spot

shader-db IVB results:

total instructions in shared programs: 9993483 -> 9993472 (-0.00%)
instructions in affected programs: 1300 -> 1289 (-0.85%)
helped: 11
HURT: 0

total cycles in shared programs: 219476091 -> 219476059 (-0.00%)
cycles in affected programs: 7675 -> 7643 (-0.42%)
helped: 10
HURT: 1
---
  src/compiler/nir/nir_opt_if.c | 145 --
  1 file changed, 139 insertions(+), 6 deletions(-)

diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
index 11c6693d302..9e9d8edda21 100644
--- a/src/compiler/nir/nir_opt_if.c
+++ b/src/compiler/nir/nir_opt_if.c
@@ -23,6 +23,7 @@
  
  #include "nir.h"

  #include "nir/nir_builder.h"
+#include "nir_constant_expressions.h"
  #include "nir_control_flow.h"
  #include "nir_loop_analyze.h"
  
@@ -403,9 +404,127 @@ evaluate_if_condition(nir_if *nif, nir_cursor cursor, uint32_t *value)

 }
  }
  
+/*

+ * This propagates if condition evaluation down the chain of some alu
+ * instructions. For example by checking the use of some of the following alu
+ * instruction we can eventually replace ssa_107 with NIR_TRUE.
+ *
+ *   loop {
+ *  block block_1:
+ *  vec1 32 ssa_85 = load_const (0x0002)
+ *  vec1 32 ssa_86 = ieq ssa_48, ssa_85
+ *  vec1 32 ssa_87 = load_const (0x0001)
+ *  vec1 32 ssa_88 = ieq ssa_48, ssa_87
+ *  vec1 32 ssa_89 = ior ssa_86, ssa_88
+ *  vec1 32 ssa_90 = ieq ssa_48, ssa_0
+ *  vec1 32 ssa_91 = ior ssa_89, ssa_90
+ *  if ssa_86 {
+ * block block_2:
+ * ...
+ *break
+ *  } else {
+ *block block_3:
+ *  }
+ *  block block_4:
+ *  if ssa_88 {
+ *block block_5:
+ * ...
+ *break
+ *  } else {
+ *block block_6:
+ *  }
+ *  block block_7:
+ *  if ssa_90 {
+ *block block_8:
+ * ...
+ *break
+ *  } else {
+ *block block_9:
+ *  }
+ *  block block_10:
+ *  vec1 32 ssa_107 = inot ssa_91
+ *  if ssa_107 {
+ *block block_11:
+ *break
+ *  } else {
+ *block block_12:
+ *  }
+ *   }
+ */
  static bool
-evaluate_condition_use(nir_if *nif, nir_src *use_src, void *mem_ctx,
-   bool is_if_condition)
+propagate_condition_eval(nir_builder *b, nir_if *nif, nir_src *use_src,
+ nir_src *alu_use, nir_alu_instr *alu, void *mem_ctx,
+ bool is_if_condition)
+{
+   bool progress = false;
+
+   uint32_t bool_value;
+   b->cursor = nir_before_src(alu_use, is_if_condition);
+   if (nir_op_infos[alu->op].num_inputs == 1) {
+  assert(alu->op == nir_op_inot || alu->op == nir_op_b2i);
+
+  if (evaluate_if_condition(nif, b->cursor, &bool_value)) {
+ nir_const_value bool_src;
+ bool_src.u32[0] = bool_value;
+
+ unsigned bit_size = nir_src_bit_size(alu->src[0].src);
+ nir_const_value result =
+nir_eval_const_opcode(alu->op, 1, bit_size, &bool_src);
+
+ replace_if_condition_use_with_const(alu_use, mem_ctx, b->cursor,
+ result.u32[0], is_if_condition);
+ progress = true;
+  }
+   } else {
+  assert(alu->op == nir_op_ior || alu->op == nir_op_iand);
+
+  if (evaluate_if_condition(nif, b->cursor, &bool_value)) {
+ nir_ssa_def *def[2];
+ for (unsigned i = 0; i < 2; i++) {
+if (alu->src[i].src.ssa == use_src->ssa) {
+   nir_const_value const_value;
+   const_value.u32[0] = bool_value;
+
+   def[i] = nir_build_imm(b, 1, 32, const_value);
+} else {
+   def[i] = alu->src[i].src.ssa;
+}
+ }
+
+ nir_ssa_def *nalu =
+nir_build_alu(b, alu->op, def[0], def[1], NULL, NULL);
+
+ /* Rewrite use to use new alu instruction */
+ nir_src new_src = nir_src_for_ssa(nalu);
+
+ if (is_if_condition)
+nir_if_rewrite_condition(alu_use->parent_if, new_src);
+ else
+nir_instr_rewrite_src(alu_use->parent_instr, alu_use, new_src);
+
+ progress = true;
+  }
+   }
+
+   return progress;
+}
+
+static bool
+can_propagate_through_alu(nir_src *src)
+{
+   if (src->parent_instr->type == nir_instr_type_alu &&
+   

Re: [Mesa-dev] [PATCH v4 1/7] nir: evaluate if condition uses inside the if branches

2018-08-29 Thread Timothy Arceri

On 30/08/18 10:57, Ian Romanick wrote:

On 08/27/2018 02:08 AM, Timothy Arceri wrote:

Since we know what side of the branch we ended up on we can just
replace the use with a constant.

All the spill changes in shader-db are from Dolphin uber shaders,
despite some small regressions the change is clearly positive.

V2: insert new constant after any phis in the
 use->parent_instr->type == nir_instr_type_phi path.

v3:
  - use nir_after_block_before_jump() for inserting const
  - check dominance of phi uses correctly

v4:
  - create some helpers as suggested by Jason.

shader-db results IVB:

total instructions in shared programs: 9999201 -> 9993483 (-0.06%)
instructions in affected programs: 163235 -> 157517 (-3.50%)
helped: 132
HURT: 2

total cycles in shared programs: 231670754 -> 219476091 (-5.26%)
cycles in affected programs: 143424120 -> 131229457 (-8.50%)
helped: 115
HURT: 24

total spills in shared programs: 4383 -> 4370 (-0.30%)
spills in affected programs: 1656 -> 1643 (-0.79%)
helped: 9
HURT: 18

total fills in shared programs: 4610 -> 4581 (-0.63%)
fills in affected programs: 374 -> 345 (-7.75%)
helped: 6
HURT: 0
---
  src/compiler/nir/nir.h|  22 +++
  src/compiler/nir/nir_opt_if.c | 113 ++
  2 files changed, 135 insertions(+)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 009a6d60371..0caacd30173 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2331,6 +2331,28 @@ nir_after_block_before_jump(nir_block *block)
 }
  }
  
+static inline nir_cursor

+nir_before_src(nir_src *src, bool is_if_condition)
+{
+   if (is_if_condition) {
+  nir_block *prev_block =
+ nir_cf_node_as_block(nir_cf_node_prev(&src->parent_if->cf_node));
+  assert(!nir_block_ends_in_jump(prev_block));
+  return nir_after_block(prev_block);
+   } else if (src->parent_instr->type == nir_instr_type_phi) {
+  nir_phi_instr *cond_phi = nir_instr_as_phi(src->parent_instr);
+  nir_foreach_phi_src(phi_src, cond_phi) {
+ if (phi_src->src.ssa == src->ssa) {
+return nir_after_block_before_jump(phi_src->pred);
+ }
+  }
+
+  unreachable("failed to find phi src");
+   } else {
+  return nir_before_instr(src->parent_instr);
+   }
+}
+
  static inline nir_cursor
  nir_before_cf_node(nir_cf_node *node)
  {
diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
index dacf2d6c667..11c6693d302 100644
--- a/src/compiler/nir/nir_opt_if.c
+++ b/src/compiler/nir/nir_opt_if.c
@@ -369,6 +369,76 @@ opt_if_loop_terminator(nir_if *nif)
 return true;
  }
  
+static void

+replace_if_condition_use_with_const(nir_src *use, void *mem_ctx,
+nir_cursor cursor, unsigned nir_boolean,
+bool if_condition)
+{
+   /* Create const */
+   nir_load_const_instr *load = nir_load_const_instr_create(mem_ctx, 1, 32);
+   load->value.u32[0] = nir_boolean;
+   nir_instr_insert(cursor, &load->instr);
+
+   /* Rewrite use to use const */
+   nir_src new_src = nir_src_for_ssa(&load->def);
+
+   if (if_condition)
+  nir_if_rewrite_condition(use->parent_if, new_src);
+   else
+  nir_instr_rewrite_src(use->parent_instr, use, new_src);
+}
+
+static bool
+evaluate_if_condition(nir_if *nif, nir_cursor cursor, uint32_t *value)
+{
+   nir_block *use_block = nir_cursor_current_block(cursor);
+   if (nir_block_dominates(nir_if_first_then_block(nif), use_block)) {
+  *value = NIR_TRUE;
+  return true;
+   } else if (nir_block_dominates(nir_if_first_else_block(nif), use_block)) {
+  *value = NIR_FALSE;
+  return true;
+   } else {
+  return false;
+   }
+}
+
+static bool
+evaluate_condition_use(nir_if *nif, nir_src *use_src, void *mem_ctx,
+   bool is_if_condition)
+{
+   bool progress = false;
+
+   uint32_t value;
+   nir_cursor cursor = nir_before_src(use_src, is_if_condition);
+   if (evaluate_if_condition(nif, cursor, &value)) {
+  replace_if_condition_use_with_const(use_src, mem_ctx, cursor, value,
+  is_if_condition);
+  progress = true;
+   }
+
+   return progress;
+}
+
+static bool
+opt_if_evaluate_condition_use(nir_if *nif, void *mem_ctx)
+{
+   bool progress = false;
+
+   /* Evaluate any uses of the if condition inside the if branches */
+   assert(nif->condition.is_ssa);
+   nir_foreach_use_safe(use_src, nif->condition.ssa) {
+  progress |= evaluate_condition_use(nif, use_src, mem_ctx, false);
+   }
+
+   nir_foreach_if_use_safe(use_src, nif->condition.ssa) {
+  if (use_src->parent_if != nif)
+ progress |= evaluate_condition_use(nif, use_src, mem_ctx, true);
+   }
+
+   return progress;
+}
+
  static bool
  opt_if_cf_list(nir_builder *b, struct exec_list *cf_list)
  {
@@ -402,6 +472,41 @@ opt_if_cf_list(nir_builder *b, struct exec_list *cf_list)
 return progress;
  }
  
+/**

+ * These optimisations depend on nir_metadata_block_index and 

Re: [Mesa-dev] [PATCH v4 1/7] nir: evaluate if condition uses inside the if branches

2018-08-29 Thread Jason Ekstrand
On Wed, Aug 29, 2018 at 7:58 PM Ian Romanick  wrote:

> On 08/27/2018 02:08 AM, Timothy Arceri wrote:
> > Since we know what side of the branch we ended up on we can just
> > replace the use with a constant.
> >
> > All the spill changes in shader-db are from Dolphin uber shaders,
> > despite some small regressions the change is clearly positive.
> >
> > V2: insert new constant after any phis in the
> > use->parent_instr->type == nir_instr_type_phi path.
> >
> > v3:
> >  - use nir_after_block_before_jump() for inserting const
> >  - check dominance of phi uses correctly
> >
> > v4:
> >  - create some helpers as suggested by Jason.
> >
> > shader-db results IVB:
> >
> > total instructions in shared programs: 9999201 -> 9993483 (-0.06%)
> > instructions in affected programs: 163235 -> 157517 (-3.50%)
> > helped: 132
> > HURT: 2
> >
> > total cycles in shared programs: 231670754 -> 219476091 (-5.26%)
> > cycles in affected programs: 143424120 -> 131229457 (-8.50%)
> > helped: 115
> > HURT: 24
> >
> > total spills in shared programs: 4383 -> 4370 (-0.30%)
> > spills in affected programs: 1656 -> 1643 (-0.79%)
> > helped: 9
> > HURT: 18
> >
> > total fills in shared programs: 4610 -> 4581 (-0.63%)
> > fills in affected programs: 374 -> 345 (-7.75%)
> > helped: 6
> > HURT: 0
> > ---
> >  src/compiler/nir/nir.h|  22 +++
> >  src/compiler/nir/nir_opt_if.c | 113 ++
> >  2 files changed, 135 insertions(+)
> >
> > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> > index 009a6d60371..0caacd30173 100644
> > --- a/src/compiler/nir/nir.h
> > +++ b/src/compiler/nir/nir.h
> > @@ -2331,6 +2331,28 @@ nir_after_block_before_jump(nir_block *block)
> > }
> >  }
> >
> > +static inline nir_cursor
> > +nir_before_src(nir_src *src, bool is_if_condition)
> > +{
> > +   if (is_if_condition) {
> > +  nir_block *prev_block =
> > +
>  nir_cf_node_as_block(nir_cf_node_prev(&src->parent_if->cf_node));
> > +  assert(!nir_block_ends_in_jump(prev_block));
> > +  return nir_after_block(prev_block);
> > +   } else if (src->parent_instr->type == nir_instr_type_phi) {
> > +  nir_phi_instr *cond_phi = nir_instr_as_phi(src->parent_instr);
> > +  nir_foreach_phi_src(phi_src, cond_phi) {
> > + if (phi_src->src.ssa == src->ssa) {
> > +return nir_after_block_before_jump(phi_src->pred);
> > + }
> > +  }
> > +
> > +  unreachable("failed to find phi src");
> > +   } else {
> > +  return nir_before_instr(src->parent_instr);
> > +   }
> > +}
> > +
> >  static inline nir_cursor
> >  nir_before_cf_node(nir_cf_node *node)
> >  {
> > diff --git a/src/compiler/nir/nir_opt_if.c
> b/src/compiler/nir/nir_opt_if.c
> > index dacf2d6c667..11c6693d302 100644
> > --- a/src/compiler/nir/nir_opt_if.c
> > +++ b/src/compiler/nir/nir_opt_if.c
> > @@ -369,6 +369,76 @@ opt_if_loop_terminator(nir_if *nif)
> > return true;
> >  }
> >
> > +static void
> > +replace_if_condition_use_with_const(nir_src *use, void *mem_ctx,
> > +nir_cursor cursor, unsigned
> nir_boolean,
> > +bool if_condition)
> > +{
> > +   /* Create const */
> > +   nir_load_const_instr *load = nir_load_const_instr_create(mem_ctx, 1,
> 32);
> > +   load->value.u32[0] = nir_boolean;
> > +   nir_instr_insert(cursor, &load->instr);
> > +
> > +   /* Rewrite use to use const */
> > +   nir_src new_src = nir_src_for_ssa(&load->def);
> > +
> > +   if (if_condition)
> > +  nir_if_rewrite_condition(use->parent_if, new_src);
> > +   else
> > +  nir_instr_rewrite_src(use->parent_instr, use, new_src);
> > +}
> > +
> > +static bool
> > +evaluate_if_condition(nir_if *nif, nir_cursor cursor, uint32_t *value)
> > +{
> > +   nir_block *use_block = nir_cursor_current_block(cursor);
> > +   if (nir_block_dominates(nir_if_first_then_block(nif), use_block)) {
> > +  *value = NIR_TRUE;
> > +  return true;
> > +   } else if (nir_block_dominates(nir_if_first_else_block(nif),
> use_block)) {
> > +  *value = NIR_FALSE;
> > +  return true;
> > +   } else {
> > +  return false;
> > +   }
> > +}
> > +
> > +static bool
> > +evaluate_condition_use(nir_if *nif, nir_src *use_src, void *mem_ctx,
> > +   bool is_if_condition)
> > +{
> > +   bool progress = false;
> > +
> > +   uint32_t value;
> > +   nir_cursor cursor = nir_before_src(use_src, is_if_condition);
> > +   if (evaluate_if_condition(nif, cursor, &value)) {
> > +  replace_if_condition_use_with_const(use_src, mem_ctx, cursor,
> value,
> > +  is_if_condition);
> > +  progress = true;
> > +   }
> > +
> > +   return progress;
> > +}
> > +
> > +static bool
> > +opt_if_evaluate_condition_use(nir_if *nif, void *mem_ctx)
> > +{
> > +   bool progress = false;
> > +
> > +   /* Evaluate any uses of the if condition inside the if branches */
> > +   assert(nif->condition.is_ssa);
> > +   nir_foreach_use_safe(use_src, 

Re: [Mesa-dev] [PATCH v3] nir: propagates if condition evaluation down some alu chains

2018-08-29 Thread Jason Ekstrand
On Wed, Aug 29, 2018 at 6:48 PM Timothy Arceri 
wrote:

> v2:
>  - only allow nir_op_inot or nir_op_b2i when alu input is 1.
>  - use some helpers as suggested by Jason.
>
> v3:
>  - evaluate alu op for single input alu ops
>  - add helper function to decide if to propagate through alu
>  - make use of nir_before_src in another spot
>
> shader-db IVB results:
>
> total instructions in shared programs: 9993483 -> 9993472 (-0.00%)
> instructions in affected programs: 1300 -> 1289 (-0.85%)
> helped: 11
> HURT: 0
>
> total cycles in shared programs: 219476091 -> 219476059 (-0.00%)
> cycles in affected programs: 7675 -> 7643 (-0.42%)
> helped: 10
> HURT: 1
> ---
>  src/compiler/nir/nir_opt_if.c | 145 --
>  1 file changed, 139 insertions(+), 6 deletions(-)
>
> diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
> index 11c6693d302..9e9d8edda21 100644
> --- a/src/compiler/nir/nir_opt_if.c
> +++ b/src/compiler/nir/nir_opt_if.c
> @@ -23,6 +23,7 @@
>
>  #include "nir.h"
>  #include "nir/nir_builder.h"
> +#include "nir_constant_expressions.h"
>  #include "nir_control_flow.h"
>  #include "nir_loop_analyze.h"
>
> @@ -403,9 +404,127 @@ evaluate_if_condition(nir_if *nif, nir_cursor
> cursor, uint32_t *value)
> }
>  }
>
> +/*
> + * This propagates if condition evaluation down the chain of some alu
> + * instructions. For example by checking the use of some of the following
> alu
> + * instruction we can eventually replace ssa_107 with NIR_TRUE.
> + *
> + *   loop {
> + *  block block_1:
> + *  vec1 32 ssa_85 = load_const (0x0002)
> + *  vec1 32 ssa_86 = ieq ssa_48, ssa_85
> + *  vec1 32 ssa_87 = load_const (0x0001)
> + *  vec1 32 ssa_88 = ieq ssa_48, ssa_87
> + *  vec1 32 ssa_89 = ior ssa_86, ssa_88
> + *  vec1 32 ssa_90 = ieq ssa_48, ssa_0
> + *  vec1 32 ssa_91 = ior ssa_89, ssa_90
> + *  if ssa_86 {
> + * block block_2:
> + * ...
> + *break
> + *  } else {
> + *block block_3:
> + *  }
> + *  block block_4:
> + *  if ssa_88 {
> + *block block_5:
> + * ...
> + *break
> + *  } else {
> + *block block_6:
> + *  }
> + *  block block_7:
> + *  if ssa_90 {
> + *block block_8:
> + * ...
> + *break
> + *  } else {
> + *block block_9:
> + *  }
> + *  block block_10:
> + *  vec1 32 ssa_107 = inot ssa_91
> + *  if ssa_107 {
> + *block block_11:
> + *break
> + *  } else {
> + *block block_12:
> + *  }
> + *   }
> + */
>  static bool
> -evaluate_condition_use(nir_if *nif, nir_src *use_src, void *mem_ctx,
> -   bool is_if_condition)
> +propagate_condition_eval(nir_builder *b, nir_if *nif, nir_src *use_src,
> + nir_src *alu_use, nir_alu_instr *alu, void
> *mem_ctx,
> + bool is_if_condition)
> +{
> +   bool progress = false;
> +
> +   uint32_t bool_value;
> +   b->cursor = nir_before_src(alu_use, is_if_condition);
> +   if (nir_op_infos[alu->op].num_inputs == 1) {
> +  assert(alu->op == nir_op_inot || alu->op == nir_op_b2i);
> +
> +  if (evaluate_if_condition(nif, b->cursor, &bool_value)) {
> + nir_const_value bool_src;
> + bool_src.u32[0] = bool_value;
>

Since you're just going to put bool_value in bool_src and const_value
below, why not just have bool_value be a nir_const_value and do
evaluate_if_condition(nif, b->cursor, &bool_value.u32[0]) and be done with
it?


> +
> + unsigned bit_size = nir_src_bit_size(alu->src[0].src);
>

This had better be 32 or we're toast because we're assuming a uint32_t the
whole time.  Maybe make this an assert instead?

+ nir_const_value result =
> +nir_eval_const_opcode(alu->op, 1, bit_size, &bool_src);
> +
> + replace_if_condition_use_with_const(alu_use, mem_ctx, b->cursor,
> + result.u32[0],
> is_if_condition);
> + progress = true;
> +  }
> +   } else {
> +  assert(alu->op == nir_op_ior || alu->op == nir_op_iand);
> +
> +  if (evaluate_if_condition(nif, b->cursor, &bool_value)) {
> + nir_ssa_def *def[2];
> + for (unsigned i = 0; i < 2; i++) {
> +if (alu->src[i].src.ssa == use_src->ssa) {
> +   nir_const_value const_value;
> +   const_value.u32[0] = bool_value;
> +
> +   def[i] = nir_build_imm(b, 1, 32, const_value);
>

We assume 32 here, for instance.

With the above two clean-ups,

Reviewed-by: Jason Ekstrand 


> +} else {
> +   def[i] = alu->src[i].src.ssa;
> +}
> + }
> +
> + nir_ssa_def *nalu =
> +nir_build_alu(b, alu->op, def[0], def[1], NULL, NULL);
> +
> + /* Rewrite use to use new alu instruction */
> + nir_src new_src = nir_src_for_ssa(nalu);
> +
> 

Re: [Mesa-dev] [PATCH v4 6/7] nir: add loop unroll support for wrapper loops

2018-08-29 Thread Ian Romanick
On 08/27/2018 02:08 AM, Timothy Arceri wrote:
> This adds support for unrolling the classic
> 
> do {
> // ...
> } while (false)
> 
> that is used to wrap multi-line macros. GLSL IR also wraps switch
> statements in a loop like this.

Yes!  This has been several items down on my to-do list for months.

Reviewed-by: Ian Romanick 
Thanked-by: Ian Romanick 

> 
> shader-db results IVB:
> 
> total loops in shared programs: 2515 -> 2512 (-0.12%)
> loops in affected programs: 33 -> 30 (-9.09%)
> helped: 3
> HURT: 0
> ---
>  src/compiler/nir/nir_opt_loop_unroll.c | 77 ++
>  1 file changed, 77 insertions(+)
> 
> diff --git a/src/compiler/nir/nir_opt_loop_unroll.c 
> b/src/compiler/nir/nir_opt_loop_unroll.c
> index e0e0b754716..9c33267cb72 100644
> --- a/src/compiler/nir/nir_opt_loop_unroll.c
> +++ b/src/compiler/nir/nir_opt_loop_unroll.c
> @@ -465,6 +465,65 @@ complex_unroll(nir_loop *loop, nir_loop_terminator 
> *unlimit_term,
> _mesa_hash_table_destroy(remap_table, NULL);
>  }
>  
> +/* Unrolls the classic wrapper loops e.g
> + *
> + *do {
> + *// ...
> + *} while (false)
> + */
> +static bool
> +wrapper_unroll(nir_loop *loop)
> +{
> +   bool progress = false;
> +
> +   nir_block *blk_after_loop =
> +  nir_cursor_current_block(nir_after_cf_node(&loop->cf_node));
> +
> +   /* There may still be some single src phis following the loop that
> +* have not yet been cleaned up by another pass. Tidy those up before
> +* unrolling the loop.
> +*/
> +   nir_foreach_instr_safe(instr, blk_after_loop) {
> +  if (instr->type != nir_instr_type_phi)
> + break;
> +
> +  nir_phi_instr *phi = nir_instr_as_phi(instr);
> +  assert(exec_list_length(&phi->srcs) == 1);
> +
> +  nir_phi_src *phi_src = exec_node_data(nir_phi_src,
> +exec_list_get_head(&phi->srcs),
> +node);
> +
> +  nir_ssa_def_rewrite_uses(&phi->dest.ssa, phi_src->src);
> +  nir_instr_remove(instr);
> +
> +  progress = true;
> +   }
> +
> +   nir_block *last_loop_blk = nir_loop_last_block(loop);
> +   if (nir_block_ends_in_break(last_loop_blk)) {
> +
> +  /* Remove break at end of the loop */
> +  nir_instr *break_instr = nir_block_last_instr(last_loop_blk);
> +  nir_instr_remove(break_instr);
> +
> +  /* Pluck out the loop body. */
> +  nir_cf_list loop_body;
> +  nir_cf_extract(&loop_body, 
> nir_before_block(nir_loop_first_block(loop)),
> + nir_after_block(nir_loop_last_block(loop)));
> +
> +  /* Reinsert loop body after the loop */
> +  nir_cf_reinsert(&loop_body, nir_after_cf_node(&loop->cf_node));
> +
> +  /* The loop has been unrolled so remove it. */
> +  nir_cf_node_remove(&loop->cf_node);
> +
> +  progress = true;
> +   }
> +
> +   return progress;
> +}
> +
>  static bool
>  is_loop_small_enough_to_unroll(nir_shader *shader, nir_loop_info *li)
>  {
> @@ -516,6 +575,24 @@ process_loops(nir_shader *sh, nir_cf_node *cf_node, bool 
> *has_nested_loop_out)
>  */
> if (!progress) {
>  
> +  /* Check for the classic
> +   *
> +   *do {
> +   *// ...
> +   *} while (false)
> +   *
> +   * that is used to wrap multi-line macros. GLSL IR also wraps switch
> +   * statements in a loop like this.
> +   */
> +  if (loop->info->limiting_terminator == NULL &&
> +  list_empty(&loop->info->loop_terminator_list) &&
> +  !loop->info->complex_loop) {
> +
> + progress = wrapper_unroll(loop);
> +
> + goto exit;
> +  }
> +
>if (has_nested_loop || loop->info->limiting_terminator == NULL)
>   goto exit;
>  
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3] nir: propagates if condition evaluation down some alu chains

2018-08-29 Thread Ian Romanick
I feel like this would be a good candidate for some unit tests.

As a follow up, you might also consider adding bcsel.  I have seen quite
a few cases of bcsel(bool, bool, bool).

On 08/29/2018 04:48 PM, Timothy Arceri wrote:
> v2:
>  - only allow nir_op_inot or nir_op_b2i when alu input is 1.
>  - use some helpers as suggested by Jason.
> 
> v3:
>  - evaluate alu op for single input alu ops
>  - add helper function to decide if to propagate through alu
>  - make use of nir_before_src in another spot
> 
> shader-db IVB results:
> 
> total instructions in shared programs: 9993483 -> 9993472 (-0.00%)
> instructions in affected programs: 1300 -> 1289 (-0.85%)
> helped: 11
> HURT: 0
> 
> total cycles in shared programs: 219476091 -> 219476059 (-0.00%)
> cycles in affected programs: 7675 -> 7643 (-0.42%)
> helped: 10
> HURT: 1
> ---
>  src/compiler/nir/nir_opt_if.c | 145 --
>  1 file changed, 139 insertions(+), 6 deletions(-)
> 
> diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
> index 11c6693d302..9e9d8edda21 100644
> --- a/src/compiler/nir/nir_opt_if.c
> +++ b/src/compiler/nir/nir_opt_if.c
> @@ -23,6 +23,7 @@
>  
>  #include "nir.h"
>  #include "nir/nir_builder.h"
> +#include "nir_constant_expressions.h"
>  #include "nir_control_flow.h"
>  #include "nir_loop_analyze.h"
>  
> @@ -403,9 +404,127 @@ evaluate_if_condition(nir_if *nif, nir_cursor cursor, 
> uint32_t *value)
> }
>  }
>  
> +/*
> + * This propagates if condition evaluation down the chain of some alu
> + * instructions. For example by checking the use of some of the following alu
> + * instruction we can eventually replace ssa_107 with NIR_TRUE.
> + *
> + *   loop {
> + *  block block_1:
> + *  vec1 32 ssa_85 = load_const (0x0002)
> + *  vec1 32 ssa_86 = ieq ssa_48, ssa_85
> + *  vec1 32 ssa_87 = load_const (0x0001)
> + *  vec1 32 ssa_88 = ieq ssa_48, ssa_87
> + *  vec1 32 ssa_89 = ior ssa_86, ssa_88
> + *  vec1 32 ssa_90 = ieq ssa_48, ssa_0
> + *  vec1 32 ssa_91 = ior ssa_89, ssa_90
> + *  if ssa_86 {
> + * block block_2:
> + * ...
> + *break
> + *  } else {
> + *block block_3:
> + *  }
> + *  block block_4:
> + *  if ssa_88 {
> + *block block_5:
> + * ...
> + *break
> + *  } else {
> + *block block_6:
> + *  }
> + *  block block_7:
> + *  if ssa_90 {
> + *block block_8:
> + * ...
> + *break
> + *  } else {
> + *block block_9:
> + *  }
> + *  block block_10:
> + *  vec1 32 ssa_107 = inot ssa_91
> + *  if ssa_107 {
> + *block block_11:
> + *break
> + *  } else {
> + *block block_12:
> + *  }
> + *   }
> + */
>  static bool
> -evaluate_condition_use(nir_if *nif, nir_src *use_src, void *mem_ctx,
> -   bool is_if_condition)
> +propagate_condition_eval(nir_builder *b, nir_if *nif, nir_src *use_src,
> + nir_src *alu_use, nir_alu_instr *alu, void *mem_ctx,
> + bool is_if_condition)
> +{
> +   bool progress = false;
> +
> +   uint32_t bool_value;
> +   b->cursor = nir_before_src(alu_use, is_if_condition);
> +   if (nir_op_infos[alu->op].num_inputs == 1) {
> +  assert(alu->op == nir_op_inot || alu->op == nir_op_b2i);
> +
> +  if (evaluate_if_condition(nif, b->cursor, &bool_value)) {
> + nir_const_value bool_src;
> + bool_src.u32[0] = bool_value;
> +
> + unsigned bit_size = nir_src_bit_size(alu->src[0].src);
> + nir_const_value result =
> +nir_eval_const_opcode(alu->op, 1, bit_size, &bool_src);
> +
> + replace_if_condition_use_with_const(alu_use, mem_ctx, b->cursor,
> + result.u32[0], is_if_condition);
> + progress = true;
> +  }
> +   } else {
> +  assert(alu->op == nir_op_ior || alu->op == nir_op_iand);
> +
> +  if (evaluate_if_condition(nif, b->cursor, &bool_value)) {
> + nir_ssa_def *def[2];
> + for (unsigned i = 0; i < 2; i++) {
> +if (alu->src[i].src.ssa == use_src->ssa) {
> +   nir_const_value const_value;
> +   const_value.u32[0] = bool_value;
> +
> +   def[i] = nir_build_imm(b, 1, 32, const_value);
> +} else {
> +   def[i] = alu->src[i].src.ssa;
> +}
> + }
> +
> + nir_ssa_def *nalu =
> +nir_build_alu(b, alu->op, def[0], def[1], NULL, NULL);
> +
> + /* Rewrite use to use new alu instruction */
> + nir_src new_src = nir_src_for_ssa(nalu);
> +
> + if (is_if_condition)
> +nir_if_rewrite_condition(alu_use->parent_if, new_src);
> + else
> +nir_instr_rewrite_src(alu_use->parent_instr, alu_use, new_src);
> +
> + progress = true;
> +  }
> +   }
> +
> +   

Re: [Mesa-dev] [PATCH] glsl/linker: Link all out vars from a shader objects on a single stage

2018-08-29 Thread Timothy Arceri

On 30/08/18 08:16, Andres Gomez wrote:

Vadym, should we also include this in the stable queues ?



Yes. It should be fine to add this to stable. Thanks.


On Mon, 2018-08-27 at 15:20 +0300, Vadym Shovkoplias wrote:

From: "vadym.shovkoplias" 

During intra stage linking some out variables can be dropped because
they are not used in a shader with the main function. But these out vars
can be referenced on later stages which can lead to further linking
errors.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105731
Signed-off-by: Vadym Shovkoplias 
---
  src/compiler/glsl/linker.cpp | 38 
  1 file changed, 38 insertions(+)

diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
index 3ce78fe642..3b0c01c316 100644
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -2187,6 +2187,41 @@ link_cs_input_layout_qualifiers(struct gl_shader_program 
*prog,
 }
  }
  
+/**

+ * Link all out variables on a single stage which are not
+ * directly used in a shader with the main function.
+ */
+static void
+link_output_variables(struct gl_linked_shader *linked_shader,
+  struct gl_shader **shader_list,
+  unsigned num_shaders)
+{
+   struct glsl_symbol_table *symbols = linked_shader->symbols;
+
+   for (unsigned i = 0; i < num_shaders; i++) {
+
+  /* Skip shader object with main function */
+  if (shader_list[i]->symbols->get_function("main"))
+ continue;
+
+  foreach_in_list (ir_instruction, ir, shader_list[i]->ir) {
+
+ if (ir->ir_type != ir_type_variable)
+continue;
+
+ ir_variable *const var = (ir_variable *) ir;
+
+ if (var->data.mode == ir_var_shader_out &&
+   !symbols->get_variable(var->name)) {
+symbols->add_variable(var);
+linked_shader->ir->push_head(var);
+ }
+  }
+   }
+
+   return;
+}
+
  
  /**

   * Combine a group of shaders for a single stage to generate a linked shader
@@ -2352,6 +2387,9 @@ link_intrastage_shaders(void *mem_ctx,
return NULL;
 }
  
+   if (linked->Stage != MESA_SHADER_FRAGMENT)

+  link_output_variables(linked, shader_list, num_shaders);
+
 /* Make a pass over all variable declarations to ensure that arrays with
  * unspecified sizes have a size specified.  The size is inferred from the
  * max_array_access field.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 105731] linker error "fragment shader input ... has no matching output in the previous stage" when previous stage's output declaration in a separate shader object

2018-08-29 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=105731

--- Comment #4 from Timothy Arceri  ---
(In reply to Mark Janes from comment #3)
> Vadym, can you make a piglit test for this bug?

Hi Mark, this was already done :)

commit c98669cbd1f801c8fda25aceab23b5c54de76b9e
Author: Vadym Shovkoplias 
Date:   Mon Aug 27 15:19:40 2018 +0300

glsl-1.30: add linker test for inter stage in/out vars usage

This test exposes a Mesa GLSL linker bug. The test fails with the
following error message:

   error: fragment shader input `foo' has no matching output in the
previous
  stage

Signed-off-by: Vadym Shovkoplias 
Reviewed-by: Timothy Arceri 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105731

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v4 1/7] nir: evaluate if condition uses inside the if branches

2018-08-29 Thread Ian Romanick
On 08/27/2018 02:08 AM, Timothy Arceri wrote:
> Since we know what side of the branch we ended up on we can just
> replace the use with a constant.
> 
> All the spill changes in shader-db are from Dolphin uber shaders,
> despite some small regressions the change is clearly positive.
> 
> V2: insert new constant after any phis in the
> use->parent_instr->type == nir_instr_type_phi path.
> 
> v3:
>  - use nir_after_block_before_jump() for inserting const
>  - check dominance of phi uses correctly
> 
> v4:
>  - create some helpers as suggested by Jason.
> 
> shader-db results IVB:
> 
> total instructions in shared programs: 9999201 -> 9993483 (-0.06%)
> instructions in affected programs: 163235 -> 157517 (-3.50%)
> helped: 132
> HURT: 2
> 
> total cycles in shared programs: 231670754 -> 219476091 (-5.26%)
> cycles in affected programs: 143424120 -> 131229457 (-8.50%)
> helped: 115
> HURT: 24
> 
> total spills in shared programs: 4383 -> 4370 (-0.30%)
> spills in affected programs: 1656 -> 1643 (-0.79%)
> helped: 9
> HURT: 18
> 
> total fills in shared programs: 4610 -> 4581 (-0.63%)
> fills in affected programs: 374 -> 345 (-7.75%)
> helped: 6
> HURT: 0
> ---
>  src/compiler/nir/nir.h|  22 +++
>  src/compiler/nir/nir_opt_if.c | 113 ++
>  2 files changed, 135 insertions(+)
> 
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index 009a6d60371..0caacd30173 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -2331,6 +2331,28 @@ nir_after_block_before_jump(nir_block *block)
> }
>  }
>  
> +static inline nir_cursor
> +nir_before_src(nir_src *src, bool is_if_condition)
> +{
> +   if (is_if_condition) {
> +  nir_block *prev_block =
> + nir_cf_node_as_block(nir_cf_node_prev(&src->parent_if->cf_node));
> +  assert(!nir_block_ends_in_jump(prev_block));
> +  return nir_after_block(prev_block);
> +   } else if (src->parent_instr->type == nir_instr_type_phi) {
> +  nir_phi_instr *cond_phi = nir_instr_as_phi(src->parent_instr);
> +  nir_foreach_phi_src(phi_src, cond_phi) {
> + if (phi_src->src.ssa == src->ssa) {
> +return nir_after_block_before_jump(phi_src->pred);
> + }
> +  }
> +
> +  unreachable("failed to find phi src");
> +   } else {
> +  return nir_before_instr(src->parent_instr);
> +   }
> +}
> +
>  static inline nir_cursor
>  nir_before_cf_node(nir_cf_node *node)
>  {
> diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
> index dacf2d6c667..11c6693d302 100644
> --- a/src/compiler/nir/nir_opt_if.c
> +++ b/src/compiler/nir/nir_opt_if.c
> @@ -369,6 +369,76 @@ opt_if_loop_terminator(nir_if *nif)
> return true;
>  }
>  
> +static void
> +replace_if_condition_use_with_const(nir_src *use, void *mem_ctx,
> +nir_cursor cursor, unsigned nir_boolean,
> +bool if_condition)
> +{
> +   /* Create const */
> +   nir_load_const_instr *load = nir_load_const_instr_create(mem_ctx, 1, 32);
> +   load->value.u32[0] = nir_boolean;
> +   nir_instr_insert(cursor, &load->instr);
> +
> +   /* Rewrite use to use const */
> +   nir_src new_src = nir_src_for_ssa(&load->def);
> +
> +   if (if_condition)
> +  nir_if_rewrite_condition(use->parent_if, new_src);
> +   else
> +  nir_instr_rewrite_src(use->parent_instr, use, new_src);
> +}
> +
> +static bool
> +evaluate_if_condition(nir_if *nif, nir_cursor cursor, uint32_t *value)
> +{
> +   nir_block *use_block = nir_cursor_current_block(cursor);
> +   if (nir_block_dominates(nir_if_first_then_block(nif), use_block)) {
> +  *value = NIR_TRUE;
> +  return true;
> +   } else if (nir_block_dominates(nir_if_first_else_block(nif), use_block)) {
> +  *value = NIR_FALSE;
> +  return true;
> +   } else {
> +  return false;
> +   }
> +}
> +
> +static bool
> +evaluate_condition_use(nir_if *nif, nir_src *use_src, void *mem_ctx,
> +   bool is_if_condition)
> +{
> +   bool progress = false;
> +
> +   uint32_t value;
> +   nir_cursor cursor = nir_before_src(use_src, is_if_condition);
> +   if (evaluate_if_condition(nif, cursor, &value)) {
> +  replace_if_condition_use_with_const(use_src, mem_ctx, cursor, value,
> +  is_if_condition);
> +  progress = true;
> +   }
> +
> +   return progress;
> +}
> +
> +static bool
> +opt_if_evaluate_condition_use(nir_if *nif, void *mem_ctx)
> +{
> +   bool progress = false;
> +
> +   /* Evaluate any uses of the if condition inside the if branches */
> +   assert(nif->condition.is_ssa);
> +   nir_foreach_use_safe(use_src, nif->condition.ssa) {
> +  progress |= evaluate_condition_use(nif, use_src, mem_ctx, false);
> +   }
> +
> +   nir_foreach_if_use_safe(use_src, nif->condition.ssa) {
> +  if (use_src->parent_if != nif)
> + progress |= evaluate_condition_use(nif, use_src, mem_ctx, true);
> +   }
> +
> +   return progress;
> +}
> +
>  static 

[Mesa-dev] [PATCH] radv/meta: Set num_components on image_store intrinsics

2018-08-29 Thread Jason Ekstrand
Now that image load/store intrinsics are variable-width, we need to set
num_components accordingly.  In 15d39f474b890, both glsl_to_nir and
spirv_to_nir were updated to properly set num_components but radv meta
was left behind.

Fixes: 15d39f474b890 "nir: Make image load/store intrinsics..."
---
 src/amd/vulkan/radv_meta_bufimage.c   | 4 
 src/amd/vulkan/radv_meta_fast_clear.c | 1 +
 src/amd/vulkan/radv_meta_resolve_cs.c | 1 +
 3 files changed, 6 insertions(+)

diff --git a/src/amd/vulkan/radv_meta_bufimage.c 
b/src/amd/vulkan/radv_meta_bufimage.c
index aa17c25833b..8a9ac7a8ea4 100644
--- a/src/amd/vulkan/radv_meta_bufimage.c
+++ b/src/amd/vulkan/radv_meta_bufimage.c
@@ -116,6 +116,7 @@ build_nir_itob_compute_shader(struct radv_device *dev, bool 
is_3d)
 
nir_ssa_def *outval = >dest.ssa;
nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, 
nir_intrinsic_image_deref_store);
+   store->num_components = 4;
store->src[0] = nir_src_for_ssa(_build_deref_var(, 
output_img)->dest.ssa);
store->src[1] = nir_src_for_ssa(coord);
store->src[2] = nir_src_for_ssa(nir_ssa_undef(, 1, 32));
@@ -342,6 +343,7 @@ build_nir_btoi_compute_shader(struct radv_device *dev, bool 
is_3d)
 
nir_ssa_def *outval = >dest.ssa;
nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, 
nir_intrinsic_image_deref_store);
+   store->num_components = 4;
store->src[0] = nir_src_for_ssa(_build_deref_var(, 
output_img)->dest.ssa);
store->src[1] = nir_src_for_ssa(img_coord);
store->src[2] = nir_src_for_ssa(nir_ssa_undef(, 1, 32));
@@ -557,6 +559,7 @@ build_nir_itoi_compute_shader(struct radv_device *dev, bool 
is_3d)
 
nir_ssa_def *outval = >dest.ssa;
nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, 
nir_intrinsic_image_deref_store);
+store->num_components = 4;
store->src[0] = nir_src_for_ssa(_build_deref_var(, 
output_img)->dest.ssa);
store->src[1] = nir_src_for_ssa(dst_coord);
store->src[2] = nir_src_for_ssa(nir_ssa_undef(, 1, 32));
@@ -753,6 +756,7 @@ build_nir_cleari_compute_shader(struct radv_device *dev, 
bool is_3d)
global_id = nir_vec(, comps, 4);
 
nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, 
nir_intrinsic_image_deref_store);
+   store->num_components = 4;
store->src[0] = nir_src_for_ssa(_build_deref_var(, 
output_img)->dest.ssa);
store->src[1] = nir_src_for_ssa(global_id);
store->src[2] = nir_src_for_ssa(nir_ssa_undef(, 1, 32));
diff --git a/src/amd/vulkan/radv_meta_fast_clear.c 
b/src/amd/vulkan/radv_meta_fast_clear.c
index b4cc900028e..9544ee94f5c 100644
--- a/src/amd/vulkan/radv_meta_fast_clear.c
+++ b/src/amd/vulkan/radv_meta_fast_clear.c
@@ -92,6 +92,7 @@ build_dcc_decompress_compute_shader(struct radv_device *dev)
 
nir_ssa_def *outval = >dest.ssa;
nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, 
nir_intrinsic_image_deref_store);
+   store->num_components = 4;
store->src[0] = nir_src_for_ssa(_build_deref_var(, 
output_img)->dest.ssa);
store->src[1] = nir_src_for_ssa(global_id);
store->src[2] = nir_src_for_ssa(nir_ssa_undef(, 1, 32));
diff --git a/src/amd/vulkan/radv_meta_resolve_cs.c 
b/src/amd/vulkan/radv_meta_resolve_cs.c
index fca49a01bb0..2fcfb0aaeff 100644
--- a/src/amd/vulkan/radv_meta_resolve_cs.c
+++ b/src/amd/vulkan/radv_meta_resolve_cs.c
@@ -136,6 +136,7 @@ build_resolve_compute_shader(struct radv_device *dev, bool 
is_integer, bool is_s
 
nir_ssa_def *coord = nir_iadd(, global_id, _offset->dest.ssa);
nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, 
nir_intrinsic_image_deref_store);
+   store->num_components = 4;
store->src[0] = nir_src_for_ssa(_build_deref_var(, 
output_img)->dest.ssa);
store->src[1] = nir_src_for_ssa(coord);
store->src[2] = nir_src_for_ssa(nir_ssa_undef(, 1, 32));
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] intel: limit urb size for SKL/KBL/CFL GT1

2018-08-29 Thread Lionel Landwerlin
The documentation puts the URB size for SKL GT1 as "128K - 192K". I
guess this means we can't tell which one it is, so we have to go for
the lower bound. This change also changes the max VS URB entries which
is lower on GT1 skus.

Fixes a CTS test :

  dEQP-GLES31.functional.geometry_shading.layered.render_with_default_layer_3d

Signed-off-by: Lionel Landwerlin 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107505
---
 src/intel/dev/gen_device_info.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/intel/dev/gen_device_info.c b/src/intel/dev/gen_device_info.c
index b0ae4d18034..ed1e73efa61 100644
--- a/src/intel/dev/gen_device_info.c
+++ b/src/intel/dev/gen_device_info.c
@@ -617,7 +617,8 @@ static const struct gen_device_info gen_device_info_skl_gt1 
= {
.num_subslices = { 2, },
.num_eu_per_subslice = 6,
.l3_banks = 2,
-   .urb.size = 192,
+   .urb.size = 128,
+   .urb.max_entries[MESA_SHADER_VERTEX] = 928,
.simulator_id = 12,
 };
 
@@ -689,6 +690,8 @@ static const struct gen_device_info gen_device_info_kbl_gt1 
= {
.num_subslices = { 2, },
.num_eu_per_subslice = 6,
.l3_banks = 2,
+   .urb.size = 128,
+   .urb.max_entries[MESA_SHADER_VERTEX] = 928,
.simulator_id = 16,
 };
 
@@ -775,6 +778,8 @@ static const struct gen_device_info gen_device_info_cfl_gt1 
= {
.num_subslices = { 2, },
.num_eu_per_subslice = 6,
.l3_banks = 2,
+   .urb.size = 128,
+   .urb.max_entries[MESA_SHADER_VERTEX] = 928,
.simulator_id = 24,
 };
 static const struct gen_device_info gen_device_info_cfl_gt2 = {
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [ANNOUNCE] mesa 18.2.0-rc5

2018-08-29 Thread Andres Gomez
Hello list,

The fifth release candidate for Mesa 18.2.0 is now available.

As per the issue tracker [1] we still have a number of outstanding bugs
blocking the release.

[1] https://bugs.freedesktop.org/show_bug.cgi?id=107457

Currently we have:
 - 23 queued
 - 11 nominated (outstanding)
 - and 0 rejected patches


We have in the queue:

In Mesa Core we have included a correction to actually expose the
GL_EXT_robustness extension for GLES.

The GLSL compiler has received a fix preventing an incorrect linking
error for unused in blocks, which are allowed, that have no corresponding
out blocks in the previous stage. Also, its tests are now set up to be
executed from the build system check target or, at the very least, with
an explicitly versioned python executable.

AMD's drivers have received multiple fixes, including one to correct
some rendering with radv for Super Mario Sunshine with the Dolphin
emulator and another one detected due to a segmentation fault in Rise
of the Tomb Raider.

Intel's drivers have also received multiple fixes, including one to
correct a GPU hang in DOOM 2016 running under wine.

The documentation has also gotten a couple of fixes, to note 0.8.0 as
the minimum required mako version, and to add 3 more features into the
18.2's release notes.

Finally, from the build and integration point of view, we have multiple
fixes for meson: correcting EGL's compilation, making the GLSL tests run
explicitly with python, and actually loading translation files.


Take a look at section "Mesa stable queue" for more information.


Testing reports/general approval


Any testing reports (or general approval of the state of the branch)
will be greatly appreciated.

The plan is to have the sixth release candidate for 18.2.0 next
Wednesday (2018/09/05), around or shortly after 18:00 EEST.

If you have any questions or suggestions - be that about the current
patch queue or otherwise, please go ahead.


Trivial merge conflicts
---

commit f958837964784315c1dc633f41f1ef5d2a17aea9
Author: Dylan Baker 

meson: Actually load translation files

(cherry picked from commit 7c00db9527245d80cb748ec3442163585a5463a6)

commit f6dccf66865c31b13f48b50891a9f5a0d9949b1c
Author: Emil Velikov 

glsl: remove execute bit and shebang from python tests

(cherry picked from commit 48820ed8da0ad50d51a58f26e156d82b685492e2)


Br,
Andres


Mesa stable queue
-

Nominated (11)
==

Andrii Simiklit (1):
  1b0df8a4602 i965/gen6/xfb: handle case where transform feedback is not 
active

Bas Nieuwenhuizen (1):
  4738b6ac814 radv: Add missing checks in radv_get_image_format_properties.

Jason Ekstrand (6):
  4ffb575da59 vulkan/alloc: Add a vk_strdup helper
  8c048af5890 anv: Copy the appliation info into the instance
  c92a463d234 anv: Claim to support depthBounds for ID games
  cdea5d996ed anv: Free the app and engine name
  116b47fe3c0 nir/algebraic: Be more careful converting ushr to 
extract_u8/16
  7cdf8f93390 nir/format_convert: Fix a bitmask in unpack_11f11f10f

Lionel Landwerlin (1):
  5a1c23d1502 anv: blorp: support multiple aspect blits

Marek Olšák (1):
  1e40f694831 ac/surface: fix CMASK fast clear for NPOT textures with 
mipmapping on SI/CI/VI

Tapani Pälli (1):
  a72dbc461bd mesa: allow GL_UNSIGNED_BYTE type for SNORM reads


Queued (23)
===

Andres Gomez (1):
  Update version to 18.2.0-rc5

Dylan Baker (1):
  meson: Actually load translation files

Emil Velikov (2):
  docs: update required mako version
  glsl: remove execute bit and shebang from python tests

Grazvydas Ignotas (1):
  radv: place pointer length into cache uuid

Gurchetan Singh (2):
  meson: fix egl build for surfaceless
  meson: fix egl build for android

Jason Ekstrand (4):
  anv: Fill holes in the VF VUE to zero
  intel/decoder: Clean up field iteration and fix sub-dword fields
  intel/batch_decoder: Fix dynamic state printing
  intel/batch_decoder: Print blend states properly

Lionel Landwerlin (2):
  intel: decoder: unify MI_BB_START field naming
  intel: decoder: handle 0 sized structs

Marek Olšák (2):
  ac: completely remove +auto-waitcnt-before-barrier
  glapi: actually implement GL_EXT_robustness for GLES

Mathieu Bridon (1):
  meson: Run the test with Python

Nanley Chery (3):
  i965/miptree: Use miptree_map in map_blit functions
  i965/miptree: Fix can_blit_slice()
  intel/isl: Avoid tiling some 16K-wide render targets

Rhys Perry (1):
  docs: add forgotten features to 18.2.0 release notes

Samuel Pitoiset (2):
  radv: remove dead variables after splitting per member structs
  ac/nir: fix getting GLSL type of array of samplers for TG4

vadym.shovkoplias (1):
  glsl/linker: Allow unused in blocks which are not declated on 

[Mesa-dev] [PATCH 1/9] mesa: move legacy TCL dri config options

2018-08-29 Thread Timothy Arceri
---
 src/mesa/drivers/dri/radeon/radeon_screen.c | 10 +
 src/mesa/drivers/dri/radeon/radeon_screen.h |  5 +
 src/util/xmlpool/ca.po  | 23 -
 src/util/xmlpool/de.po  | 23 -
 src/util/xmlpool/es.po  | 23 -
 src/util/xmlpool/fr.po  | 23 -
 src/util/xmlpool/nl.po  | 23 -
 src/util/xmlpool/sv.po  | 22 
 src/util/xmlpool/t_options.h| 14 -
 9 files changed, 15 insertions(+), 151 deletions(-)

diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c 
b/src/mesa/drivers/dri/radeon/radeon_screen.c
index 4c93404607d..fe484abf73f 100644
--- a/src/mesa/drivers/dri/radeon/radeon_screen.c
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.c
@@ -80,6 +80,16 @@ DRI_CONF_OPT_BEGIN_B(hyperz, def) \
 DRI_CONF_DESC(en,"Use HyperZ to boost performance") \
 DRI_CONF_OPT_END
 
+#define DRI_CONF_TCL_MODE(def) \
+DRI_CONF_OPT_BEGIN_V(tcl_mode,enum,def,"0:3") \
+DRI_CONF_DESC_BEGIN(en,"TCL mode (Transformation, Clipping, 
Lighting)") \
+DRI_CONF_ENUM(0,"Use software TCL pipeline") \
+DRI_CONF_ENUM(1,"Use hardware TCL as first TCL pipeline 
stage") \
+DRI_CONF_ENUM(2,"Bypass the TCL pipeline") \
+DRI_CONF_ENUM(3,"Bypass the TCL pipeline with state-based 
machine code generated on-the-fly") \
+DRI_CONF_DESC_END \
+DRI_CONF_OPT_END
+
 #if defined(RADEON_R100)   /* R100 */
 static const __DRIconfigOptionsExtension radeon_config_options = {
.base = { __DRI_CONFIG_OPTIONS, 1 },
diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.h 
b/src/mesa/drivers/dri/radeon/radeon_screen.h
index efb2e6016b7..9d69dcd4785 100644
--- a/src/mesa/drivers/dri/radeon/radeon_screen.h
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.h
@@ -48,6 +48,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 
SOFTWARE.
 #include "util/xmlconfig.h"
 
 
+#define DRI_CONF_TCL_SW 0
+#define DRI_CONF_TCL_PIPELINED 1
+#define DRI_CONF_TCL_VTXFMT 2
+#define DRI_CONF_TCL_CODEGEN 3
+
 typedef struct {
drm_handle_t handle;/* Handle to the DRM region */
drmSize size;   /* Size of the DRM region */
diff --git a/src/util/xmlpool/ca.po b/src/util/xmlpool/ca.po
index 2c663d25027..15c5fa0f133 100644
--- a/src/util/xmlpool/ca.po
+++ b/src/util/xmlpool/ca.po
@@ -193,29 +193,6 @@ msgstr ""
 msgid "Performance"
 msgstr "Rendiment"
 
-#: t_options.h:238
-msgid "TCL mode (Transformation, Clipping, Lighting)"
-msgstr "Mode TCL (Transformació, Retall, Il·luminació)"
-
-#: t_options.h:239
-msgid "Use software TCL pipeline"
-msgstr "Utilitza la canonada TCL de programari"
-
-#: t_options.h:240
-msgid "Use hardware TCL as first TCL pipeline stage"
-msgstr "Utilitza el TCL de maquinari com a la primera fase de la canonada TCL"
-
-#: t_options.h:241
-msgid "Bypass the TCL pipeline"
-msgstr "Passa per alt la canonada TCL"
-
-#: t_options.h:242
-msgid ""
-"Bypass the TCL pipeline with state-based machine code generated on-the-fly"
-msgstr ""
-"Passa per alt la canonada TCL amb codi de màquina basat en estats, generat "
-"sobre la marxa"
-
 #: t_options.h:251
 msgid "Method to limit rendering latency"
 msgstr "Mètode per a limitar la latència de renderització"
diff --git a/src/util/xmlpool/de.po b/src/util/xmlpool/de.po
index 40095df3c40..683d33d4bc6 100644
--- a/src/util/xmlpool/de.po
+++ b/src/util/xmlpool/de.po
@@ -167,29 +167,6 @@ msgstr ""
 msgid "Performance"
 msgstr "Leistung"
 
-#: t_options.h:238
-msgid "TCL mode (Transformation, Clipping, Lighting)"
-msgstr "TCL-Modus (Transformation, Clipping, Licht)"
-
-#: t_options.h:239
-msgid "Use software TCL pipeline"
-msgstr "Benutze die Software-TCL-Pipeline"
-
-#: t_options.h:240
-msgid "Use hardware TCL as first TCL pipeline stage"
-msgstr "Benutze Hardware TCL als erste Stufe der TCL-Pipeline"
-
-#: t_options.h:241
-msgid "Bypass the TCL pipeline"
-msgstr "Umgehe die TCL-Pipeline"
-
-#: t_options.h:242
-msgid ""
-"Bypass the TCL pipeline with state-based machine code generated on-the-fly"
-msgstr ""
-"Umgehe die TCL-Pipeline mit zur Laufzeit erzeugtem, zustandsbasiertem "
-"Maschinencode"
-
 #: t_options.h:251
 msgid "Method to limit rendering latency"
 msgstr "Methode zur Begrenzung der Bildverzögerung"
diff --git a/src/util/xmlpool/es.po b/src/util/xmlpool/es.po
index b1cfdd6e146..8b841c682fe 100644
--- a/src/util/xmlpool/es.po
+++ b/src/util/xmlpool/es.po
@@ -174,29 +174,6 @@ msgstr ""
 msgid "Performance"
 msgstr "Rendimiento"
 
-#: t_options.h:238
-msgid "TCL mode (Transformation, Clipping, Lighting)"
-msgstr "Modo TCL (Transformación, Recorte, Iluminación)"
-
-#: t_options.h:239
-msgid "Use software TCL pipeline"
-msgstr "Usar tubería TCL por software"
-
-#: t_options.h:240
-msgid "Use hardware TCL as first TCL 

[Mesa-dev] [PATCH 6/9] mesa: move legacy dri config option no_neg_lod_bias

2018-08-29 Thread Timothy Arceri
---
 src/mesa/drivers/dri/radeon/radeon_screen.c | 5 +
 src/util/xmlpool/ca.po  | 6 --
 src/util/xmlpool/de.po  | 4 
 src/util/xmlpool/es.po  | 4 
 src/util/xmlpool/fr.po  | 4 
 src/util/xmlpool/nl.po  | 4 
 src/util/xmlpool/sv.po  | 4 
 src/util/xmlpool/t_options.h| 5 -
 8 files changed, 5 insertions(+), 31 deletions(-)

diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c 
b/src/mesa/drivers/dri/radeon/radeon_screen.c
index fe484abf73f..6475e2f22c0 100644
--- a/src/mesa/drivers/dri/radeon/radeon_screen.c
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.c
@@ -90,6 +90,11 @@ DRI_CONF_OPT_BEGIN_V(tcl_mode,enum,def,"0:3") \
 DRI_CONF_DESC_END \
 DRI_CONF_OPT_END
 
+#define DRI_CONF_NO_NEG_LOD_BIAS(def) \
+DRI_CONF_OPT_BEGIN_B(no_neg_lod_bias, def) \
+DRI_CONF_DESC(en,"Forbid negative texture LOD bias") \
+DRI_CONF_OPT_END
+
 #if defined(RADEON_R100)   /* R100 */
 static const __DRIconfigOptionsExtension radeon_config_options = {
.base = { __DRI_CONFIG_OPTIONS, 1 },
diff --git a/src/util/xmlpool/ca.po b/src/util/xmlpool/ca.po
index 1710a2ce199..7760da13ac2 100644
--- a/src/util/xmlpool/ca.po
+++ b/src/util/xmlpool/ca.po
@@ -106,12 +106,6 @@ msgstr "Força 16 bits per texel"
 msgid "Initial maximum value for anisotropic texture filtering"
 msgstr "Valor màxim inicial per a la filtració de textura anisòtropa"
 
-#: t_options.h:148
-msgid "Forbid negative texture LOD bias"
-msgstr ""
-"Prohibeix una parcialitat negativa del Nivell de Detalle (LOD) de les "
-"textures"
-
 #: t_options.h:195
 msgid "A post-processing filter to cel-shade the output"
 msgstr "Un filtre de postprocessament per a aplicar cel shading a la sortida"
diff --git a/src/util/xmlpool/de.po b/src/util/xmlpool/de.po
index 5581725a251..9e466f7620a 100644
--- a/src/util/xmlpool/de.po
+++ b/src/util/xmlpool/de.po
@@ -83,10 +83,6 @@ msgstr "Erzwinge 16 bits pro Texel"
 msgid "Initial maximum value for anisotropic texture filtering"
 msgstr "Initialer Maximalwert für anisotropische Texturfilterung"
 
-#: t_options.h:148
-msgid "Forbid negative texture LOD bias"
-msgstr "Verbiete negative Textur-Detailgradverschiebung"
-
 #: t_options.h:195
 msgid "A post-processing filter to cel-shade the output"
 msgstr "Nachbearbeitungsfilter für Cell Shading"
diff --git a/src/util/xmlpool/es.po b/src/util/xmlpool/es.po
index 85288bfbe8a..cd76fa39b4a 100644
--- a/src/util/xmlpool/es.po
+++ b/src/util/xmlpool/es.po
@@ -90,10 +90,6 @@ msgstr "Forzar a 16 bits por texel"
 msgid "Initial maximum value for anisotropic texture filtering"
 msgstr "Valor máximo inicial para filtrado anisotrópico de textura"
 
-#: t_options.h:148
-msgid "Forbid negative texture LOD bias"
-msgstr "Prohibir valores negativos de Nivel De Detalle (LOD) de texturas"
-
 #: t_options.h:195
 msgid "A post-processing filter to cel-shade the output"
 msgstr "Un filtro de postprocesamiento para aplicar cel shading a la salida"
diff --git a/src/util/xmlpool/fr.po b/src/util/xmlpool/fr.po
index 1f2d04cd396..d086b44a4b0 100644
--- a/src/util/xmlpool/fr.po
+++ b/src/util/xmlpool/fr.po
@@ -82,10 +82,6 @@ msgstr "Forcer 16 bits par texel"
 msgid "Initial maximum value for anisotropic texture filtering"
 msgstr "Valeur maximale initiale pour le filtrage anisotropique de texture"
 
-#: t_options.h:148
-msgid "Forbid negative texture LOD bias"
-msgstr "Interdire le LOD bias negatif"
-
 #: t_options.h:195
 msgid "A post-processing filter to cel-shade the output"
 msgstr ""
diff --git a/src/util/xmlpool/nl.po b/src/util/xmlpool/nl.po
index 21845ce879c..38473fdc2de 100644
--- a/src/util/xmlpool/nl.po
+++ b/src/util/xmlpool/nl.po
@@ -82,10 +82,6 @@ msgstr "Dwing 16 bits per texel af"
 msgid "Initial maximum value for anisotropic texture filtering"
 msgstr "Initïele maximum waarde voor anisotrophische textuur filtering"
 
-#: t_options.h:148
-msgid "Forbid negative texture LOD bias"
-msgstr "Verbied negatief niveau detailonderscheid (LOD) van texturen"
-
 #: t_options.h:182
 msgid "Horizontal error diffusion"
 msgstr "Horizontale foutdiffusie"
diff --git a/src/util/xmlpool/sv.po b/src/util/xmlpool/sv.po
index 61d735b86c1..8c3bca72e68 100644
--- a/src/util/xmlpool/sv.po
+++ b/src/util/xmlpool/sv.po
@@ -82,10 +82,6 @@ msgstr "Tvinga 16 bitar per texel"
 msgid "Initial maximum value for anisotropic texture filtering"
 msgstr "Initialt maximalt värde för anisotropisk texturfiltrering"
 
-#: t_options.h:148
-msgid "Forbid negative texture LOD bias"
-msgstr "Förbjud negativ LOD-kompensation för texturer"
-
 #: t_options.h:181
 msgid "Color dithering method"
 msgstr "Färgutjämningsmetod"
diff --git a/src/util/xmlpool/t_options.h b/src/util/xmlpool/t_options.h
index 06217e558d7..c680c16140a 100644
--- a/src/util/xmlpool/t_options.h
+++ b/src/util/xmlpool/t_options.h
@@ -166,11 +166,6 @@ 

[Mesa-dev] [PATCH 8/9] mesa: move legacy dri config option fthrottle_mode

2018-08-29 Thread Timothy Arceri
---
 src/mesa/drivers/dri/radeon/radeon_screen.h | 12 
 src/util/xmlpool/ca.po  | 17 -
 src/util/xmlpool/de.po  | 17 -
 src/util/xmlpool/es.po  | 17 -
 src/util/xmlpool/fr.po  | 16 
 src/util/xmlpool/nl.po  | 19 ---
 src/util/xmlpool/sv.po  | 16 
 src/util/xmlpool/t_options.h| 12 
 8 files changed, 12 insertions(+), 114 deletions(-)

diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.h 
b/src/mesa/drivers/dri/radeon/radeon_screen.h
index e36e438e295..30202aec8b0 100644
--- a/src/mesa/drivers/dri/radeon/radeon_screen.h
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.h
@@ -79,6 +79,18 @@ DRI_CONF_OPT_BEGIN_V(round_mode,enum,def,"0:1") \
 DRI_CONF_DESC_END \
 DRI_CONF_OPT_END
 
+#define DRI_CONF_FTHROTTLE_BUSY 0
+#define DRI_CONF_FTHROTTLE_USLEEPS 1
+#define DRI_CONF_FTHROTTLE_IRQS 2
+#define DRI_CONF_FTHROTTLE_MODE(def) \
+DRI_CONF_OPT_BEGIN_V(fthrottle_mode,enum,def,"0:2") \
+DRI_CONF_DESC_BEGIN(en,"Method to limit rendering latency") \
+DRI_CONF_ENUM(0,"Busy waiting for the graphics hardware") \
+DRI_CONF_ENUM(1,"Sleep for brief intervals while waiting for 
the graphics hardware") \
+DRI_CONF_ENUM(2,"Let the graphics hardware emit a software 
interrupt and sleep") \
+DRI_CONF_DESC_END \
+DRI_CONF_OPT_END
+
 
 #define DRI_CONF_TCL_SW 0
 #define DRI_CONF_TCL_PIPELINED 1
diff --git a/src/util/xmlpool/ca.po b/src/util/xmlpool/ca.po
index e8ff8af1bb4..ddb37b6dd8f 100644
--- a/src/util/xmlpool/ca.po
+++ b/src/util/xmlpool/ca.po
@@ -139,23 +139,6 @@ msgstr ""
 msgid "Performance"
 msgstr "Rendiment"
 
-#: t_options.h:251
-msgid "Method to limit rendering latency"
-msgstr "Mètode per a limitar la latència de renderització"
-
-#: t_options.h:252
-msgid "Busy waiting for the graphics hardware"
-msgstr "Espera activa pel maquinari de gràfics"
-
-#: t_options.h:253
-msgid "Sleep for brief intervals while waiting for the graphics hardware"
-msgstr "Dorm per intervals breus mentre s'espera al maquinari de gràfics"
-
-#: t_options.h:254
-msgid "Let the graphics hardware emit a software interrupt and sleep"
-msgstr ""
-"Deixa que el maquinari de gràfics emeti una interrupció de programari i dormi"
-
 #: t_options.h:264
 msgid "Synchronization with vertical refresh (swap intervals)"
 msgstr "Sincronització amb refresc vertical (intervals d'intercanvi)"
diff --git a/src/util/xmlpool/de.po b/src/util/xmlpool/de.po
index 8d5804f4433..7b5b9510cfc 100644
--- a/src/util/xmlpool/de.po
+++ b/src/util/xmlpool/de.po
@@ -115,23 +115,6 @@ msgstr ""
 msgid "Performance"
 msgstr "Leistung"
 
-#: t_options.h:251
-msgid "Method to limit rendering latency"
-msgstr "Methode zur Begrenzung der Bildverzögerung"
-
-#: t_options.h:252
-msgid "Busy waiting for the graphics hardware"
-msgstr "Aktives Warten auf die Grafikhardware"
-
-#: t_options.h:253
-msgid "Sleep for brief intervals while waiting for the graphics hardware"
-msgstr "Kurze Schlafintervalle beim Warten auf die Grafikhardware"
-
-#: t_options.h:254
-msgid "Let the graphics hardware emit a software interrupt and sleep"
-msgstr ""
-"Die Grafikhardware eine Softwareunterbrechnung erzeugen lassen und schlafen"
-
 #: t_options.h:264
 msgid "Synchronization with vertical refresh (swap intervals)"
 msgstr "Synchronisation mit der vertikalen Bildwiederholung"
diff --git a/src/util/xmlpool/es.po b/src/util/xmlpool/es.po
index c79191f6c8b..81106ae7139 100644
--- a/src/util/xmlpool/es.po
+++ b/src/util/xmlpool/es.po
@@ -122,23 +122,6 @@ msgstr ""
 msgid "Performance"
 msgstr "Rendimiento"
 
-#: t_options.h:251
-msgid "Method to limit rendering latency"
-msgstr "Método para limitar la latencia de renderización"
-
-#: t_options.h:252
-msgid "Busy waiting for the graphics hardware"
-msgstr "Esperar activamente al hardware gráfico"
-
-#: t_options.h:253
-msgid "Sleep for brief intervals while waiting for the graphics hardware"
-msgstr "Dormir en intervalos cortos mientras se espera al hardware gráfico"
-
-#: t_options.h:254
-msgid "Let the graphics hardware emit a software interrupt and sleep"
-msgstr ""
-"Permitir que el hardware gráfico emita una interrupción de software y duerma"
-
 #: t_options.h:264
 msgid "Synchronization with vertical refresh (swap intervals)"
 msgstr "Sincronización con el refresco vertical (intervalos de intercambio)"
diff --git a/src/util/xmlpool/fr.po b/src/util/xmlpool/fr.po
index c1856aa0fd3..f1a2ce0748e 100644
--- a/src/util/xmlpool/fr.po
+++ b/src/util/xmlpool/fr.po
@@ -110,22 +110,6 @@ msgstr ""
 msgid "Performance"
 msgstr "Performance"
 
-#: t_options.h:251
-msgid "Method to limit rendering latency"
-msgstr "Méthode d'attente de la carte graphique"
-
-#: t_options.h:252
-msgid "Busy waiting for the graphics hardware"
-msgstr "Attente 

[Mesa-dev] [PATCH 4/9] mesa: remove unused dri option float_depth

2018-08-29 Thread Timothy Arceri
This seems to have only been used by DRI1 drivers which were
removed with e4344161bde2.
---
 src/util/xmlpool/ca.po   | 4 
 src/util/xmlpool/de.po   | 4 
 src/util/xmlpool/es.po   | 4 
 src/util/xmlpool/fr.po   | 4 
 src/util/xmlpool/nl.po   | 4 
 src/util/xmlpool/sv.po   | 4 
 src/util/xmlpool/t_options.h | 5 -
 7 files changed, 29 deletions(-)

diff --git a/src/util/xmlpool/ca.po b/src/util/xmlpool/ca.po
index 5f78915b544..c588832b5d9 100644
--- a/src/util/xmlpool/ca.po
+++ b/src/util/xmlpool/ca.po
@@ -124,10 +124,6 @@ msgstr "Arrodoneix els components de color a baix"
 msgid "Round to nearest color"
 msgstr "Arrodoneix al color més proper"
 
-#: t_options.h:190
-msgid "Floating point depth buffer"
-msgstr "Buffer de profunditat de punt flotant"
-
 #: t_options.h:195
 msgid "A post-processing filter to cel-shade the output"
 msgstr "Un filtre de postprocessament per a aplicar cel shading a la sortida"
diff --git a/src/util/xmlpool/de.po b/src/util/xmlpool/de.po
index 2ef0607cbb2..f40a39fff59 100644
--- a/src/util/xmlpool/de.po
+++ b/src/util/xmlpool/de.po
@@ -99,10 +99,6 @@ msgstr "Farbkomponenten abrunden"
 msgid "Round to nearest color"
 msgstr "Zur ähnlichsten Farbe runden"
 
-#: t_options.h:190
-msgid "Floating point depth buffer"
-msgstr "Fließkomma z-Puffer"
-
 #: t_options.h:195
 msgid "A post-processing filter to cel-shade the output"
 msgstr "Nachbearbeitungsfilter für Cell Shading"
diff --git a/src/util/xmlpool/es.po b/src/util/xmlpool/es.po
index cfb6c0eb18d..e371834070b 100644
--- a/src/util/xmlpool/es.po
+++ b/src/util/xmlpool/es.po
@@ -106,10 +106,6 @@ msgstr "Redondear hacia abajo los componentes de color"
 msgid "Round to nearest color"
 msgstr "Redondear al color más cercano"
 
-#: t_options.h:190
-msgid "Floating point depth buffer"
-msgstr "Búfer de profundidad en coma flotante"
-
 #: t_options.h:195
 msgid "A post-processing filter to cel-shade the output"
 msgstr "Un filtro de postprocesamiento para aplicar cel shading a la salida"
diff --git a/src/util/xmlpool/fr.po b/src/util/xmlpool/fr.po
index 94a248069a0..b898e3423dd 100644
--- a/src/util/xmlpool/fr.po
+++ b/src/util/xmlpool/fr.po
@@ -98,10 +98,6 @@ msgstr "Arrondi à l'inférieur"
 msgid "Round to nearest color"
 msgstr "Arrondi au plus proche"
 
-#: t_options.h:190
-msgid "Floating point depth buffer"
-msgstr "Z-buffer en virgule flottante"
-
 #: t_options.h:195
 msgid "A post-processing filter to cel-shade the output"
 msgstr ""
diff --git a/src/util/xmlpool/nl.po b/src/util/xmlpool/nl.po
index 9beafb42224..21845ce879c 100644
--- a/src/util/xmlpool/nl.po
+++ b/src/util/xmlpool/nl.po
@@ -98,10 +98,6 @@ msgstr "Horizontale foutdiffusie, zet fout bij lijnbegin 
terug"
 msgid "Ordered 2D color dithering"
 msgstr "Geordende 2D kleurrasterisering"
 
-#: t_options.h:190
-msgid "Floating point depth buffer"
-msgstr "Dieptebuffer als commagetal"
-
 #: t_options.h:195
 msgid "A post-processing filter to cel-shade the output"
 msgstr ""
diff --git a/src/util/xmlpool/sv.po b/src/util/xmlpool/sv.po
index d952d5d8388..e340ed5df60 100644
--- a/src/util/xmlpool/sv.po
+++ b/src/util/xmlpool/sv.po
@@ -114,10 +114,6 @@ msgstr "Horisontell felspridning, återställ fel vid 
radbörjan"
 msgid "Ordered 2D color dithering"
 msgstr "Ordnad 2D-färgutjämning"
 
-#: t_options.h:190
-msgid "Floating point depth buffer"
-msgstr "Buffert för flytande punktdjup"
-
 #: t_options.h:195
 msgid "A post-processing filter to cel-shade the output"
 msgstr ""
diff --git a/src/util/xmlpool/t_options.h b/src/util/xmlpool/t_options.h
index 0faa8883fbb..f43bfeaf40d 100644
--- a/src/util/xmlpool/t_options.h
+++ b/src/util/xmlpool/t_options.h
@@ -186,11 +186,6 @@ DRI_CONF_OPT_BEGIN_V(round_mode,enum,def,"0:1") \
 DRI_CONF_DESC_END \
 DRI_CONF_OPT_END
 
-#define DRI_CONF_FLOAT_DEPTH(def) \
-DRI_CONF_OPT_BEGIN_B(float_depth, def) \
-DRI_CONF_DESC(en,gettext("Floating point depth buffer")) \
-DRI_CONF_OPT_END
-
 #define DRI_CONF_PP_CELSHADE(def) \
 DRI_CONF_OPT_BEGIN_V(pp_celshade,enum,def,"0:1") \
 DRI_CONF_DESC(en,gettext("A post-processing filter to cel-shade the 
output")) \
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/9] mesa: move legacy dri config option dither_mode

2018-08-29 Thread Timothy Arceri
---
 src/mesa/drivers/dri/radeon/radeon_screen.h | 12 
 src/util/xmlpool/ca.po  | 16 
 src/util/xmlpool/de.po  | 16 
 src/util/xmlpool/es.po  | 16 
 src/util/xmlpool/fr.po  | 16 
 src/util/xmlpool/nl.po  | 16 
 src/util/xmlpool/t_options.h| 12 
 7 files changed, 12 insertions(+), 92 deletions(-)

diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.h 
b/src/mesa/drivers/dri/radeon/radeon_screen.h
index 35a20996f82..c5b8023d9d1 100644
--- a/src/mesa/drivers/dri/radeon/radeon_screen.h
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.h
@@ -57,6 +57,18 @@ DRI_CONF_OPT_BEGIN_V(color_reduction,enum,def,"0:1") \
 DRI_CONF_DESC_END \
 DRI_CONF_OPT_END
 
+#define DRI_CONF_DITHER_XERRORDIFF 0
+#define DRI_CONF_DITHER_XERRORDIFFRESET 1
+#define DRI_CONF_DITHER_ORDERED 2
+#define DRI_CONF_DITHER_MODE(def) \
+DRI_CONF_OPT_BEGIN_V(dither_mode,enum,def,"0:2") \
+   DRI_CONF_DESC_BEGIN(en,"Color dithering method") \
+DRI_CONF_ENUM(0,"Horizontal error diffusion") \
+DRI_CONF_ENUM(1,"Horizontal error diffusion, reset error at 
line start") \
+DRI_CONF_ENUM(2,"Ordered 2D color dithering") \
+DRI_CONF_DESC_END \
+DRI_CONF_OPT_END
+
 
 #define DRI_CONF_TCL_SW 0
 #define DRI_CONF_TCL_PIPELINED 1
diff --git a/src/util/xmlpool/ca.po b/src/util/xmlpool/ca.po
index cab901ae194..5f78915b544 100644
--- a/src/util/xmlpool/ca.po
+++ b/src/util/xmlpool/ca.po
@@ -124,22 +124,6 @@ msgstr "Arrodoneix els components de color a baix"
 msgid "Round to nearest color"
 msgstr "Arrodoneix al color més proper"
 
-#: t_options.h:181
-msgid "Color dithering method"
-msgstr "Mètode de tramat de color"
-
-#: t_options.h:182
-msgid "Horizontal error diffusion"
-msgstr "Difusió d'error horitzontal"
-
-#: t_options.h:183
-msgid "Horizontal error diffusion, reset error at line start"
-msgstr "Difusió d'error horitzontal, reinicia l'error a l'inici de la línia"
-
-#: t_options.h:184
-msgid "Ordered 2D color dithering"
-msgstr "Tramat de color 2D ordenat"
-
 #: t_options.h:190
 msgid "Floating point depth buffer"
 msgstr "Buffer de profunditat de punt flotant"
diff --git a/src/util/xmlpool/de.po b/src/util/xmlpool/de.po
index 421078441db..2ef0607cbb2 100644
--- a/src/util/xmlpool/de.po
+++ b/src/util/xmlpool/de.po
@@ -99,22 +99,6 @@ msgstr "Farbkomponenten abrunden"
 msgid "Round to nearest color"
 msgstr "Zur ähnlichsten Farbe runden"
 
-#: t_options.h:181
-msgid "Color dithering method"
-msgstr "Farbrasterungsmethode"
-
-#: t_options.h:182
-msgid "Horizontal error diffusion"
-msgstr "Horizontale Fehlerstreuung"
-
-#: t_options.h:183
-msgid "Horizontal error diffusion, reset error at line start"
-msgstr "Horizontale Fehlerstreuung, Fehler am Zeilenanfang zurücksetzen"
-
-#: t_options.h:184
-msgid "Ordered 2D color dithering"
-msgstr "Geordnete 2D Farbrasterung"
-
 #: t_options.h:190
 msgid "Floating point depth buffer"
 msgstr "Fließkomma z-Puffer"
diff --git a/src/util/xmlpool/es.po b/src/util/xmlpool/es.po
index fe9bce1bbba..cfb6c0eb18d 100644
--- a/src/util/xmlpool/es.po
+++ b/src/util/xmlpool/es.po
@@ -106,22 +106,6 @@ msgstr "Redondear hacia abajo los componentes de color"
 msgid "Round to nearest color"
 msgstr "Redondear al color más cercano"
 
-#: t_options.h:181
-msgid "Color dithering method"
-msgstr "Método de suavizado de color"
-
-#: t_options.h:182
-msgid "Horizontal error diffusion"
-msgstr "Difusión de error horizontal"
-
-#: t_options.h:183
-msgid "Horizontal error diffusion, reset error at line start"
-msgstr "Difusión de error horizontal, reiniciar error al comienzo de línea"
-
-#: t_options.h:184
-msgid "Ordered 2D color dithering"
-msgstr "Suavizado de color 2D ordenado"
-
 #: t_options.h:190
 msgid "Floating point depth buffer"
 msgstr "Búfer de profundidad en coma flotante"
diff --git a/src/util/xmlpool/fr.po b/src/util/xmlpool/fr.po
index 609c6041e34..94a248069a0 100644
--- a/src/util/xmlpool/fr.po
+++ b/src/util/xmlpool/fr.po
@@ -98,22 +98,6 @@ msgstr "Arrondi à l'inférieur"
 msgid "Round to nearest color"
 msgstr "Arrondi au plus proche"
 
-#: t_options.h:181
-msgid "Color dithering method"
-msgstr "Méthode de tramage"
-
-#: t_options.h:182
-msgid "Horizontal error diffusion"
-msgstr "Diffusion d'erreur horizontale"
-
-#: t_options.h:183
-msgid "Horizontal error diffusion, reset error at line start"
-msgstr "Diffusion d'erreur horizontale, réinitialisé pour chaque ligne"
-
-#: t_options.h:184
-msgid "Ordered 2D color dithering"
-msgstr "Tramage ordonné des couleurs"
-
 #: t_options.h:190
 msgid "Floating point depth buffer"
 msgstr "Z-buffer en virgule flottante"
diff --git a/src/util/xmlpool/nl.po b/src/util/xmlpool/nl.po
index 65071e57451..9beafb42224 100644
--- a/src/util/xmlpool/nl.po
+++ b/src/util/xmlpool/nl.po
@@ -86,22 +86,6 @@ msgstr 

[Mesa-dev] [PATCH 7/9] mesa: move legacy dri config option def_max_anisotropy

2018-08-29 Thread Timothy Arceri
---
 src/mesa/drivers/dri/radeon/radeon_screen.c | 5 +
 src/util/xmlpool/ca.po  | 4 
 src/util/xmlpool/de.po  | 4 
 src/util/xmlpool/es.po  | 4 
 src/util/xmlpool/fr.po  | 4 
 src/util/xmlpool/nl.po  | 4 
 src/util/xmlpool/sv.po  | 4 
 src/util/xmlpool/t_options.h| 5 -
 8 files changed, 5 insertions(+), 29 deletions(-)

diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c 
b/src/mesa/drivers/dri/radeon/radeon_screen.c
index 6475e2f22c0..6345f2ce661 100644
--- a/src/mesa/drivers/dri/radeon/radeon_screen.c
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.c
@@ -95,6 +95,11 @@ DRI_CONF_OPT_BEGIN_B(no_neg_lod_bias, def) \
 DRI_CONF_DESC(en,"Forbid negative texture LOD bias") \
 DRI_CONF_OPT_END
 
+#define DRI_CONF_DEF_MAX_ANISOTROPY(def,range) \
+DRI_CONF_OPT_BEGIN_V(def_max_anisotropy,float,def,range) \
+DRI_CONF_DESC(en,"Initial maximum value for anisotropic texture 
filtering") \
+DRI_CONF_OPT_END
+
 #if defined(RADEON_R100)   /* R100 */
 static const __DRIconfigOptionsExtension radeon_config_options = {
.base = { __DRI_CONFIG_OPTIONS, 1 },
diff --git a/src/util/xmlpool/ca.po b/src/util/xmlpool/ca.po
index 7760da13ac2..e8ff8af1bb4 100644
--- a/src/util/xmlpool/ca.po
+++ b/src/util/xmlpool/ca.po
@@ -102,10 +102,6 @@ msgstr "Prefereix 16 bits per texel"
 msgid "Force 16 bits per texel"
 msgstr "Força 16 bits per texel"
 
-#: t_options.h:143
-msgid "Initial maximum value for anisotropic texture filtering"
-msgstr "Valor màxim inicial per a la filtració de textura anisòtropa"
-
 #: t_options.h:195
 msgid "A post-processing filter to cel-shade the output"
 msgstr "Un filtre de postprocessament per a aplicar cel shading a la sortida"
diff --git a/src/util/xmlpool/de.po b/src/util/xmlpool/de.po
index 9e466f7620a..8d5804f4433 100644
--- a/src/util/xmlpool/de.po
+++ b/src/util/xmlpool/de.po
@@ -79,10 +79,6 @@ msgstr "Bevorzuge 16 bits pro Texel"
 msgid "Force 16 bits per texel"
 msgstr "Erzwinge 16 bits pro Texel"
 
-#: t_options.h:143
-msgid "Initial maximum value for anisotropic texture filtering"
-msgstr "Initialer Maximalwert für anisotropische Texturfilterung"
-
 #: t_options.h:195
 msgid "A post-processing filter to cel-shade the output"
 msgstr "Nachbearbeitungsfilter für Cell Shading"
diff --git a/src/util/xmlpool/es.po b/src/util/xmlpool/es.po
index cd76fa39b4a..c79191f6c8b 100644
--- a/src/util/xmlpool/es.po
+++ b/src/util/xmlpool/es.po
@@ -86,10 +86,6 @@ msgstr "Preferir 16 bits por texel"
 msgid "Force 16 bits per texel"
 msgstr "Forzar a 16 bits por texel"
 
-#: t_options.h:143
-msgid "Initial maximum value for anisotropic texture filtering"
-msgstr "Valor máximo inicial para filtrado anisotrópico de textura"
-
 #: t_options.h:195
 msgid "A post-processing filter to cel-shade the output"
 msgstr "Un filtro de postprocesamiento para aplicar cel shading a la salida"
diff --git a/src/util/xmlpool/fr.po b/src/util/xmlpool/fr.po
index d086b44a4b0..c1856aa0fd3 100644
--- a/src/util/xmlpool/fr.po
+++ b/src/util/xmlpool/fr.po
@@ -78,10 +78,6 @@ msgstr "Prérérer 16 bits par texel"
 msgid "Force 16 bits per texel"
 msgstr "Forcer 16 bits par texel"
 
-#: t_options.h:143
-msgid "Initial maximum value for anisotropic texture filtering"
-msgstr "Valeur maximale initiale pour le filtrage anisotropique de texture"
-
 #: t_options.h:195
 msgid "A post-processing filter to cel-shade the output"
 msgstr ""
diff --git a/src/util/xmlpool/nl.po b/src/util/xmlpool/nl.po
index 38473fdc2de..50fb346 100644
--- a/src/util/xmlpool/nl.po
+++ b/src/util/xmlpool/nl.po
@@ -78,10 +78,6 @@ msgstr "Prefereer 16 bits per texel"
 msgid "Force 16 bits per texel"
 msgstr "Dwing 16 bits per texel af"
 
-#: t_options.h:143
-msgid "Initial maximum value for anisotropic texture filtering"
-msgstr "Initïele maximum waarde voor anisotrophische textuur filtering"
-
 #: t_options.h:182
 msgid "Horizontal error diffusion"
 msgstr "Horizontale foutdiffusie"
diff --git a/src/util/xmlpool/sv.po b/src/util/xmlpool/sv.po
index 8c3bca72e68..3e11072ea89 100644
--- a/src/util/xmlpool/sv.po
+++ b/src/util/xmlpool/sv.po
@@ -78,10 +78,6 @@ msgstr "Föredra 16 bitar per texel"
 msgid "Force 16 bits per texel"
 msgstr "Tvinga 16 bitar per texel"
 
-#: t_options.h:143
-msgid "Initial maximum value for anisotropic texture filtering"
-msgstr "Initialt maximalt värde för anisotropisk texturfiltrering"
-
 #: t_options.h:181
 msgid "Color dithering method"
 msgstr "Färgutjämningsmetod"
diff --git a/src/util/xmlpool/t_options.h b/src/util/xmlpool/t_options.h
index c680c16140a..2974d471fbd 100644
--- a/src/util/xmlpool/t_options.h
+++ b/src/util/xmlpool/t_options.h
@@ -161,11 +161,6 @@ DRI_CONF_OPT_BEGIN_V(texture_depth,enum,def,"0:3") \
 DRI_CONF_DESC_END \
 DRI_CONF_OPT_END
 
-#define DRI_CONF_DEF_MAX_ANISOTROPY(def,range) \

[Mesa-dev] [PATCH 2/9] mesa: move legacy dri config option color_reduction

2018-08-29 Thread Timothy Arceri
---
 src/mesa/drivers/dri/radeon/radeon_screen.h | 10 ++
 src/util/xmlpool/ca.po  | 12 
 src/util/xmlpool/de.po  | 12 
 src/util/xmlpool/es.po  | 12 
 src/util/xmlpool/fr.po  | 12 
 src/util/xmlpool/nl.po  | 12 
 src/util/xmlpool/sv.po  | 12 
 src/util/xmlpool/t_options.h| 10 --
 8 files changed, 10 insertions(+), 82 deletions(-)

diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.h 
b/src/mesa/drivers/dri/radeon/radeon_screen.h
index 9d69dcd4785..35a20996f82 100644
--- a/src/mesa/drivers/dri/radeon/radeon_screen.h
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.h
@@ -47,6 +47,16 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 
SOFTWARE.
 #include "radeon_reg.h"
 #include "util/xmlconfig.h"
 
+#define DRI_CONF_COLOR_REDUCTION_ROUND 0
+#define DRI_CONF_COLOR_REDUCTION_DITHER 1
+#define DRI_CONF_COLOR_REDUCTION(def) \
+DRI_CONF_OPT_BEGIN_V(color_reduction,enum,def,"0:1") \
+DRI_CONF_DESC_BEGIN(en,"Initial color reduction method") \
+DRI_CONF_ENUM(0,"Round colors") \
+DRI_CONF_ENUM(1,"Dither colors") \
+DRI_CONF_DESC_END \
+DRI_CONF_OPT_END
+
 
 #define DRI_CONF_TCL_SW 0
 #define DRI_CONF_TCL_PIPELINED 1
diff --git a/src/util/xmlpool/ca.po b/src/util/xmlpool/ca.po
index 15c5fa0f133..cab901ae194 100644
--- a/src/util/xmlpool/ca.po
+++ b/src/util/xmlpool/ca.po
@@ -112,18 +112,6 @@ msgstr ""
 "Prohibeix una parcialitat negativa del Nivell de Detalle (LOD) de les "
 "textures"
 
-#: t_options.h:160
-msgid "Initial color reduction method"
-msgstr "Mètode inicial de reducció de color"
-
-#: t_options.h:161
-msgid "Round colors"
-msgstr "Colors arrodonits"
-
-#: t_options.h:162
-msgid "Dither colors"
-msgstr "Colors tramats"
-
 #: t_options.h:170
 msgid "Color rounding method"
 msgstr "Mètode d'arrodoniment de color"
diff --git a/src/util/xmlpool/de.po b/src/util/xmlpool/de.po
index 683d33d4bc6..421078441db 100644
--- a/src/util/xmlpool/de.po
+++ b/src/util/xmlpool/de.po
@@ -87,18 +87,6 @@ msgstr "Initialer Maximalwert für anisotropische 
Texturfilterung"
 msgid "Forbid negative texture LOD bias"
 msgstr "Verbiete negative Textur-Detailgradverschiebung"
 
-#: t_options.h:160
-msgid "Initial color reduction method"
-msgstr "Initiale Farbreduktionsmethode"
-
-#: t_options.h:161
-msgid "Round colors"
-msgstr "Farben runden"
-
-#: t_options.h:162
-msgid "Dither colors"
-msgstr "Farben rastern"
-
 #: t_options.h:170
 msgid "Color rounding method"
 msgstr "Farbrundungsmethode"
diff --git a/src/util/xmlpool/es.po b/src/util/xmlpool/es.po
index 8b841c682fe..fe9bce1bbba 100644
--- a/src/util/xmlpool/es.po
+++ b/src/util/xmlpool/es.po
@@ -94,18 +94,6 @@ msgstr "Valor máximo inicial para filtrado anisotrópico de 
textura"
 msgid "Forbid negative texture LOD bias"
 msgstr "Prohibir valores negativos de Nivel De Detalle (LOD) de texturas"
 
-#: t_options.h:160
-msgid "Initial color reduction method"
-msgstr "Método inicial de reducción de color"
-
-#: t_options.h:161
-msgid "Round colors"
-msgstr "Colores redondeados"
-
-#: t_options.h:162
-msgid "Dither colors"
-msgstr "Colores suavizados"
-
 #: t_options.h:170
 msgid "Color rounding method"
 msgstr "Método de redondeo de colores"
diff --git a/src/util/xmlpool/fr.po b/src/util/xmlpool/fr.po
index d8bab1a2139..609c6041e34 100644
--- a/src/util/xmlpool/fr.po
+++ b/src/util/xmlpool/fr.po
@@ -86,18 +86,6 @@ msgstr "Valeur maximale initiale pour le filtrage 
anisotropique de texture"
 msgid "Forbid negative texture LOD bias"
 msgstr "Interdire le LOD bias negatif"
 
-#: t_options.h:160
-msgid "Initial color reduction method"
-msgstr "Technique de réduction de couleurs"
-
-#: t_options.h:161
-msgid "Round colors"
-msgstr "Arrondir les valeurs de couleur"
-
-#: t_options.h:162
-msgid "Dither colors"
-msgstr "Tramer les couleurs"
-
 #: t_options.h:170
 msgid "Color rounding method"
 msgstr "Méthode d'arrondi des couleurs"
diff --git a/src/util/xmlpool/nl.po b/src/util/xmlpool/nl.po
index e6fb5d1f9d7..65071e57451 100644
--- a/src/util/xmlpool/nl.po
+++ b/src/util/xmlpool/nl.po
@@ -86,18 +86,6 @@ msgstr "Initïele maximum waarde voor anisotrophische textuur 
filtering"
 msgid "Forbid negative texture LOD bias"
 msgstr "Verbied negatief niveau detailonderscheid (LOD) van texturen"
 
-#: t_options.h:160
-msgid "Initial color reduction method"
-msgstr "Initïele kleurreductie methode"
-
-#: t_options.h:161
-msgid "Round colors"
-msgstr "Rond kleuren af"
-
-#: t_options.h:162
-msgid "Dither colors"
-msgstr "Rasteriseer kleuren"
-
 #: t_options.h:170
 msgid "Color rounding method"
 msgstr "Kleurafrondingmethode"
diff --git a/src/util/xmlpool/sv.po b/src/util/xmlpool/sv.po
index 89072e66f22..d952d5d8388 100644
--- a/src/util/xmlpool/sv.po
+++ b/src/util/xmlpool/sv.po
@@ -86,18 +86,6 @@ msgstr "Initialt maximalt 

[Mesa-dev] [PATCH 5/9] mesa: move legacy dri config option round_mode

2018-08-29 Thread Timothy Arceri
---
 src/mesa/drivers/dri/radeon/radeon_screen.h | 10 ++
 src/util/xmlpool/ca.po  | 12 
 src/util/xmlpool/de.po  | 12 
 src/util/xmlpool/es.po  | 12 
 src/util/xmlpool/fr.po  | 12 
 src/util/xmlpool/sv.po  | 12 
 src/util/xmlpool/t_options.h| 10 --
 7 files changed, 10 insertions(+), 70 deletions(-)

diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.h 
b/src/mesa/drivers/dri/radeon/radeon_screen.h
index c5b8023d9d1..e36e438e295 100644
--- a/src/mesa/drivers/dri/radeon/radeon_screen.h
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.h
@@ -69,6 +69,16 @@ DRI_CONF_OPT_BEGIN_V(dither_mode,enum,def,"0:2") \
 DRI_CONF_DESC_END \
 DRI_CONF_OPT_END
 
+#define DRI_CONF_ROUND_TRUNC 0
+#define DRI_CONF_ROUND_ROUND 1
+#define DRI_CONF_ROUND_MODE(def) \
+DRI_CONF_OPT_BEGIN_V(round_mode,enum,def,"0:1") \
+   DRI_CONF_DESC_BEGIN(en,"Color rounding method") \
+DRI_CONF_ENUM(0,"Round color components downward") \
+DRI_CONF_ENUM(1,"Round to nearest color") \
+DRI_CONF_DESC_END \
+DRI_CONF_OPT_END
+
 
 #define DRI_CONF_TCL_SW 0
 #define DRI_CONF_TCL_PIPELINED 1
diff --git a/src/util/xmlpool/ca.po b/src/util/xmlpool/ca.po
index c588832b5d9..1710a2ce199 100644
--- a/src/util/xmlpool/ca.po
+++ b/src/util/xmlpool/ca.po
@@ -112,18 +112,6 @@ msgstr ""
 "Prohibeix una parcialitat negativa del Nivell de Detalle (LOD) de les "
 "textures"
 
-#: t_options.h:170
-msgid "Color rounding method"
-msgstr "Mètode d'arrodoniment de color"
-
-#: t_options.h:171
-msgid "Round color components downward"
-msgstr "Arrodoneix els components de color a baix"
-
-#: t_options.h:172
-msgid "Round to nearest color"
-msgstr "Arrodoneix al color més proper"
-
 #: t_options.h:195
 msgid "A post-processing filter to cel-shade the output"
 msgstr "Un filtre de postprocessament per a aplicar cel shading a la sortida"
diff --git a/src/util/xmlpool/de.po b/src/util/xmlpool/de.po
index f40a39fff59..5581725a251 100644
--- a/src/util/xmlpool/de.po
+++ b/src/util/xmlpool/de.po
@@ -87,18 +87,6 @@ msgstr "Initialer Maximalwert für anisotropische 
Texturfilterung"
 msgid "Forbid negative texture LOD bias"
 msgstr "Verbiete negative Textur-Detailgradverschiebung"
 
-#: t_options.h:170
-msgid "Color rounding method"
-msgstr "Farbrundungsmethode"
-
-#: t_options.h:171
-msgid "Round color components downward"
-msgstr "Farbkomponenten abrunden"
-
-#: t_options.h:172
-msgid "Round to nearest color"
-msgstr "Zur ähnlichsten Farbe runden"
-
 #: t_options.h:195
 msgid "A post-processing filter to cel-shade the output"
 msgstr "Nachbearbeitungsfilter für Cell Shading"
diff --git a/src/util/xmlpool/es.po b/src/util/xmlpool/es.po
index e371834070b..85288bfbe8a 100644
--- a/src/util/xmlpool/es.po
+++ b/src/util/xmlpool/es.po
@@ -94,18 +94,6 @@ msgstr "Valor máximo inicial para filtrado anisotrópico de 
textura"
 msgid "Forbid negative texture LOD bias"
 msgstr "Prohibir valores negativos de Nivel De Detalle (LOD) de texturas"
 
-#: t_options.h:170
-msgid "Color rounding method"
-msgstr "Método de redondeo de colores"
-
-#: t_options.h:171
-msgid "Round color components downward"
-msgstr "Redondear hacia abajo los componentes de color"
-
-#: t_options.h:172
-msgid "Round to nearest color"
-msgstr "Redondear al color más cercano"
-
 #: t_options.h:195
 msgid "A post-processing filter to cel-shade the output"
 msgstr "Un filtro de postprocesamiento para aplicar cel shading a la salida"
diff --git a/src/util/xmlpool/fr.po b/src/util/xmlpool/fr.po
index b898e3423dd..1f2d04cd396 100644
--- a/src/util/xmlpool/fr.po
+++ b/src/util/xmlpool/fr.po
@@ -86,18 +86,6 @@ msgstr "Valeur maximale initiale pour le filtrage 
anisotropique de texture"
 msgid "Forbid negative texture LOD bias"
 msgstr "Interdire le LOD bias negatif"
 
-#: t_options.h:170
-msgid "Color rounding method"
-msgstr "Méthode d'arrondi des couleurs"
-
-#: t_options.h:171
-msgid "Round color components downward"
-msgstr "Arrondi à l'inférieur"
-
-#: t_options.h:172
-msgid "Round to nearest color"
-msgstr "Arrondi au plus proche"
-
 #: t_options.h:195
 msgid "A post-processing filter to cel-shade the output"
 msgstr ""
diff --git a/src/util/xmlpool/sv.po b/src/util/xmlpool/sv.po
index e340ed5df60..61d735b86c1 100644
--- a/src/util/xmlpool/sv.po
+++ b/src/util/xmlpool/sv.po
@@ -86,18 +86,6 @@ msgstr "Initialt maximalt värde för anisotropisk 
texturfiltrering"
 msgid "Forbid negative texture LOD bias"
 msgstr "Förbjud negativ LOD-kompensation för texturer"
 
-#: t_options.h:170
-msgid "Color rounding method"
-msgstr "Färgavrundningsmetod"
-
-#: t_options.h:171
-msgid "Round color components downward"
-msgstr "Avrunda färdkomponenter nedåt"
-
-#: t_options.h:172
-msgid "Round to nearest color"
-msgstr "Avrunda till närmsta färg"
-
 #: t_options.h:181
 msgid "Color dithering method"
 

[Mesa-dev] [PATCH 9/9] mesa: move legacy dri config option texture_depth

2018-08-29 Thread Timothy Arceri
---
 src/mesa/drivers/dri/radeon/radeon_screen.h | 13 +
 src/util/xmlpool/ca.po  | 20 
 src/util/xmlpool/de.po  | 20 
 src/util/xmlpool/es.po  | 20 
 src/util/xmlpool/fr.po  | 20 
 src/util/xmlpool/nl.po  | 20 
 src/util/xmlpool/sv.po  | 20 
 src/util/xmlpool/t_options.h| 14 --
 8 files changed, 13 insertions(+), 134 deletions(-)

diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.h 
b/src/mesa/drivers/dri/radeon/radeon_screen.h
index 30202aec8b0..af1b9454e10 100644
--- a/src/mesa/drivers/dri/radeon/radeon_screen.h
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.h
@@ -91,6 +91,19 @@ DRI_CONF_OPT_BEGIN_V(fthrottle_mode,enum,def,"0:2") \
 DRI_CONF_DESC_END \
 DRI_CONF_OPT_END
 
+#define DRI_CONF_TEXTURE_DEPTH_FB   0
+#define DRI_CONF_TEXTURE_DEPTH_32   1
+#define DRI_CONF_TEXTURE_DEPTH_16   2
+#define DRI_CONF_TEXTURE_DEPTH_FORCE_16 3
+#define DRI_CONF_TEXTURE_DEPTH(def) \
+DRI_CONF_OPT_BEGIN_V(texture_depth,enum,def,"0:3") \
+   DRI_CONF_DESC_BEGIN(en,"Texture color depth") \
+DRI_CONF_ENUM(0,"Prefer frame buffer color depth") \
+DRI_CONF_ENUM(1,"Prefer 32 bits per texel") \
+DRI_CONF_ENUM(2,"Prefer 16 bits per texel") \
+DRI_CONF_ENUM(3,"Force 16 bits per texel") \
+DRI_CONF_DESC_END \
+DRI_CONF_OPT_END
 
 #define DRI_CONF_TCL_SW 0
 #define DRI_CONF_TCL_PIPELINED 1
diff --git a/src/util/xmlpool/ca.po b/src/util/xmlpool/ca.po
index ddb37b6dd8f..91621f2831d 100644
--- a/src/util/xmlpool/ca.po
+++ b/src/util/xmlpool/ca.po
@@ -82,26 +82,6 @@ msgstr "Permet les directives #extension GLSL en el mitjà 
dels shaders"
 msgid "Image Quality"
 msgstr "Qualitat d'imatge"
 
-#: t_options.h:133
-msgid "Texture color depth"
-msgstr "Profunditat de color de textura"
-
-#: t_options.h:134
-msgid "Prefer frame buffer color depth"
-msgstr "Prefereix profunditat de color del framebuffer"
-
-#: t_options.h:135
-msgid "Prefer 32 bits per texel"
-msgstr "Prefereix 32 bits per texel"
-
-#: t_options.h:136
-msgid "Prefer 16 bits per texel"
-msgstr "Prefereix 16 bits per texel"
-
-#: t_options.h:137
-msgid "Force 16 bits per texel"
-msgstr "Força 16 bits per texel"
-
 #: t_options.h:195
 msgid "A post-processing filter to cel-shade the output"
 msgstr "Un filtre de postprocessament per a aplicar cel shading a la sortida"
diff --git a/src/util/xmlpool/de.po b/src/util/xmlpool/de.po
index 7b5b9510cfc..8c1d3df3f11 100644
--- a/src/util/xmlpool/de.po
+++ b/src/util/xmlpool/de.po
@@ -59,26 +59,6 @@ msgstr ""
 msgid "Image Quality"
 msgstr "Bildqualität"
 
-#: t_options.h:133
-msgid "Texture color depth"
-msgstr "Texturfarbtiefe"
-
-#: t_options.h:134
-msgid "Prefer frame buffer color depth"
-msgstr "Bevorzuge Farbtiefe des Framebuffers"
-
-#: t_options.h:135
-msgid "Prefer 32 bits per texel"
-msgstr "Bevorzuge 32 bits pro Texel"
-
-#: t_options.h:136
-msgid "Prefer 16 bits per texel"
-msgstr "Bevorzuge 16 bits pro Texel"
-
-#: t_options.h:137
-msgid "Force 16 bits per texel"
-msgstr "Erzwinge 16 bits pro Texel"
-
 #: t_options.h:195
 msgid "A post-processing filter to cel-shade the output"
 msgstr "Nachbearbeitungsfilter für Cell Shading"
diff --git a/src/util/xmlpool/es.po b/src/util/xmlpool/es.po
index 81106ae7139..e5f44c8efc5 100644
--- a/src/util/xmlpool/es.po
+++ b/src/util/xmlpool/es.po
@@ -66,26 +66,6 @@ msgstr "Permite directivas #extension GLSL en medio de los 
shaders"
 msgid "Image Quality"
 msgstr "Calidad de imagen"
 
-#: t_options.h:133
-msgid "Texture color depth"
-msgstr "Profundidad de color de textura"
-
-#: t_options.h:134
-msgid "Prefer frame buffer color depth"
-msgstr "Preferir profundidad de color del framebuffer"
-
-#: t_options.h:135
-msgid "Prefer 32 bits per texel"
-msgstr "Preferir 32 bits por texel"
-
-#: t_options.h:136
-msgid "Prefer 16 bits per texel"
-msgstr "Preferir 16 bits por texel"
-
-#: t_options.h:137
-msgid "Force 16 bits per texel"
-msgstr "Forzar a 16 bits por texel"
-
 #: t_options.h:195
 msgid "A post-processing filter to cel-shade the output"
 msgstr "Un filtro de postprocesamiento para aplicar cel shading a la salida"
diff --git a/src/util/xmlpool/fr.po b/src/util/xmlpool/fr.po
index f1a2ce0748e..aa8706cbb1c 100644
--- a/src/util/xmlpool/fr.po
+++ b/src/util/xmlpool/fr.po
@@ -58,26 +58,6 @@ msgstr ""
 msgid "Image Quality"
 msgstr "Qualité d'image"
 
-#: t_options.h:133
-msgid "Texture color depth"
-msgstr "Profondeur de texture"
-
-#: t_options.h:134
-msgid "Prefer frame buffer color depth"
-msgstr "Profondeur de couleur"
-
-#: t_options.h:135
-msgid "Prefer 32 bits per texel"
-msgstr "Préférer 32 bits par texel"
-
-#: t_options.h:136
-msgid "Prefer 16 bits per texel"
-msgstr "Prérérer 16 bits par texel"
-
-#: 

[Mesa-dev] [PATCH v3] nir: propagates if condition evaluation down some alu chains

2018-08-29 Thread Timothy Arceri
v2:
 - only allow nir_op_inot or nir_op_b2i when alu input is 1.
 - use some helpers as suggested by Jason.

v3:
 - evaluate alu op for single input alu ops
 - add helper function to decide if to propagate through alu
 - make use of nir_before_src in another spot

shader-db IVB results:

total instructions in shared programs: 9993483 -> 9993472 (-0.00%)
instructions in affected programs: 1300 -> 1289 (-0.85%)
helped: 11
HURT: 0

total cycles in shared programs: 219476091 -> 219476059 (-0.00%)
cycles in affected programs: 7675 -> 7643 (-0.42%)
helped: 10
HURT: 1
---
 src/compiler/nir/nir_opt_if.c | 145 --
 1 file changed, 139 insertions(+), 6 deletions(-)

diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
index 11c6693d302..9e9d8edda21 100644
--- a/src/compiler/nir/nir_opt_if.c
+++ b/src/compiler/nir/nir_opt_if.c
@@ -23,6 +23,7 @@
 
 #include "nir.h"
 #include "nir/nir_builder.h"
+#include "nir_constant_expressions.h"
 #include "nir_control_flow.h"
 #include "nir_loop_analyze.h"
 
@@ -403,9 +404,127 @@ evaluate_if_condition(nir_if *nif, nir_cursor cursor, 
uint32_t *value)
}
 }
 
+/*
+ * This propagates if condition evaluation down the chain of some alu
+ * instructions. For example by checking the use of some of the following alu
+ * instructions we can eventually replace ssa_107 with NIR_TRUE.
+ *
+ *   loop {
+ *  block block_1:
+ *  vec1 32 ssa_85 = load_const (0x0002)
+ *  vec1 32 ssa_86 = ieq ssa_48, ssa_85
+ *  vec1 32 ssa_87 = load_const (0x0001)
+ *  vec1 32 ssa_88 = ieq ssa_48, ssa_87
+ *  vec1 32 ssa_89 = ior ssa_86, ssa_88
+ *  vec1 32 ssa_90 = ieq ssa_48, ssa_0
+ *  vec1 32 ssa_91 = ior ssa_89, ssa_90
+ *  if ssa_86 {
+ * block block_2:
+ * ...
+ *break
+ *  } else {
+ *block block_3:
+ *  }
+ *  block block_4:
+ *  if ssa_88 {
+ *block block_5:
+ * ...
+ *break
+ *  } else {
+ *block block_6:
+ *  }
+ *  block block_7:
+ *  if ssa_90 {
+ *block block_8:
+ * ...
+ *break
+ *  } else {
+ *block block_9:
+ *  }
+ *  block block_10:
+ *  vec1 32 ssa_107 = inot ssa_91
+ *  if ssa_107 {
+ *block block_11:
+ *break
+ *  } else {
+ *block block_12:
+ *  }
+ *   }
+ */
 static bool
-evaluate_condition_use(nir_if *nif, nir_src *use_src, void *mem_ctx,
-   bool is_if_condition)
+propagate_condition_eval(nir_builder *b, nir_if *nif, nir_src *use_src,
+ nir_src *alu_use, nir_alu_instr *alu, void *mem_ctx,
+ bool is_if_condition)
+{
+   bool progress = false;
+
+   uint32_t bool_value;
+   b->cursor = nir_before_src(alu_use, is_if_condition);
+   if (nir_op_infos[alu->op].num_inputs == 1) {
+  assert(alu->op == nir_op_inot || alu->op == nir_op_b2i);
+
+  if (evaluate_if_condition(nif, b->cursor, &bool_value)) {
+ nir_const_value bool_src;
+ bool_src.u32[0] = bool_value;
+
+ unsigned bit_size = nir_src_bit_size(alu->src[0].src);
+ nir_const_value result =
+nir_eval_const_opcode(alu->op, 1, bit_size, &bool_src);
+
+ replace_if_condition_use_with_const(alu_use, mem_ctx, b->cursor,
+ result.u32[0], is_if_condition);
+ progress = true;
+  }
+   } else {
+  assert(alu->op == nir_op_ior || alu->op == nir_op_iand);
+
+  if (evaluate_if_condition(nif, b->cursor, &bool_value)) {
+ nir_ssa_def *def[2];
+ for (unsigned i = 0; i < 2; i++) {
+if (alu->src[i].src.ssa == use_src->ssa) {
+   nir_const_value const_value;
+   const_value.u32[0] = bool_value;
+
+   def[i] = nir_build_imm(b, 1, 32, const_value);
+} else {
+   def[i] = alu->src[i].src.ssa;
+}
+ }
+
+ nir_ssa_def *nalu =
+nir_build_alu(b, alu->op, def[0], def[1], NULL, NULL);
+
+ /* Rewrite use to use new alu instruction */
+ nir_src new_src = nir_src_for_ssa(nalu);
+
+ if (is_if_condition)
+nir_if_rewrite_condition(alu_use->parent_if, new_src);
+ else
+nir_instr_rewrite_src(alu_use->parent_instr, alu_use, new_src);
+
+ progress = true;
+  }
+   }
+
+   return progress;
+}
+
+static bool
+can_propagate_through_alu(nir_src *src)
+{
+   if (src->parent_instr->type == nir_instr_type_alu &&
+   (nir_instr_as_alu(src->parent_instr)->op == nir_op_ior ||
+nir_instr_as_alu(src->parent_instr)->op == nir_op_iand ||
+nir_instr_as_alu(src->parent_instr)->op == nir_op_inot ||
+nir_instr_as_alu(src->parent_instr)->op == nir_op_b2i))
+  return true;
+
+   return false;
+}
+
+static bool
+evaluate_condition_use(nir_builder *b, nir_if *nif, nir_src 

Re: [Mesa-dev] [PATCH 2/2] i965/vec4: Clamp indirect tes input array reads with 0x0fffffff

2018-08-29 Thread Kenneth Graunke
On Wednesday, August 29, 2018 1:12:28 PM PDT Ian Romanick wrote:
> From: Ian Romanick 
> 
> Page 190 of "Volume 7: 3D Media GPGPU Engine (Haswell)" says the valid
> range of the offset is [0, 0FFFFFFFh].
> 
> Signed-off-by: Ian Romanick 
> Cc: mesa-sta...@lists.freedesktop.org
> Cc: Kenneth Graunke 
> ---
>  src/intel/compiler/brw_vec4_tes.cpp | 12 +++-
>  1 file changed, 11 insertions(+), 1 deletion(-)
> 
> diff --git a/src/intel/compiler/brw_vec4_tes.cpp 
> b/src/intel/compiler/brw_vec4_tes.cpp
> index 35aff0f4b78..cf1bff42aa9 100644
> --- a/src/intel/compiler/brw_vec4_tes.cpp
> +++ b/src/intel/compiler/brw_vec4_tes.cpp
> @@ -185,9 +185,19 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr 
> *instr)
>   first_component /= 2;
>  
>if (indirect_offset.file != BAD_FILE) {
> + src_reg clamped_indirect_offset = src_reg(this, 
> glsl_type::uvec4_type);
> +
> + /* Page 190 of "Volume 7: 3D Media GPGPU Engine (Haswell)" says the
> +  * valid range of the offset is [0, 0FFFFFFFh].
> +  */
> + emit_minmax(BRW_CONDITIONAL_L,
> + dst_reg(clamped_indirect_offset),
> + retype(indirect_offset, BRW_REGISTER_TYPE_UD),
> + brw_imm_ud(0x0fffffffu));
> +
>   header = src_reg(this, glsl_type::uvec4_type);
>   emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header),
> -  input_read_header, indirect_offset);
> +  input_read_header, clamped_indirect_offset);
>} else {
>   /* Arbitrarily only push up to 24 vec4 slots worth of data,
>* which is 12 registers (since each holds 2 vec4 slots).
> 

Both patches are:
Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] gallium: enable GL_AMD_depth_clamp_separate on r600, radeonsi

2018-08-29 Thread Marek Olšák
From: Marek Olšák 

---
 docs/relnotes/18.3.0.html| 1 +
 src/gallium/docs/source/screen.rst   | 4 
 src/gallium/drivers/etnaviv/etnaviv_screen.c | 1 +
 src/gallium/drivers/freedreno/freedreno_screen.c | 1 +
 src/gallium/drivers/i915/i915_screen.c   | 1 +
 src/gallium/drivers/llvmpipe/lp_screen.c | 1 +
 src/gallium/drivers/nouveau/nv30/nv30_screen.c   | 1 +
 src/gallium/drivers/nouveau/nv50/nv50_screen.c   | 1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   | 1 +
 src/gallium/drivers/r300/r300_screen.c   | 1 +
 src/gallium/drivers/r600/evergreen_state.c   | 2 +-
 src/gallium/drivers/r600/r600_pipe.c | 1 +
 src/gallium/drivers/r600/r600_state.c| 2 +-
 src/gallium/drivers/radeonsi/si_get.c| 1 +
 src/gallium/drivers/radeonsi/si_state.c  | 2 +-
 src/gallium/drivers/softpipe/sp_screen.c | 1 +
 src/gallium/drivers/svga/svga_screen.c   | 1 +
 src/gallium/drivers/swr/swr_screen.cpp   | 1 +
 src/gallium/drivers/v3d/v3d_screen.c | 1 +
 src/gallium/drivers/vc4/vc4_screen.c | 1 +
 src/gallium/drivers/virgl/virgl_screen.c | 1 +
 src/gallium/include/pipe/p_defines.h | 1 +
 src/mesa/state_tracker/st_extensions.c   | 1 +
 23 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/docs/relnotes/18.3.0.html b/docs/relnotes/18.3.0.html
index 71fb41ca86f..5874d3fa330 100644
--- a/docs/relnotes/18.3.0.html
+++ b/docs/relnotes/18.3.0.html
@@ -44,20 +44,21 @@ TBD.
 
 
 
 New features
 
 
 Note: some of the new features are only available with certain drivers.
 
 
 
+GL_AMD_depth_clamp_separate on r600, radeonsi.
 GL_AMD_framebuffer_multisample_advanced on radeonsi.
 GL_AMD_gpu_shader_int64 on i965, nvc0, radeonsi.
 GL_AMD_multi_draw_indirect on all GL 4.x drivers.
 GL_AMD_query_buffer_object on i965, nvc0, r600, radeonsi.
 GL_EXT_disjoint_timer_query on radeonsi and most other Gallium drivers (ES 
extension)
 GL_EXT_vertex_attrib_64bit on i965, nvc0, radeonsi.
 GL_EXT_window_rectangles on radeonsi.
 GL_KHR_texture_compression_astc_sliced_3d on radeonsi.
 GL_INTEL_fragment_shader_ordering on i965.
 GL_NV_fragment_shader_interlock on i965.
diff --git a/src/gallium/docs/source/screen.rst 
b/src/gallium/docs/source/screen.rst
index 485248261df..8da611b24dd 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -62,20 +62,24 @@ The integer capabilities:
 * ``PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT``: Whether the TGSI property
   FS_COORD_ORIGIN with value UPPER_LEFT is supported.
 * ``PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT``: Whether the TGSI property
   FS_COORD_ORIGIN with value LOWER_LEFT is supported.
 * ``PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER``: Whether the TGSI
   property FS_COORD_PIXEL_CENTER with value HALF_INTEGER is supported.
 * ``PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER``: Whether the TGSI
   property FS_COORD_PIXEL_CENTER with value INTEGER is supported.
 * ``PIPE_CAP_DEPTH_CLIP_DISABLE``: Whether the driver is capable of disabling
   depth clipping (through pipe_rasterizer_state)
+* ``PIPE_CAP_DEPTH_CLIP_DISABLE_SEPARATE``: Whether the driver is capable of
+  disabling depth clipping (through pipe_rasterizer_state) separately for
+  the near and far plane. If not, depth_clip_near and depth_clip_far will be
+  equal.
 * ``PIPE_CAP_SHADER_STENCIL_EXPORT``: Whether a stencil reference value can be
   written from a fragment shader.
 * ``PIPE_CAP_TGSI_INSTANCEID``: Whether TGSI_SEMANTIC_INSTANCEID is supported
   in the vertex shader.
 * ``PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR``: Whether the driver supports
   per-instance vertex attribs.
 * ``PIPE_CAP_FRAGMENT_COLOR_CLAMPED``: Whether fragment color clamping is
   supported.  That is, is the pipe_rasterizer_state::clamp_fragment_color
   flag supported by the driver?  If not, the state tracker will insert
   clamping code into the fragment shaders when needed.
diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c 
b/src/gallium/drivers/etnaviv/etnaviv_screen.c
index 9669bd2f601..60ab42cf3cd 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c
@@ -178,20 +178,21 @@ etna_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_START_INSTANCE: /* instancing not supported AFAIK */
case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: /* instancing not supported 
AFAIK */
case PIPE_CAP_SHADER_STENCIL_EXPORT: /* Fragment shader cannot export 
stencil value */
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: /* no dual-source supported */
case PIPE_CAP_TEXTURE_MULTISAMPLE: /* no texture multisample */
case PIPE_CAP_TEXTURE_MIRROR_CLAMP: /* only mirrored repeat */
case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE: /* only mirrored repeat */
case PIPE_CAP_INDEP_BLEND_ENABLE:
case PIPE_CAP_INDEP_BLEND_FUNC:
case 

[Mesa-dev] [PATCH 1/2] gallium: split depth_clip into depth_clip_near & depth_clip_far

2018-08-29 Thread Marek Olšák
From: Marek Olšák 

for AMD_depth_clamp_separate.
---
 src/gallium/auxiliary/draw/draw_context.c   | 2 +-
 src/gallium/auxiliary/driver_trace/tr_dump_state.c  | 3 ++-
 src/gallium/auxiliary/hud/hud_context.c | 3 ++-
 src/gallium/auxiliary/postprocess/pp_program.c  | 3 ++-
 src/gallium/auxiliary/util/u_blit.c | 3 ++-
 src/gallium/auxiliary/util/u_blitter.c  | 3 ++-
 src/gallium/auxiliary/util/u_dump_state.c   | 3 ++-
 src/gallium/auxiliary/util/u_tests.c| 3 ++-
 src/gallium/auxiliary/vl/vl_bicubic_filter.c| 4 +++-
 src/gallium/auxiliary/vl/vl_compositor.c| 3 ++-
 src/gallium/auxiliary/vl/vl_deint_filter.c  | 4 +++-
 src/gallium/auxiliary/vl/vl_idct.c  | 4 +++-
 src/gallium/auxiliary/vl/vl_matrix_filter.c | 4 +++-
 src/gallium/auxiliary/vl/vl_mc.c| 4 +++-
 src/gallium/auxiliary/vl/vl_median_filter.c | 4 +++-
 src/gallium/auxiliary/vl/vl_zscan.c | 4 +++-
 src/gallium/drivers/freedreno/a3xx/fd3_emit.c   | 4 ++--
 src/gallium/drivers/freedreno/a3xx/fd3_program.c| 2 +-
 src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c | 2 +-
 src/gallium/drivers/freedreno/a4xx/fd4_emit.c   | 2 +-
 src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c | 2 +-
 src/gallium/drivers/llvmpipe/lp_state_fs.c  | 2 +-
 src/gallium/drivers/nouveau/nv30/nv30_state.c   | 2 +-
 src/gallium/drivers/nouveau/nv50/nv50_state.c   | 2 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_state.c   | 2 +-
 src/gallium/drivers/r600/evergreen_state.c  | 4 ++--
 src/gallium/drivers/r600/r600_state.c   | 4 ++--
 src/gallium/drivers/radeonsi/si_state.c | 4 ++--
 src/gallium/drivers/softpipe/sp_quad_depth_test.c   | 4 ++--
 src/gallium/drivers/svga/svga_pipe_rasterizer.c | 2 +-
 src/gallium/drivers/swr/swr_state.cpp   | 2 +-
 src/gallium/drivers/virgl/virgl_encode.c| 2 +-
 src/gallium/include/pipe/p_state.h  | 6 +-
 src/gallium/state_trackers/nine/nine_pipe.c | 3 ++-
 src/gallium/state_trackers/xa/xa_renderer.c | 3 ++-
 src/gallium/tests/graw/fs-test.c| 3 ++-
 src/gallium/tests/graw/gs-test.c| 3 ++-
 src/gallium/tests/graw/quad-sample.c| 3 ++-
 src/gallium/tests/graw/quad-tex.c   | 3 ++-
 src/gallium/tests/graw/shader-leak.c| 3 ++-
 src/gallium/tests/graw/tri-gs.c | 3 ++-
 src/gallium/tests/graw/tri-instanced.c  | 3 ++-
 src/gallium/tests/graw/tri-large.c  | 3 ++-
 src/gallium/tests/graw/tri.c| 3 ++-
 src/gallium/tests/graw/vs-test.c| 3 ++-
 src/gallium/tests/trivial/quad-tex.c| 3 ++-
 src/gallium/tests/trivial/tri.c | 3 ++-
 src/mesa/state_tracker/st_atom_rasterizer.c | 4 ++--
 src/mesa/state_tracker/st_cb_bitmap.c   | 3 ++-
 src/mesa/state_tracker/st_cb_clear.c| 3 ++-
 src/mesa/state_tracker/st_cb_drawpixels.c   | 4 ++--
 51 files changed, 100 insertions(+), 58 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_context.c 
b/src/gallium/auxiliary/draw/draw_context.c
index e887272e154..3fc096789c0 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -268,21 +268,21 @@ draw_is_vs_window_space(struct draw_context *draw)
 
 void
 draw_update_clip_flags(struct draw_context *draw)
 {
bool window_space = draw_is_vs_window_space(draw);
 
draw->clip_xy = !draw->driver.bypass_clip_xy && !window_space;
draw->guard_band_xy = (!draw->driver.bypass_clip_xy &&
   draw->driver.guard_band_xy);
draw->clip_z = (!draw->driver.bypass_clip_z &&
-   draw->rasterizer && draw->rasterizer->depth_clip) &&
+   draw->rasterizer && draw->rasterizer->depth_clip_near) &&
   !window_space;
draw->clip_user = draw->rasterizer &&
  draw->rasterizer->clip_plane_enable != 0 &&
  !window_space;
draw->guard_band_points_xy = draw->guard_band_xy ||
 (draw->driver.bypass_clip_points &&
 (draw->rasterizer &&
  draw->rasterizer->point_tri_clip));
 }
 
diff --git a/src/gallium/auxiliary/driver_trace/tr_dump_state.c 
b/src/gallium/auxiliary/driver_trace/tr_dump_state.c
index 46fa5747460..88a50b80903 100644
--- a/src/gallium/auxiliary/driver_trace/tr_dump_state.c
+++ b/src/gallium/auxiliary/driver_trace/tr_dump_state.c
@@ -136,21 +136,22 @@ void trace_dump_rasterizer_state(const struct 
pipe_rasterizer_state *state)
trace_dump_member(bool, state, line_stipple_enable);
trace_dump_member(bool, state, line_last_pixel);
 
trace_dump_member(bool, state, 

Re: [Mesa-dev] [PATCH] glsl/linker: Link all out vars from a shader objects on a single stage

2018-08-29 Thread Andres Gomez
Vadym, should we also include this in the stable queues ?


On Mon, 2018-08-27 at 15:20 +0300, Vadym Shovkoplias wrote:
> From: "vadym.shovkoplias" 
> 
> During intra stage linking some out variables can be dropped because
> they are not used in a shader with the main function. But these out vars
> can be referenced on later stages which can lead to further linking
> errors.
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105731
> Signed-off-by: Vadym Shovkoplias 
> ---
>  src/compiler/glsl/linker.cpp | 38 
>  1 file changed, 38 insertions(+)
> 
> diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
> index 3ce78fe642..3b0c01c316 100644
> --- a/src/compiler/glsl/linker.cpp
> +++ b/src/compiler/glsl/linker.cpp
> @@ -2187,6 +2187,41 @@ link_cs_input_layout_qualifiers(struct 
> gl_shader_program *prog,
> }
>  }
>  
> +/**
> + * Link all out variables on a single stage which are not
> + * directly used in a shader with the main function.
> + */
> +static void
> +link_output_variables(struct gl_linked_shader *linked_shader,
> +  struct gl_shader **shader_list,
> +  unsigned num_shaders)
> +{
> +   struct glsl_symbol_table *symbols = linked_shader->symbols;
> +
> +   for (unsigned i = 0; i < num_shaders; i++) {
> +
> +  /* Skip shader object with main function */
> +  if (shader_list[i]->symbols->get_function("main"))
> + continue;
> +
> +  foreach_in_list (ir_instruction, ir, shader_list[i]->ir) {
> +
> + if (ir->ir_type != ir_type_variable)
> +continue;
> +
> + ir_variable *const var = (ir_variable *) ir;
> +
> + if (var->data.mode == ir_var_shader_out &&
> +   !symbols->get_variable(var->name)) {
> +symbols->add_variable(var);
> +linked_shader->ir->push_head(var);
> + }
> +  }
> +   }
> +
> +   return;
> +}
> +
>  
>  /**
>   * Combine a group of shaders for a single stage to generate a linked shader
> @@ -2352,6 +2387,9 @@ link_intrastage_shaders(void *mem_ctx,
>return NULL;
> }
>  
> +   if (linked->Stage != MESA_SHADER_FRAGMENT)
> +  link_output_variables(linked, shader_list, num_shaders);
> +
> /* Make a pass over all variable declarations to ensure that arrays with
>  * unspecified sizes have a size specified.  The size is inferred from the
>  * max_array_access field.
-- 
Br,

Andres
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] st/mesa, gallium: add a workaround for No Mans Sky

2018-08-29 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Wed, Aug 29, 2018 at 1:48 AM, Timothy Arceri  wrote:
> The spec seems clear this is not allowed but the Nvidia binary
> forces apps to add layout qualifiers so this works around the
> issue for No Mans Sky until the CTS can be sorted out.
> ---
>  src/gallium/auxiliary/pipe-loader/driinfo_gallium.h | 1 +
>  src/gallium/include/state_tracker/st_api.h  | 1 +
>  src/gallium/state_trackers/dri/dri_screen.c | 2 ++
>  src/mesa/state_tracker/st_extensions.c  | 3 +++
>  src/util/00-mesa-defaults.conf  | 1 +
>  src/util/xmlpool/t_options.h| 5 +
>  6 files changed, 13 insertions(+)
>
> diff --git a/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h 
> b/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h
> index b8f0fe64098..5f4305d91d7 100644
> --- a/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h
> +++ b/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h
> @@ -29,6 +29,7 @@ DRI_CONF_SECTION_DEBUG
> DRI_CONF_ALLOW_HIGHER_COMPAT_VERSION("false")
> DRI_CONF_FORCE_GLSL_ABS_SQRT("false")
> DRI_CONF_GLSL_CORRECT_DERIVATIVES_AFTER_DISCARD("false")
> +   DRI_CONF_ALLOW_GLSL_LAYOUT_QUALIFIER_ON_FUNCTION_PARAMETERS("false")
>  DRI_CONF_SECTION_END
>
>  DRI_CONF_SECTION_MISCELLANEOUS
> diff --git a/src/gallium/include/state_tracker/st_api.h 
> b/src/gallium/include/state_tracker/st_api.h
> index 8d386a82a63..61152e35468 100644
> --- a/src/gallium/include/state_tracker/st_api.h
> +++ b/src/gallium/include/state_tracker/st_api.h
> @@ -228,6 +228,7 @@ struct st_config_options
> boolean glsl_zero_init;
> boolean force_glsl_abs_sqrt;
> boolean allow_glsl_cross_stage_interpolation_mismatch;
> +   boolean allow_glsl_layout_qualifier_on_function_parameters;
> unsigned char config_options_sha1[20];
>  };
>
> diff --git a/src/gallium/state_trackers/dri/dri_screen.c 
> b/src/gallium/state_trackers/dri/dri_screen.c
> index 3e4de59a433..027e85024f0 100644
> --- a/src/gallium/state_trackers/dri/dri_screen.c
> +++ b/src/gallium/state_trackers/dri/dri_screen.c
> @@ -85,6 +85,8 @@ dri_fill_st_options(struct dri_screen *screen)
>driQueryOptionb(optionCache, "force_glsl_abs_sqrt");
> options->allow_glsl_cross_stage_interpolation_mismatch =
>driQueryOptionb(optionCache, 
> "allow_glsl_cross_stage_interpolation_mismatch");
> +   options->allow_glsl_layout_qualifier_on_function_parameters =
> +  driQueryOptionb(optionCache, 
> "allow_glsl_layout_qualifier_on_function_parameters");
>
> driComputeOptionsSha1(optionCache, options->config_options_sha1);
>  }
> diff --git a/src/mesa/state_tracker/st_extensions.c 
> b/src/mesa/state_tracker/st_extensions.c
> index 8483f7a2a72..29a32513085 100644
> --- a/src/mesa/state_tracker/st_extensions.c
> +++ b/src/mesa/state_tracker/st_extensions.c
> @@ -1255,6 +1255,9 @@ void st_init_extensions(struct pipe_screen *screen,
> if (options->allow_glsl_relaxed_es)
>consts->AllowGLSLRelaxedES = GL_TRUE;
>
> +   if (options->allow_glsl_layout_qualifier_on_function_parameters)
> +  consts->AllowLayoutQualifiersOnFunctionParameters = GL_TRUE;
> +
> consts->MinMapBufferAlignment =
>screen->get_param(screen, PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT);
>
> diff --git a/src/util/00-mesa-defaults.conf b/src/util/00-mesa-defaults.conf
> index ad59efba50b..eb78b75e9b8 100644
> --- a/src/util/00-mesa-defaults.conf
> +++ b/src/util/00-mesa-defaults.conf
> @@ -195,6 +195,7 @@ TODO: document the other workarounds.
>
>  
>  
> +            <option name="allow_glsl_layout_qualifier_on_function_parameters" value="true" />
>  
>
>   2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] glsl: add a mechanism to allow layout qualifiers on function params

2018-08-29 Thread Marek Olšák
Uh.

Reviewed-by: Marek Olšák 

Marek

On Wed, Aug 29, 2018 at 1:48 AM, Timothy Arceri  wrote:
> The spec is quite clear this is not allowed:
>
> From Section 4.4. (Layout Qualifiers) of the GLSL 4.60 spec:
>
>"Layout qualifiers can appear in several forms of declaration.
>They can appear as part of an interface block definition or
>block member, as shown in the grammar in the previous section.
>They can also appear with just an interface-qualifier to establish
>layouts of other declarations made with that qualifier:
>
>   layout-qualifier interface-qualifier ;
>
>Or, they can appear with an individual variable declared with
>an interface qualifier:
>
>   layout-qualifier interface-qualifier declaration ;"
>
> From Section 4.10 (Memory Qualifiers) of the GLSL 4.60 spec:
>
>"Layout qualifiers cannot be used on formal function parameters,
>and layout qualification is not included in parameter matching."
>
> However on the Nvidia binary driver they actually fail to compile
> if image function params don't have a layout qualifier. This results
> in applications such as No Mans Sky using layout qualifiers on params.
>
> I've submitted a CTS test to expose this problem in the Nvidia driver
> but until that is resolved this patch will help Mesa drivers work
> around the issue.
> ---
>  src/compiler/glsl/glsl_parser.yy | 17 +
>  src/compiler/glsl/glsl_parser_extras.cpp |  2 ++
>  src/compiler/glsl/glsl_parser_extras.h   |  1 +
>  src/mesa/main/mtypes.h   |  5 +
>  4 files changed, 25 insertions(+)
>
> diff --git a/src/compiler/glsl/glsl_parser.yy 
> b/src/compiler/glsl/glsl_parser.yy
> index bc2571b6844..fd1592beca0 100644
> --- a/src/compiler/glsl/glsl_parser.yy
> +++ b/src/compiler/glsl/glsl_parser.yy
> @@ -897,6 +897,23 @@ parameter_declarator:
>$$->identifier = $2;
>state->symbols->add_variable(new(state) ir_variable(NULL, $2, 
> ir_var_auto));
> }
> +   | layout_qualifier type_specifier any_identifier
> +   {
> +  if (state->allow_layout_qualifier_on_function_parameter) {
> + void *ctx = state->linalloc;
> + $$ = new(ctx) ast_parameter_declarator();
> + $$->set_location_range(@2, @3);
> + $$->type = new(ctx) ast_fully_specified_type();
> + $$->type->set_location(@2);
> + $$->type->specifier = $2;
> + $$->identifier = $3;
> + state->symbols->add_variable(new(state) ir_variable(NULL, $3, 
> ir_var_auto));
> +  } else {
> + _mesa_glsl_error(&@1, state,
> +  "is is not allowed on function parameter");
> + YYERROR;
> +  }
> +   }
> | type_specifier any_identifier array_specifier
> {
>void *ctx = state->linalloc;
> diff --git a/src/compiler/glsl/glsl_parser_extras.cpp 
> b/src/compiler/glsl/glsl_parser_extras.cpp
> index 0a7d0d78b14..efd1a013dbd 100644
> --- a/src/compiler/glsl/glsl_parser_extras.cpp
> +++ b/src/compiler/glsl/glsl_parser_extras.cpp
> @@ -311,6 +311,8 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct 
> gl_context *_ctx,
>ctx->Const.AllowGLSLExtensionDirectiveMidShader;
> this->allow_builtin_variable_redeclaration =
>ctx->Const.AllowGLSLBuiltinVariableRedeclaration;
> +   this->allow_layout_qualifier_on_function_parameter =
> +  ctx->Const.AllowLayoutQualifiersOnFunctionParameters;
>
> this->cs_input_local_size_variable_specified = false;
>
> diff --git a/src/compiler/glsl/glsl_parser_extras.h 
> b/src/compiler/glsl/glsl_parser_extras.h
> index 2c8353214aa..69aa6cf9cf3 100644
> --- a/src/compiler/glsl/glsl_parser_extras.h
> +++ b/src/compiler/glsl/glsl_parser_extras.h
> @@ -866,6 +866,7 @@ struct _mesa_glsl_parse_state {
>
> bool allow_extension_directive_midshader;
> bool allow_builtin_variable_redeclaration;
> +   bool allow_layout_qualifier_on_function_parameter;
>
> /**
>  * Known subroutine type declarations.
> diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
> index 9d058cef6d9..1f640b063c0 100644
> --- a/src/mesa/main/mtypes.h
> +++ b/src/mesa/main/mtypes.h
> @@ -3764,6 +3764,11 @@ struct gl_constants
>  */
> GLboolean AllowHigherCompatVersion;
>
> +   /**
> +* Allow layout qualifiers on function parameters.
> +*/
> +   GLboolean AllowLayoutQualifiersOnFunctionParameters;
> +
> /**
>  * Force computing the absolute value for sqrt() and inversesqrt() to 
> follow
>  * D3D9 when apps rely on this behaviour.
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH] mesa: allow GL_UNSIGNED_BYTE type for SNORM reads

2018-08-29 Thread Andres Gomez
On Wed, 2018-08-29 at 08:22 -0700, Dylan Baker wrote:
> Quoting Tapani Pälli (2018-08-27 04:46:37)
> > OpenGL ES spec states:
> >"For normalized fixed-point rendering surfaces, the combination format
> > RGBA and type UNSIGNED_BYTE is accepted."
> > 
> > This fixes following failing VK-GL-CTS tests:
> > 
> >KHR-GLES3.packed_pixels.pbo_rectangle.rgba8_snorm
> >KHR-GLES3.packed_pixels.rectangle.rgba8_snorm
> >KHR-GLES3.packed_pixels.varied_rectangle.rgba8_snorm
> > 
> > Signed-off-by: Tapani Pälli 
> > https://bugs.freedesktop.org/show_bug.cgi?id=107658
> > Cc: mesa-sta...@lists.freedesktop.org
> > ---
> > 
> > This is a partial fix to the bug. I believe there are 2 separate
> > issues within reported bug and this fixes the first one.
> > 
> >  src/mesa/main/readpix.c | 9 +
> >  1 file changed, 9 insertions(+)
> > 
> > diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c
> > index 2cbb578a37f..556c860d393 100644
> > --- a/src/mesa/main/readpix.c
> > +++ b/src/mesa/main/readpix.c
> > @@ -958,6 +958,15 @@ read_pixels_es3_error_check(struct gl_context *ctx, 
> > GLenum format, GLenum type,
> > return GL_NO_ERROR;
> >   }
> >}
> > +  if (type == GL_UNSIGNED_BYTE) {
> > + switch (internalFormat) {
> > + case GL_R8_SNORM:
> > + case GL_RG8_SNORM:
> > + case GL_RGBA8_SNORM:
> > +if (_mesa_has_EXT_render_snorm(ctx))
> > +   return GL_NO_ERROR;
> > + }
> > +  }
> >break;
> > case GL_BGRA:
> >/* GL_EXT_read_format_bgra */
> > -- 
> > 2.14.4
> > 
> 
> Hi Tapani,
> 
> This doesn't apply cleanly to 18.1 because "mesa: enable EXT_render_snorm
> extension" isn't present on the branch. Does it still make sense to pull this
> into 18.1?

Likewise for 18.2.

The offending commit that caused the regression didn't make it for any
of the current stable queues so I think it is safe just to ignore this
patch. Can you confirm?

-- 
Br,

Andres
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallivm/radeonsi: allow to pass two swizzles into fetches.

2018-08-29 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Mon, Aug 27, 2018 at 5:16 PM, Dave Airlie  wrote:
> From: Dave Airlie 
>
> This hijacks the top 16-bits of swizzle, to pass in the swizzle
> for the second channel.
>
> This fixes handling .yx swizzles of 64-bit values.
>
> This should fixup radeonsi and llvmpipe.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107524
> ---
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi.c   |  9 ++
>  .../auxiliary/gallivm/lp_bld_tgsi_soa.c   | 86 ---
>  src/gallium/drivers/radeonsi/si_shader.c  |  7 +-
>  .../drivers/radeonsi/si_shader_tgsi_setup.c   | 18 ++--
>  4 files changed, 79 insertions(+), 41 deletions(-)
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c 
> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
> index 64d2cd703be..2c3be8fb127 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
> @@ -353,6 +353,15 @@ lp_build_emit_fetch_src(
>   assert(0 && "invalid swizzle in emit_fetch()");
>   return bld_base->base.undef;
>}
> +  if (tgsi_type_is_64bit(stype)) {
> +unsigned swizzle2;
> +swizzle2 = tgsi_util_get_full_src_register_swizzle(reg, chan_index + 
> 1);
> +if (swizzle2 > 3) {
> +   assert(0 && "invalid swizzle in emit_fetch()");
> +   return bld_base->base.undef;
> +}
> +swizzle |= (swizzle2 << 16);
> +  }
> }
>
> assert(reg->Register.Index <= 
> bld_base->info->file_max[reg->Register.File]);
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c 
> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> index 83d7dbea9a2..79ece639e35 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> @@ -1190,7 +1190,7 @@ emit_fetch_constant(
> struct lp_build_tgsi_context * bld_base,
> const struct tgsi_full_src_register * reg,
> enum tgsi_opcode_type stype,
> -   unsigned swizzle)
> +   unsigned swizzle_in)
>  {
> struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
> struct gallivm_state *gallivm = bld_base->base.gallivm;
> @@ -1200,6 +1200,7 @@ emit_fetch_constant(
> LLVMValueRef consts_ptr;
> LLVMValueRef num_consts;
> LLVMValueRef res;
> +   unsigned swizzle = swizzle_in & 0xffff;
>
> /* XXX: Handle fetching xyzw components as a vector */
> assert(swizzle != ~0u);
> @@ -1241,7 +1242,7 @@ emit_fetch_constant(
>
>if (tgsi_type_is_64bit(stype)) {
>   LLVMValueRef swizzle_vec2;
> - swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, 
> swizzle + 1);
> + swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, 
> swizzle_in >> 16);
>   index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
>   index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
>}
> @@ -1256,21 +1257,42 @@ emit_fetch_constant(
>
>scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
>  &index, 1, "");
> -  if (stype == TGSI_TYPE_DOUBLE) {
> - LLVMTypeRef dptr_type = 
> LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
> - scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
> - bld_broad = &bld_base->dbl_bld;
> -  } else if (stype == TGSI_TYPE_UNSIGNED64) {
> - LLVMTypeRef u64ptr_type = 
> LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
> - scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
> - bld_broad = &bld_base->uint64_bld;
> -  } else if (stype == TGSI_TYPE_SIGNED64) {
> - LLVMTypeRef i64ptr_type = 
> LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
> - scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
> - bld_broad = &bld_base->int64_bld;
> +
> +  if (tgsi_type_is_64bit(stype) && ((swizzle_in >> 16) != swizzle + 1)) {
> +
> + LLVMValueRef scalar2, scalar2_ptr;
> + LLVMValueRef shuffles[2];
> + index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + 
> (swizzle_in >> 16));
> +
> + scalar2_ptr = LLVMBuildGEP(builder, consts_ptr,
> +&index, 1, "");
> +
> + scalar = LLVMBuildLoad(builder, scalar_ptr, "");
> + scalar2 = LLVMBuildLoad(builder, scalar2_ptr, "");
> + shuffles[0] = lp_build_const_int32(gallivm, 0);
> + shuffles[1] = lp_build_const_int32(gallivm, 1);
> +
> + res = 
> LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 
> bld_base->base.type.length * 2));
> + res = LLVMBuildInsertElement(builder, res, scalar, shuffles[0], "");
> + res = LLVMBuildInsertElement(builder, res, scalar2, shuffles[1], 
> "");
> +  } else {
> +if (stype == TGSI_TYPE_DOUBLE) {
> +   LLVMTypeRef dptr_type = 
> LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
> +   

Re: [Mesa-dev] [PATCH] docs/relnotes: Add AMD_depth_clamp_separate for i965

2018-08-29 Thread Sagar Ghuge
Hi Marek,

Thanks for reviewing but I don't have commit rights yet. :( 

-- Sagar

On 08/29/2018 02:34 PM, Marek Olšák wrote:
> Looks good. You can push this without an Rb.
> 
> Marek
> 
> On Tue, Aug 28, 2018 at 5:53 PM, Sagar Ghuge  wrote:
>> Signed-off-by: Sagar Ghuge 
>> ---
>>  docs/relnotes/18.3.0.html | 1 +
>>  1 file changed, 1 insertion(+)
>>
>> diff --git a/docs/relnotes/18.3.0.html b/docs/relnotes/18.3.0.html
>> index 71fb41ca86..6e5e3ef93b 100644
>> --- a/docs/relnotes/18.3.0.html
>> +++ b/docs/relnotes/18.3.0.html
>> @@ -51,6 +51,7 @@ Note: some of the new features are only available with 
>> certain drivers.
>>  
>>
>>  
>> +GL_AMD_depth_clamp_separate on i965.
>>  GL_AMD_framebuffer_multisample_advanced on radeonsi.
>>  GL_AMD_gpu_shader_int64 on i965, nvc0, radeonsi.
>>  GL_AMD_multi_draw_indirect on all GL 4.x drivers.
>> --
>> 2.17.1
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] i965/vec4: Clamp indirect tes input array reads with 0x0fffffff

2018-08-29 Thread Jason Ekstrand
On Wed, Aug 29, 2018 at 4:32 PM Ian Romanick  wrote:

> On 08/29/2018 02:22 PM, Jason Ekstrand wrote:
> > Have you seen this cause an actual problem?  There's no way we can
> > actually end up with an input array that big...  I guess this is for the
> > crazy OOB case?
>
> The array can't be that big due to other compiler limits.  This is just
> to prevent a garbage index from being used.  If the previous patch is
> reverted, the test case mentioned in that patch will have this problem
> because the index will be negative.
>

Makes sense.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa/state_tracker: explicitely handle case ir_intrinsic_begin_fragment_shader_ordering in visit_generic_intrinsic()

2018-08-29 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Tue, Aug 28, 2018 at 2:07 PM,   wrote:
> From: Kevin Rogovin 
>
> ---
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
> b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index 7b96947c60..48a7b030ce 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -4068,6 +4068,7 @@ glsl_to_tgsi_visitor::visit(ir_call *ir)
> case ir_intrinsic_generic_atomic_comp_swap:
> case ir_intrinsic_begin_invocation_interlock:
> case ir_intrinsic_end_invocation_interlock:
> +   case ir_intrinsic_begin_fragment_shader_ordering:
>unreachable("Invalid intrinsic");
> }
>  }
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] docs/relnotes: Add AMD_depth_clamp_separate for i965

2018-08-29 Thread Marek Olšák
Looks good. You can push this without an Rb.

Marek

On Tue, Aug 28, 2018 at 5:53 PM, Sagar Ghuge  wrote:
> Signed-off-by: Sagar Ghuge 
> ---
>  docs/relnotes/18.3.0.html | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/docs/relnotes/18.3.0.html b/docs/relnotes/18.3.0.html
> index 71fb41ca86..6e5e3ef93b 100644
> --- a/docs/relnotes/18.3.0.html
> +++ b/docs/relnotes/18.3.0.html
> @@ -51,6 +51,7 @@ Note: some of the new features are only available with 
> certain drivers.
>  
>
>  
> +GL_AMD_depth_clamp_separate on i965.
>  GL_AMD_framebuffer_multisample_advanced on radeonsi.
>  GL_AMD_gpu_shader_int64 on i965, nvc0, radeonsi.
>  GL_AMD_multi_draw_indirect on all GL 4.x drivers.
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] i965/vec4: Clamp indirect tes input array reads with 0x0fffffff

2018-08-29 Thread Ian Romanick
On 08/29/2018 02:22 PM, Jason Ekstrand wrote:
> Have you seen this cause an actual problem?  There's no way we can
> actually end up with an input array that big...  I guess this is for the
> crazy OOB case?

The array can't be that big due to other compiler limits.  This is just
to prevent a garbage index from being used.  If the previous patch is
reverted, the test case mentioned in that patch will have this problem
because the index will be negative.

> On Wed, Aug 29, 2018 at 3:12 PM Ian Romanick  > wrote:
> 
> From: Ian Romanick  >
> 
> Page 190 of "Volume 7: 3D Media GPGPU Engine (Haswell)" says the valid
> range of the offset is [0, 0FFFFFFFh].
> 
> Signed-off-by: Ian Romanick  >
> Cc: mesa-sta...@lists.freedesktop.org
> 
> Cc: Kenneth Graunke  >
> ---
>  src/intel/compiler/brw_vec4_tes.cpp | 12 +++-
>  1 file changed, 11 insertions(+), 1 deletion(-)
> 
> diff --git a/src/intel/compiler/brw_vec4_tes.cpp
> b/src/intel/compiler/brw_vec4_tes.cpp
> index 35aff0f4b78..cf1bff42aa9 100644
> --- a/src/intel/compiler/brw_vec4_tes.cpp
> +++ b/src/intel/compiler/brw_vec4_tes.cpp
> @@ -185,9 +185,19 @@
> vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
>           first_component /= 2;
> 
>        if (indirect_offset.file != BAD_FILE) {
> +         src_reg clamped_indirect_offset = src_reg(this,
> glsl_type::uvec4_type);
> +
> +         /* Page 190 of "Volume 7: 3D Media GPGPU Engine (Haswell)"
> says the
> +          * valid range of the offset is [0, 0FFFFFFFh].
> +          */
> +         emit_minmax(BRW_CONDITIONAL_L,
> +                     dst_reg(clamped_indirect_offset),
> +                     retype(indirect_offset, BRW_REGISTER_TYPE_UD),
> +                     brw_imm_ud(0x0fffffffu));
> +
>           header = src_reg(this, glsl_type::uvec4_type);
>           emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header),
> -              input_read_header, indirect_offset);
> +              input_read_header, clamped_indirect_offset);
>        } else {
>           /* Arbitrarily only push up to 24 vec4 slots worth of data,
>            * which is 12 registers (since each holds 2 vec4 slots).
> -- 
> 2.14.4
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org 
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH] ac/radeonsi: fix CIK copy max size

2018-08-29 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Tue, Aug 28, 2018 at 11:53 PM, Dave Airlie  wrote:
> From: Dave Airlie 
>
> While adding transfer queues to radv, I started writing some tests,
> the first test I wrote fell over copying a buffer larger than this
> limit.
>
> Checked AMDVLK and found the correct limit.
>
> Cc: 
> ---
>  src/amd/common/sid.h | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h
> index 0671f7d3998..edb7d06afa6 100644
> --- a/src/amd/common/sid.h
> +++ b/src/amd/common/sid.h
> @@ -9139,7 +9139,9 @@
>  #defineCIK_SDMA_PACKET_SEMAPHORE   0x7
>  #defineCIK_SDMA_PACKET_CONSTANT_FILL   0xb
>  #defineCIK_SDMA_PACKET_SRBM_WRITE  0xe
> -#defineCIK_SDMA_COPY_MAX_SIZE  0x3fffe0
> +/* There is apparently an undocumented HW "feature" that
> +   prevents the HW from copying past 256 bytes of (1 << 22) */
> +#defineCIK_SDMA_COPY_MAX_SIZE  0x3fff00
>
>  enum amd_cmp_class_flags {
> S_NAN = 1 << 0,// Signaling NaN
> --
> 2.17.1
>
> ___
> mesa-stable mailing list
> mesa-sta...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-stable
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH] ac/radeonsi: fix CIK copy max size

2018-08-29 Thread Marek Olšák
Thanks. Not surprising though. :)

Marek

On Tue, Aug 28, 2018 at 11:53 PM, Dave Airlie  wrote:
> From: Dave Airlie 
>
> While adding transfer queues to radv, I started writing some tests,
> the first test I wrote fell over copying a buffer larger than this
> limit.
>
> Checked AMDVLK and found the correct limit.
>
> Cc: 
> ---
>  src/amd/common/sid.h | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h
> index 0671f7d3998..edb7d06afa6 100644
> --- a/src/amd/common/sid.h
> +++ b/src/amd/common/sid.h
> @@ -9139,7 +9139,9 @@
>  #defineCIK_SDMA_PACKET_SEMAPHORE   0x7
>  #defineCIK_SDMA_PACKET_CONSTANT_FILL   0xb
>  #defineCIK_SDMA_PACKET_SRBM_WRITE  0xe
> -#defineCIK_SDMA_COPY_MAX_SIZE  0x3fffe0
> +/* There is apparently an undocumented HW "feature" that
> +   prevents the HW from copying past 256 bytes of (1 << 22) */
> +#defineCIK_SDMA_COPY_MAX_SIZE  0x3fff00
>
>  enum amd_cmp_class_flags {
> S_NAN = 1 << 0,// Signaling NaN
> --
> 2.17.1
>
> ___
> mesa-stable mailing list
> mesa-sta...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-stable
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] i965/vec4: Clamp indirect tes input array reads with 0x0fffffff

2018-08-29 Thread Jason Ekstrand
Have you seen this cause an actual problem?  There's no way we can actually
end up with an input array that big...  I guess this is for the crazy OOB
case?

On Wed, Aug 29, 2018 at 3:12 PM Ian Romanick  wrote:

> From: Ian Romanick 
>
> Page 190 of "Volume 7: 3D Media GPGPU Engine (Haswell)" says the valid
> range of the offset is [0, 0FFFFFFFh].
>
> Signed-off-by: Ian Romanick 
> Cc: mesa-sta...@lists.freedesktop.org
> Cc: Kenneth Graunke 
> ---
>  src/intel/compiler/brw_vec4_tes.cpp | 12 +++-
>  1 file changed, 11 insertions(+), 1 deletion(-)
>
> diff --git a/src/intel/compiler/brw_vec4_tes.cpp
> b/src/intel/compiler/brw_vec4_tes.cpp
> index 35aff0f4b78..cf1bff42aa9 100644
> --- a/src/intel/compiler/brw_vec4_tes.cpp
> +++ b/src/intel/compiler/brw_vec4_tes.cpp
> @@ -185,9 +185,19 @@
> vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
>   first_component /= 2;
>
>if (indirect_offset.file != BAD_FILE) {
> + src_reg clamped_indirect_offset = src_reg(this,
> glsl_type::uvec4_type);
> +
> + /* Page 190 of "Volume 7: 3D Media GPGPU Engine (Haswell)" says
> the
> +  * valid range of the offset is [0, 0FFFFFFFh].
> +  */
> + emit_minmax(BRW_CONDITIONAL_L,
> + dst_reg(clamped_indirect_offset),
> + retype(indirect_offset, BRW_REGISTER_TYPE_UD),
> + brw_imm_ud(0x0fffffffu));
> +
>   header = src_reg(this, glsl_type::uvec4_type);
>   emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header),
> -  input_read_header, indirect_offset);
> +  input_read_header, clamped_indirect_offset);
>} else {
>   /* Arbitrarily only push up to 24 vec4 slots worth of data,
>* which is 12 registers (since each holds 2 vec4 slots).
> --
> 2.14.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radv: Add missing checks in radv_get_image_format_properties.

2018-08-29 Thread Samuel Pitoiset

Sounds reasonable to me.

Reviewed-by: Samuel Pitoiset 

On 8/29/18 5:14 PM, Bas Nieuwenhuizen wrote:

CC: 
---
  src/amd/vulkan/radv_formats.c | 19 +++
  1 file changed, 19 insertions(+)

diff --git a/src/amd/vulkan/radv_formats.c b/src/amd/vulkan/radv_formats.c
index f0cc0fc5f95..6253c27b95d 100644
--- a/src/amd/vulkan/radv_formats.c
+++ b/src/amd/vulkan/radv_formats.c
@@ -1112,6 +1112,25 @@ static VkResult radv_get_image_format_properties(struct 
radv_physical_device *ph
}
}
  
+	if (info->usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) {

+   if (!(format_feature_flags & 
VK_FORMAT_FEATURE_TRANSFER_SRC_BIT)) {
+   goto unsupported;
+   }
+   }
+
+   if (info->usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) {
+   if (!(format_feature_flags & 
VK_FORMAT_FEATURE_TRANSFER_DST_BIT)) {
+   goto unsupported;
+   }
+   }
+
+   if (info->usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) {
+   if (!(format_feature_flags & 
(VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT |
+ 
VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT))) {
+   goto unsupported;
+   }
+   }
+
*pImageFormatProperties = (VkImageFormatProperties) {
.maxExtent = maxExtent,
.maxMipLevels = maxMipLevels,


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 107477] [DXVK] Setting high shader quality in GTA V results in LLVM error

2018-08-29 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107477

--- Comment #17 from Samuel Pitoiset  ---
Here's the real fix: https://patchwork.freedesktop.org/patch/246462/

Can you confirm that fixes the issue on your side? Thanks!

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] radv: fix passing clip/cull distances from VS to PS

2018-08-29 Thread Samuel Pitoiset
CTS doesn't test input clip/cull distances for the fragment
shader stage, which explains why this was totally broken. I
wrote a simple test locally that works now.

This fixes a crash with GTA V and DXVK.

Note that we are exporting unused parameters from the vertex
shader now, but this can't be optimized easily because we don't
keep the fragment shader info...

Cc: mesa-sta...@lists.freedesktop.org
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107477
Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_nir_to_llvm.c | 30 +-
 src/amd/vulkan/radv_pipeline.c| 16 
 src/amd/vulkan/radv_shader.h  |  1 +
 src/amd/vulkan/radv_shader_info.c |  4 
 4 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/src/amd/vulkan/radv_nir_to_llvm.c 
b/src/amd/vulkan/radv_nir_to_llvm.c
index 4940e3230f..d7cd8cc069 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -2098,9 +2098,10 @@ handle_fs_input_decl(struct radv_shader_context *ctx,
int idx = variable->data.location;
unsigned attrib_count = glsl_count_attribute_slots(variable->type, 
false);
LLVMValueRef interp = NULL;
+   uint64_t mask;
 
variable->data.driver_location = idx * 4;
-   ctx->input_mask |= ((1ull << attrib_count) - 1) << 
variable->data.location;
+   mask = ((1ull << attrib_count) - 1) << variable->data.location;
 
if (glsl_get_base_type(glsl_without_array(variable->type)) == 
GLSL_TYPE_FLOAT) {
unsigned interp_type;
@@ -2121,6 +2122,15 @@ handle_fs_input_decl(struct radv_shader_context *ctx,
for (unsigned i = 0; i < attrib_count; ++i)
ctx->inputs[ac_llvm_reg_index_soa(idx + i, 0)] = interp;
 
+   if (idx == VARYING_SLOT_CLIP_DIST0) {
+   /* Do not account for the number of components inside the array
+* of clip/cull distances because this might wrongly set other
+* bits like primitive ID or layer.
+*/
+   mask = 1ull << VARYING_SLOT_CLIP_DIST0;
+   }
+
+   ctx->input_mask |= mask;
 }
 
 static void
@@ -2187,6 +2197,17 @@ handle_fs_inputs(struct radv_shader_context *ctx,
if (LLVMIsUndef(interp_param))
ctx->shader_info->fs.flat_shaded_mask |= 1u << 
index;
++index;
+   } else if (i == VARYING_SLOT_CLIP_DIST0) {
+   int length = 
ctx->shader_info->info.ps.num_input_clips_culls;
+
+   for (unsigned j = 0; j < length; j += 4) {
+   inputs = ctx->inputs + ac_llvm_reg_index_soa(i, 
j);
+
+   interp_param = *inputs;
+   interp_fs_input(ctx, index, interp_param,
+   ctx->abi.prim_mask, inputs);
+   ++index;
+   }
} else if (i == VARYING_SLOT_POS) {
for(int i = 0; i < 3; ++i)
inputs[i] = ctx->abi.frag_pos[i];
@@ -2482,6 +2503,13 @@ handle_vs_outputs_post(struct radv_shader_context *ctx,
memcpy(_args[target - V_008DFC_SQ_EXP_POS],
   , sizeof(args));
 
+   /* Export the clip/cull distances values to the next stage. */
+   radv_export_param(ctx, param_count, [0], 0xf);
+   outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0] = 
param_count++;
+   if (ctx->num_output_clips + ctx->num_output_culls > 4) {
+   radv_export_param(ctx, param_count, [4], 0xf);
+   
outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1] = param_count++;
+   }
}
 
LLVMValueRef pos_values[4] = {ctx->ac.f32_0, ctx->ac.f32_0, 
ctx->ac.f32_0, ctx->ac.f32_1};
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index e63c481d1e..0303642d7e 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -3052,6 +3052,22 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf 
*cs,
ps_offset++;
}
 
+   if (ps->info.info.ps.num_input_clips_culls) {
+   unsigned vs_offset;
+
+   vs_offset = 
outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0];
+   if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
+   ps_input_cntl[ps_offset] = 
offset_to_ps_input(vs_offset, true);
+   ++ps_offset;
+   }
+
+   vs_offset = 
outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1];
+   if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
+   ps_input_cntl[ps_offset] = 
offset_to_ps_input(vs_offset, true);
+   ++ps_offset;
+   }
+   }
+
for (unsigned i = 0; i < 32 && (1u << i) <= 

Re: [Mesa-dev] [PATCH 5/9] nir: Add a local dead write vars removal pass

2018-08-29 Thread Jason Ekstrand
On Wed, Aug 29, 2018 at 3:19 PM Caio Marcelo de Oliveira Filho <
caio.olive...@intel.com> wrote:

> Jason Ekstrand  writes:
>
> >> >> +static bool
> >> >> +remove_dead_write_vars_local(struct state *state, nir_block *block)
> >> >> +{
> >> >> +   bool progress = false;
> >> >> +
> >> >> +   struct util_dynarray unused_writes;
> >> >> +   util_dynarray_init(_writes, state->mem_ctx);
> >> >> +
> >> >> +   nir_foreach_instr_safe(instr, block) {
> >> >>
> >> >
> >> > It wouldn't hurt to add a case for call instructions which does a
> barrier
> >> > on everything I mentioned below as well as globals and locals.
> >>
> >> Makes sense.  But I don't get locals are affect?  Is this to cover the
> >> parameters being passed to the call?
> >>
> >
> > Because a deref to a local might be passed in as a parameter.  This is
> the
> > way pass-by-reference works for SPIR-V.
>
> Will the parameter appear to the new function as local too?  If so, will
> they be tagged in a way I can identify the derefs?
>

They'll appear as writes to casts whose source is a load_param intrinsic.


> I'm thinking about what to do with unused writes for locals at the end
> of a function.  If it's the main function, we can just remove them, but
> depending on the answer of the question above, it is not so clear for
> non-main functions.
>

Yeah... That's a bit sticky and I'm not sure what the right answer is.  I
guess we could, in theory, have a new "param" mode which is sort-of like a
local only it crosses function boundaries.  I haven't given that much
thought.  However, given that functions are always inlined right now, that's
not something we need to deal with just yet.

--Jason
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] i965/gen7_urb: Re-emit PUSH_CONSTANT_ALLOC on some gen9

2018-08-29 Thread Nanley Chery
According to internal docs, some gen9 platforms have a pixel shader push
constant synchronization issue. Although not listed among said
platforms, this issue seems to be present on the GeminiLake 2x6's we've
tested.

We consider the available workarounds to be too detrimental to
performance. Instead, we mitigate the issue by applying part of one of
the workarounds. Re-emit PUSH_CONSTANT_ALLOC at the top of every batch
(as suggested by Ken).

Fixes ext_framebuffer_multisample-accuracy piglit test failures with the
following options:
* 6 depth_draw small depthstencil
* 8 stencil_draw small depthstencil
* 6 stencil_draw small depthstencil
* 8 depth_resolve small
* 6 stencil_resolve small depthstencil
* 4 stencil_draw small depthstencil
* 16 stencil_draw small depthstencil
* 16 depth_draw small depthstencil
* 2 stencil_resolve small depthstencil
* 6 stencil_draw small
* all_samples stencil_draw small
* 2 depth_draw small depthstencil
* all_samples depth_draw small depthstencil
* all_samples stencil_resolve small
* 4 depth_draw small depthstencil
* all_samples depth_draw small
* all_samples stencil_draw small depthstencil
* 4 stencil_resolve small depthstencil
* 4 depth_resolve small depthstencil
* all_samples stencil_resolve small depthstencil

v2: Include more platforms in WA (Ken).

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106865
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93355
Cc: 
Tested-by: Mark Janes 
---
 src/mesa/drivers/dri/i965/gen7_urb.c | 28 
 1 file changed, 28 insertions(+)

I'm not sure I have enough information about what's happening in the HW
to create a piglit test for this issue.

diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c 
b/src/mesa/drivers/dri/i965/gen7_urb.c
index 2e5f8e60ba9..e7259fc1b8d 100644
--- a/src/mesa/drivers/dri/i965/gen7_urb.c
+++ b/src/mesa/drivers/dri/i965/gen7_urb.c
@@ -118,6 +118,33 @@ gen7_emit_push_constant_state(struct brw_context *brw, 
unsigned vs_size,
const struct gen_device_info *devinfo = >screen->devinfo;
unsigned offset = 0;
 
+   /* From the SKL PRM, Workarounds section (#878):
+*
+*Push constant buffer corruption possible. WA: Insert 2 zero-length
+*PushConst_PS before every intended PushConst_PS update, issue a
+*NULLPRIM after each of the zero len PC update to make sure CS commits
+*them.
+*
+* This workaround is attempting to solve a pixel shader push constant
+* synchronization issue.
+*
+* There's an unpublished WA that involves re-emitting
+* 3DSTATE_PUSH_CONSTANT_ALLOC_PS for every 500-ish 3DSTATE_CONSTANT_PS
+* packets. Since our counting methods may not be reliable due to
+* context-switching and pre-emption, we instead choose to approximate this
+* behavior by re-emitting the packet at the top of the batch.
+*/
+   if (brw->ctx.NewDriverState == BRW_NEW_BATCH) {
+   /* SKL GT2 and GLK 2x6 have reliably demonstrated this issue thus far.
+* We've also seen some intermittent failures from SKL GT4 and BXT in
+* the past.
+*/
+  if (!devinfo->is_skylake &&
+  !devinfo->is_broxton &&
+  !devinfo->is_geminilake)
+ return;
+   }
+
BEGIN_BATCH(10);
OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_VS << 16 | (2 - 2));
OUT_BATCH(vs_size | offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
@@ -154,6 +181,7 @@ const struct brw_tracked_state gen7_push_constant_space = {
.dirty = {
   .mesa = 0,
   .brw = BRW_NEW_CONTEXT |
+ BRW_NEW_BATCH | /* Push constant workaround */
  BRW_NEW_GEOMETRY_PROGRAM |
  BRW_NEW_TESS_PROGRAMS,
},
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/9] nir: Add a local dead write vars removal pass

2018-08-29 Thread Caio Marcelo de Oliveira Filho
Jason Ekstrand  writes:

>> >> +static bool
>> >> +remove_dead_write_vars_local(struct state *state, nir_block *block)
>> >> +{
>> >> +   bool progress = false;
>> >> +
>> >> +   struct util_dynarray unused_writes;
>> >> +   util_dynarray_init(_writes, state->mem_ctx);
>> >> +
>> >> +   nir_foreach_instr_safe(instr, block) {
>> >>
>> >
>> > It wouldn't hurt to add a case for call instructions which does a barrier
>> > on everything I mentioned below as well as globals and locals.
>>
>> Makes sense.  But I don't get locals are affect?  Is this to cover the
>> parameters being passed to the call?
>>
>
> Because a deref to a local might be passed in as a parameter.  This is the
> way pass-by-reference works for SPIR-V.

Will the parameter appear to the new function as local too?  If so, will
they be tagged in a way I can identify the derefs?

I'm thinking about what to do with unused writes for locals at the end
of a function.  If it's the main function, we can just remove them, but
depending on the answer of the question above, it is not so clear for
non-main functions.


Thanks,
Caio

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/15] radeonsi: merge SI and CI dma_clear_buffer and remove the callback

2018-08-29 Thread Marek Olšák
From: Marek Olšák 

Also use assertions for the requirement that offset and size are multiples
of 4.
---
 src/gallium/drivers/radeon/radeon_video.c |  3 +-
 src/gallium/drivers/radeonsi/cik_sdma.c   | 41 -
 src/gallium/drivers/radeonsi/si_cp_dma.c  |  2 +-
 src/gallium/drivers/radeonsi/si_dma.c | 40 
 src/gallium/drivers/radeonsi/si_dma_cs.c  | 61 ++-
 src/gallium/drivers/radeonsi/si_pipe.c|  2 +-
 src/gallium/drivers/radeonsi/si_pipe.h|  5 +-
 .../drivers/radeonsi/si_test_dma_perf.c   |  2 +-
 8 files changed, 66 insertions(+), 90 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_video.c 
b/src/gallium/drivers/radeon/radeon_video.c
index 749f30c2306..a39ce4cc73e 100644
--- a/src/gallium/drivers/radeon/radeon_video.c
+++ b/src/gallium/drivers/radeon/radeon_video.c
@@ -113,22 +113,21 @@ error:
si_vid_destroy_buffer(new_buf);
*new_buf = old_buf;
return false;
 }
 
 /* clear the buffer with zeros */
 void si_vid_clear_buffer(struct pipe_context *context, struct rvid_buffer* 
buffer)
 {
struct si_context *sctx = (struct si_context*)context;
 
-   sctx->dma_clear_buffer(sctx, >res->b.b, 0,
-  buffer->res->buf->size, 0);
+   si_sdma_clear_buffer(sctx, >res->b.b, 0, 
buffer->res->buf->size, 0);
context->flush(context, NULL, 0);
 }
 
 /**
  * join surfaces into the same buffer with identical tiling params
  * sumup their sizes and replace the backend buffers with a single bo
  */
 void si_vid_join_surfaces(struct si_context *sctx,
  struct pb_buffer** buffers[VL_NUM_COMPONENTS],
  struct radeon_surf *surfaces[VL_NUM_COMPONENTS])
diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c 
b/src/gallium/drivers/radeonsi/cik_sdma.c
index 595f8d49a80..1c2fd0f7b1c 100644
--- a/src/gallium/drivers/radeonsi/cik_sdma.c
+++ b/src/gallium/drivers/radeonsi/cik_sdma.c
@@ -60,60 +60,20 @@ static void cik_sdma_copy_buffer(struct si_context *ctx,
radeon_emit(cs, src_offset);
radeon_emit(cs, src_offset >> 32);
radeon_emit(cs, dst_offset);
radeon_emit(cs, dst_offset >> 32);
dst_offset += csize;
src_offset += csize;
size -= csize;
}
 }
 
-static void cik_sdma_clear_buffer(struct si_context *sctx,
- struct pipe_resource *dst,
- uint64_t offset,
- uint64_t size,
- unsigned clear_value)
-{
-   struct radeon_cmdbuf *cs = sctx->dma_cs;
-   unsigned i, ncopy, csize;
-   struct r600_resource *rdst = r600_resource(dst);
-
-   if (!cs || offset % 4 != 0 || size % 4 != 0 ||
-   dst->flags & PIPE_RESOURCE_FLAG_SPARSE) {
-   sctx->b.clear_buffer(>b, dst, offset, size, _value, 
4);
-   return;
-   }
-
-   /* Mark the buffer range of destination as valid (initialized),
-* so that transfer_map knows it should wait for the GPU when mapping
-* that range. */
-   util_range_add(>valid_buffer_range, offset, offset + size);
-
-   offset += rdst->gpu_address;
-
-   /* the same maximum size as for copying */
-   ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE);
-   si_need_dma_space(sctx, ncopy * 5, rdst, NULL);
-
-   for (i = 0; i < ncopy; i++) {
-   csize = MIN2(size, CIK_SDMA_COPY_MAX_SIZE);
-   radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_PACKET_CONSTANT_FILL, 
0,
-   0x8000 /* dword copy */));
-   radeon_emit(cs, offset);
-   radeon_emit(cs, offset >> 32);
-   radeon_emit(cs, clear_value);
-   radeon_emit(cs, sctx->chip_class >= GFX9 ? csize - 1 : csize);
-   offset += csize;
-   size -= csize;
-   }
-}
-
 static unsigned minify_as_blocks(unsigned width, unsigned level, unsigned 
blk_w)
 {
width = u_minify(width, level);
return DIV_ROUND_UP(width, blk_w);
 }
 
 static unsigned encode_tile_info(struct si_context *sctx,
 struct si_texture *tex, unsigned level,
 bool set_bpp)
 {
@@ -547,12 +507,11 @@ static void cik_sdma_copy(struct pipe_context *ctx,
return;
 
 fallback:
si_resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz,
src, src_level, src_box);
 }
 
 void cik_init_sdma_functions(struct si_context *sctx)
 {
sctx->dma_copy = cik_sdma_copy;
-   sctx->dma_clear_buffer = cik_sdma_clear_buffer;
 }
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c 
b/src/gallium/drivers/radeonsi/si_cp_dma.c
index 486ae75c77f..598d5ecf0dc 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ 

[Mesa-dev] [PATCH 01/15] radeonsi: fix HTILE for NPOT textures with mipmapping on SI/CI

2018-08-29 Thread Marek Olšák
From: Marek Olšák 

VI uses addrlib so it's unaffected.

Cc: 18.1 18.2 
---
 src/gallium/drivers/radeonsi/si_texture.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_texture.c 
b/src/gallium/drivers/radeonsi/si_texture.c
index e55fd815264..bcff226a586 100644
--- a/src/gallium/drivers/radeonsi/si_texture.c
+++ b/src/gallium/drivers/radeonsi/si_texture.c
@@ -924,22 +924,22 @@ static void si_texture_get_htile_size(struct si_screen 
*sscreen,
break;
case 16:
cl_width = 128;
cl_height = 64;
break;
default:
assert(0);
return;
}
 
-   width = align(tex->buffer.b.b.width0, cl_width * 8);
-   height = align(tex->buffer.b.b.height0, cl_height * 8);
+   width = align(tex->surface.u.legacy.level[0].nblk_x, cl_width * 8);
+   height = align(tex->surface.u.legacy.level[0].nblk_y, cl_height * 8);
 
slice_elements = (width * height) / (8 * 8);
slice_bytes = slice_elements * 4;
 
pipe_interleave_bytes = sscreen->info.pipe_interleave_bytes;
base_align = num_pipes * pipe_interleave_bytes;
 
tex->surface.htile_alignment = base_align;
tex->surface.htile_size =
util_num_layers(>buffer.b.b, 0) *
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 14/15] radeonsi: flush when an IB uses more VRAM than available

2018-08-29 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_pipe.h | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index 29d7e555a0c..b3d607b93e3 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1551,20 +1551,26 @@ static inline bool 
util_prim_is_points_or_lines(unsigned prim)
  * \param gtt   GTT memory size not added to the buffer list yet
  */
 static inline bool
 radeon_cs_memory_below_limit(struct si_screen *screen,
 struct radeon_cmdbuf *cs,
 uint64_t vram, uint64_t gtt)
 {
vram += cs->used_vram;
gtt += cs->used_gart;
 
+   /* Flush more often on dGPUs, so that temporarily allocated buffers
+* are released/reused faster and there are fewer of them.
+*/
+   if (!screen->info.has_local_buffers && screen->info.has_dedicated_vram)
+   return vram < screen->info.vram_size;
+
/* Anything that goes above the VRAM size should go to GTT. */
if (vram > screen->info.vram_size)
gtt += vram - screen->info.vram_size;
 
/* Now we just need to check if we have enough GTT. */
return gtt < screen->info.gart_size * 0.7;
 }
 
 /**
  * Add a buffer to the buffer list for the given command stream (CS).
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 13/15] radeonsi: adjust and simplify max_alloc_size determination

2018-08-29 Thread Marek Olšák
From: Marek Olšák 

---
 src/amd/common/ac_gpu_info.c | 16 
 .../winsys/radeon/drm/radeon_drm_winsys.c|  8 +---
 2 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index bfaff45219f..766ad835476 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -248,23 +248,20 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
r = amdgpu_query_info(dev, AMDGPU_INFO_MEMORY, sizeof(meminfo), 
);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_info(memory) 
failed.\n");
return false;
}
 
/* Note: usable_heap_size values can be random and can't be 
relied on. */
info->gart_size = meminfo.gtt.total_heap_size;
info->vram_size = meminfo.vram.total_heap_size;
info->vram_vis_size = 
meminfo.cpu_accessible_vram.total_heap_size;
-
-   info->max_alloc_size = MAX2(meminfo.vram.max_allocation,
-   meminfo.gtt.max_allocation);
} else {
/* This is a deprecated interface, which reports usable sizes
 * (total minus pinned), but the pinned size computation is
 * buggy, so the values returned from these functions can be
 * random.
 */
struct amdgpu_heap_info vram, vram_vis, gtt;
 
r = amdgpu_query_heap_info(dev, AMDGPU_GEM_DOMAIN_VRAM, 0, 
);
if (r) {
@@ -282,25 +279,20 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
 
r = amdgpu_query_heap_info(dev, AMDGPU_GEM_DOMAIN_GTT, 0, );
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_heap_info(gtt) 
failed.\n");
return false;
}
 
info->gart_size = gtt.heap_size;
info->vram_size = vram.heap_size;
info->vram_vis_size = vram_vis.heap_size;
-
-   /* The kernel can split large buffers in VRAM but not in GTT, 
so large
-* allocations can fail or cause buffer movement failures in 
the kernel.
-*/
-   info->max_alloc_size = MAX2(info->vram_size * 0.9, 
info->gart_size * 0.7);
}
 
/* Set chip identification. */
info->pci_id = amdinfo->asic_id; /* TODO: is this correct? */
info->vce_harvest_config = amdinfo->vce_harvest_config;
 
switch (info->pci_id) {
 #define CHIPSET(pci_id, cfamily) \
case pci_id: \
info->family = CHIP_##cfamily; \
@@ -324,20 +316,28 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
info->chip_class = SI;
else {
fprintf(stderr, "amdgpu: Unknown family.\n");
return false;
}
 
/* Set which chips have dedicated VRAM. */
info->has_dedicated_vram =
!(amdinfo->ids_flags & AMDGPU_IDS_FLAGS_FUSION);
 
+   /* The kernel can split large buffers in VRAM but not in GTT, so large
+* allocations can fail or cause buffer movement failures in the kernel.
+*/
+   if (info->has_dedicated_vram)
+   info->max_alloc_size = info->vram_size * 0.8;
+   else
+   info->max_alloc_size = info->gart_size * 0.7;
+
/* Set hardware information. */
info->gds_size = gds.gds_total_size;
info->gds_gfx_partition_size = gds.gds_gfx_partition_size;
/* convert the shader clock from KHz to MHz */
info->max_shader_clock = amdinfo->max_engine_clk / 1000;
info->num_tcc_blocks = device_info.num_tcc_blocks;
info->max_se = amdinfo->num_shader_engines;
info->max_sh_per_se = amdinfo->num_shader_arrays_per_engine;
info->has_hw_decode =
(uvd.available_rings != 0) || (vcn_dec.available_rings != 0);
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c 
b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index 343c80c600f..0c41e1397c7 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -354,25 +354,27 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
 }
 ws->info.gart_size = gem_info.gart_size;
 ws->info.vram_size = gem_info.vram_size;
 ws->info.vram_vis_size = gem_info.vram_visible;
 /* Older versions of the kernel driver reported incorrect values, and
  * didn't support more than 256MB of visible VRAM anyway
  */
 if (ws->info.drm_minor < 49)
 ws->info.vram_vis_size = MIN2(ws->info.vram_vis_size, 256*1024*1024);
 
-/* Radeon allocates all buffers as contigous, which makes large allocations
+/* Radeon allocates all buffers contiguously, which makes large allocations
  * unlikely to succeed. 

[Mesa-dev] [PATCH 15/15] radeonsi/nir: port some bindless and sampler code from TGSI

2018-08-29 Thread Marek Olšák
From: Marek Olšák 

These might be all missing changes for bindless textures.
---
 src/gallium/drivers/radeonsi/si_shader_nir.c | 80 
 1 file changed, 50 insertions(+), 30 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c 
b/src/gallium/drivers/radeonsi/si_shader_nir.c
index 5d6280b80f7..87ca0161b45 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -890,64 +890,84 @@ si_nir_lookup_interp_param(struct ac_shader_abi *abi,
 }
 
 static LLVMValueRef
 si_nir_load_sampler_desc(struct ac_shader_abi *abi,
 unsigned descriptor_set, unsigned base_index,
 unsigned constant_index, LLVMValueRef dynamic_index,
 enum ac_descriptor_type desc_type, bool image,
 bool write, bool bindless)
 {
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+   const struct tgsi_shader_info *info = >shader->selector->info;
LLVMBuilderRef builder = ctx->ac.builder;
-   LLVMValueRef list = LLVMGetParam(ctx->main_fn, 
ctx->param_samplers_and_images);
-   LLVMValueRef index;
+   unsigned const_index = base_index + constant_index;
+   bool dcc_off = write;
+
+   /* TODO: images_store and images_atomic are not set */
+   if (!dynamic_index && image &&
+   (info->images_store | info->images_atomic) & (1 << const_index))
+   dcc_off = true;
 
assert(!descriptor_set);
+   assert(!image || desc_type == AC_DESC_IMAGE || desc_type == 
AC_DESC_BUFFER);
 
-   dynamic_index = dynamic_index ? dynamic_index : ctx->ac.i32_0;
-   index = LLVMBuildAdd(builder, dynamic_index,
-LLVMConstInt(ctx->ac.i32, base_index + 
constant_index, false),
-"");
+   if (bindless) {
+   LLVMValueRef list =
+   LLVMGetParam(ctx->main_fn, 
ctx->param_bindless_samplers_and_images);
 
-   if (image) {
-   assert(desc_type == AC_DESC_IMAGE || desc_type == 
AC_DESC_BUFFER);
-   assert(base_index + constant_index < ctx->num_images);
+   /* dynamic_index is the bindless handle */
+   if (image) {
+   return si_load_image_desc(ctx, list, dynamic_index, 
desc_type,
+ dcc_off, true);
+   }
+
+   /* Since bindless handle arithmetic can contain an unsigned 
integer
+* wraparound and si_load_sampler_desc assumes there isn't any,
+* use GEP without "inbounds" (inside ac_build_pointer_add)
+* to prevent incorrect code generation and hangs.
+*/
+   dynamic_index = LLVMBuildMul(ctx->ac.builder, dynamic_index,
+LLVMConstInt(ctx->i32, 2, 0), "");
+   list = ac_build_pointer_add(>ac, list, dynamic_index);
+   return si_load_sampler_desc(ctx, list, ctx->i32_0, desc_type);
+   }
+
+   unsigned num_slots = image ? ctx->num_images : ctx->num_samplers;
+   assert(const_index < num_slots);
 
-   if (dynamic_index)
-   index = si_llvm_bound_index(ctx, index, 
ctx->num_images);
+   LLVMValueRef list = LLVMGetParam(ctx->main_fn, 
ctx->param_samplers_and_images);
+   LLVMValueRef index = LLVMConstInt(ctx->ac.i32, const_index, false);
+
+   if (dynamic_index) {
+   index = LLVMBuildAdd(builder, index, dynamic_index, "");
+
+   /* From the GL_ARB_shader_image_load_store extension spec:
+*
+*If a shader performs an image load, store, or atomic
+*operation using an image variable declared as an array,
+*and if the index used to select an individual element is
+*negative or greater than or equal to the size of the
+*array, the results of the operation are undefined but may
+*not lead to termination.
+*/
+   index = si_llvm_bound_index(ctx, index, num_slots);
+   }
 
+   if (image) {
index = LLVMBuildSub(ctx->ac.builder,
 LLVMConstInt(ctx->i32, SI_NUM_IMAGES - 1, 
0),
 index, "");
-
-   /* TODO: be smarter about when we use dcc_off */
-   return si_load_image_desc(ctx, list, index, desc_type, write, 
bindless);
+   return si_load_image_desc(ctx, list, index, desc_type, dcc_off, 
false);
}
 
-   assert(base_index + constant_index < ctx->num_samplers);
-
-   if (dynamic_index)
-   index = si_llvm_bound_index(ctx, index, ctx->num_samplers);
-
index = LLVMBuildAdd(ctx->ac.builder, index,
 LLVMConstInt(ctx->i32, 

[Mesa-dev] [PATCH 12/15] radeonsi: split si_copy_buffer

2018-08-29 Thread Marek Olšák
From: Marek Olšák 

compute and SDMA will be added into it.
---
 src/gallium/drivers/radeonsi/si_blit.c|  2 +-
 src/gallium/drivers/radeonsi/si_cp_dma.c  | 33 ---
 src/gallium/drivers/radeonsi/si_pipe.c|  3 +-
 src/gallium/drivers/radeonsi/si_pipe.h|  8 +++--
 .../drivers/radeonsi/si_test_dma_perf.c   |  4 +--
 5 files changed, 33 insertions(+), 17 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
b/src/gallium/drivers/radeonsi/si_blit.c
index fcaff80125c..8f7aa0815b9 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -903,21 +903,21 @@ void si_resource_copy_region(struct pipe_context *ctx,
struct si_context *sctx = (struct si_context *)ctx;
struct si_texture *ssrc = (struct si_texture*)src;
struct pipe_surface *dst_view, dst_templ;
struct pipe_sampler_view src_templ, *src_view;
unsigned dst_width, dst_height, src_width0, src_height0;
unsigned dst_width0, dst_height0, src_force_level = 0;
struct pipe_box sbox, dstbox;
 
/* Handle buffers first. */
if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
-   si_copy_buffer(sctx, dst, src, dstx, src_box->x, 
src_box->width, 0, -1);
+   si_copy_buffer(sctx, dst, src, dstx, src_box->x, 
src_box->width);
return;
}
 
assert(u_max_sample(dst) == u_max_sample(src));
 
/* The driver doesn't decompress resources automatically while
 * u_blitter is rendering. */
si_decompress_subresource(ctx, src, PIPE_MASK_RGBAZS, src_level,
  src_box->z, src_box->z + src_box->depth - 1);
 
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c 
b/src/gallium/drivers/radeonsi/si_cp_dma.c
index ad53682b1b2..e85bb9b1acf 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -426,36 +426,32 @@ static void si_cp_dma_realign_engine(struct si_context 
*sctx, unsigned size,
va = sctx->scratch_buffer->gpu_address;
si_emit_cp_dma(sctx, va, va + SI_CPDMA_ALIGNMENT, size, dma_flags,
   cache_policy);
 }
 
 /**
  * Do memcpy between buffers using CP DMA.
  *
  * \param user_flags   bitmask of SI_CPDMA_*
  */
-void si_copy_buffer(struct si_context *sctx,
-   struct pipe_resource *dst, struct pipe_resource *src,
-   uint64_t dst_offset, uint64_t src_offset, unsigned size,
-   unsigned user_flags, enum si_cache_policy cache_policy)
+void si_cp_dma_copy_buffer(struct si_context *sctx,
+  struct pipe_resource *dst, struct pipe_resource *src,
+  uint64_t dst_offset, uint64_t src_offset, unsigned 
size,
+  unsigned user_flags, enum si_coherency coher,
+  enum si_cache_policy cache_policy)
 {
uint64_t main_dst_offset, main_src_offset;
unsigned skipped_size = 0;
unsigned realign_size = 0;
-   enum si_coherency coher = SI_COHERENCY_SHADER;
bool is_first = true;
 
-   if (!size)
-   return;
-
-   if (cache_policy == -1)
-   cache_policy = get_cache_policy(sctx, coher);
+   assert(size);
 
if (dst != src || dst_offset != src_offset) {
/* Mark the buffer range of destination as valid (initialized),
 * so that transfer_map knows it should wait for the GPU when 
mapping
 * that range. */
util_range_add(_resource(dst)->valid_buffer_range, 
dst_offset,
   dst_offset + size);
}
 
dst_offset += r600_resource(dst)->gpu_address;
@@ -520,35 +516,50 @@ void si_copy_buffer(struct si_context *sctx,
 
si_emit_cp_dma(sctx, dst_offset, src_offset, skipped_size,
   dma_flags, cache_policy);
}
 
/* Finally, realign the engine if the size wasn't aligned. */
if (realign_size) {
si_cp_dma_realign_engine(sctx, realign_size, user_flags, coher,
 cache_policy, _first);
}
+}
+
+void si_copy_buffer(struct si_context *sctx,
+   struct pipe_resource *dst, struct pipe_resource *src,
+   uint64_t dst_offset, uint64_t src_offset, unsigned size)
+{
+   enum si_coherency coher = SI_COHERENCY_SHADER;
+   enum si_cache_policy cache_policy = get_cache_policy(sctx, coher);
+
+   if (!size)
+   return;
+
+   si_cp_dma_copy_buffer(sctx, dst, src, dst_offset, src_offset, size,
+ 0, coher, cache_policy);
 
if (cache_policy != L2_BYPASS)
r600_resource(dst)->TC_L2_dirty = true;
 
/* If it's not a prefetch... */
if (dst_offset != src_offset)
sctx->num_cp_dma_calls++;
 }
 

[Mesa-dev] [PATCH 11/15] radeonsi: don't call VBO prefetch with size=0

2018-08-29 Thread Marek Olšák
From: Marek Olšák 

for the next commit.
---
 src/gallium/drivers/radeonsi/si_cp_dma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c 
b/src/gallium/drivers/radeonsi/si_cp_dma.c
index 598d5ecf0dc..ad53682b1b2 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -548,21 +548,21 @@ static void cik_prefetch_shader_async(struct si_context 
*sctx,
  struct si_pm4_state *state)
 {
struct pipe_resource *bo = &state->bo[0]->b.b;
assert(state->nbo == 1);
 
cik_prefetch_TC_L2_async(sctx, bo, 0, bo->width0);
 }
 
 static void cik_prefetch_VBO_descriptors(struct si_context *sctx)
 {
-   if (!sctx->vertex_elements)
+   if (!sctx->vertex_elements || 
!sctx->vertex_elements->desc_list_byte_size)
return;
 
cik_prefetch_TC_L2_async(sctx, &sctx->vb_descriptors_buffer->b.b,
 sctx->vb_descriptors_offset,
 sctx->vertex_elements->desc_list_byte_size);
 }
 
 /**
  * Prefetch shaders and VBO descriptors.
  *
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 08/15] ac: remove deprecated use of LLVMInt1Type()

2018-08-29 Thread Marek Olšák
From: Marek Olšák 

---
 src/amd/common/ac_nir_to_llvm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 192ef079215..18644107eb4 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1390,21 +1390,21 @@ static LLVMValueRef visit_load_push_constant(struct 
ac_nir_context *ctx,
 
ptr = ac_build_gep0(&ctx->ac, ctx->abi->push_constants, addr);
 
if (instr->dest.ssa.bit_size == 16) {
unsigned load_dwords = instr->dest.ssa.num_components / 2 + 1;
LLVMTypeRef vec_type = LLVMVectorType(LLVMInt16Type(), 2 * 
load_dwords);
ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type);
LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");
res = LLVMBuildBitCast(ctx->ac.builder, res, vec_type, "");
LLVMValueRef cond = LLVMBuildLShr(ctx->ac.builder, addr, 
ctx->ac.i32_1, "");
-   cond = LLVMBuildTrunc(ctx->ac.builder, cond, LLVMInt1Type(), 
"");
+   cond = LLVMBuildTrunc(ctx->ac.builder, cond, ctx->ac.i1, "");
LLVMValueRef mask[] = { LLVMConstInt(ctx->ac.i32, 0, false), 
LLVMConstInt(ctx->ac.i32, 1, false),
LLVMConstInt(ctx->ac.i32, 2, false), 
LLVMConstInt(ctx->ac.i32, 3, false),
LLVMConstInt(ctx->ac.i32, 4, false)};
LLVMValueRef swizzle_aligned = LLVMConstVector(&mask[0], 
instr->dest.ssa.num_components);
LLVMValueRef swizzle_unaligned = LLVMConstVector(&mask[1], 
instr->dest.ssa.num_components);
LLVMValueRef shuffle_aligned = 
LLVMBuildShuffleVector(ctx->ac.builder, res, res, swizzle_aligned, "");
LLVMValueRef shuffle_unaligned = 
LLVMBuildShuffleVector(ctx->ac.builder, res, res, swizzle_unaligned, "");
res = LLVMBuildSelect(ctx->ac.builder, cond, shuffle_unaligned, 
shuffle_aligned, "");
return LLVMBuildBitCast(ctx->ac.builder, res, get_def_type(ctx, 
&instr->dest.ssa), "");
}
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/15] ac: add radeon_info::num_good_cu_per_sh

2018-08-29 Thread Marek Olšák
From: Marek Olšák 

---
 src/amd/common/ac_gpu_info.c  | 3 +++
 src/amd/common/ac_gpu_info.h  | 1 +
 src/gallium/drivers/radeonsi/si_state.c   | 4 +---
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 4 
 4 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index 8705d878f9a..bfaff45219f 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -407,20 +407,22 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
 
info->num_sdma_rings = util_bitcount(dma.available_rings);
info->num_compute_rings = util_bitcount(compute.available_rings);
 
/* Get the number of good compute units. */
info->num_good_compute_units = 0;
for (i = 0; i < info->max_se; i++)
for (j = 0; j < info->max_sh_per_se; j++)
info->num_good_compute_units +=
util_bitcount(amdinfo->cu_bitmap[i][j]);
+   info->num_good_cu_per_sh = info->num_good_compute_units /
+  (info->max_se * info->max_sh_per_se);
 
memcpy(info->si_tile_mode_array, amdinfo->gb_tile_mode,
sizeof(amdinfo->gb_tile_mode));
info->enabled_rb_mask = amdinfo->enabled_rb_pipes_mask;
 
memcpy(info->cik_macrotile_mode_array, amdinfo->gb_macro_tile_mode,
sizeof(amdinfo->gb_macro_tile_mode));
 
info->pte_fragment_size = alignment_info.size_local;
info->gart_page_size = alignment_info.size_remote;
@@ -536,20 +538,21 @@ void ac_print_gpu_info(struct radeon_info *info)
printf("kernel_flushes_tc_l2_after_ib = %u\n", 
info->kernel_flushes_tc_l2_after_ib);
printf("has_indirect_compute_dispatch = %u\n", 
info->has_indirect_compute_dispatch);
printf("has_unaligned_shader_loads = %u\n", 
info->has_unaligned_shader_loads);
printf("has_sparse_vm_mappings = %u\n", 
info->has_sparse_vm_mappings);
printf("has_2d_tiling = %u\n", info->has_2d_tiling);
printf("has_read_registers_query = %u\n", 
info->has_read_registers_query);
 
printf("Shader core info:\n");
printf("max_shader_clock = %i\n", info->max_shader_clock);
printf("num_good_compute_units = %i\n", 
info->num_good_compute_units);
+   printf("num_good_cu_per_sh = %i\n", info->num_good_cu_per_sh);
printf("num_tcc_blocks = %i\n", info->num_tcc_blocks);
printf("max_se = %i\n", info->max_se);
printf("max_sh_per_se = %i\n", info->max_sh_per_se);
 
printf("Render backend info:\n");
printf("num_render_backends = %i\n", info->num_render_backends);
printf("num_tile_pipes = %i\n", info->num_tile_pipes);
printf("pipe_interleave_bytes = %i\n", info->pipe_interleave_bytes);
printf("enabled_rb_mask = 0x%x\n", info->enabled_rb_mask);
printf("max_alignment = %u\n", (unsigned)info->max_alignment);
diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h
index a897496da48..0583a6037f2 100644
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -109,20 +109,21 @@ struct radeon_info {
boolhas_indirect_compute_dispatch;
boolhas_unaligned_shader_loads;
boolhas_sparse_vm_mappings;
boolhas_2d_tiling;
boolhas_read_registers_query;
 
/* Shader cores. */
uint32_tr600_max_quad_pipes; /* wave size / 16 */
uint32_tmax_shader_clock;
uint32_tnum_good_compute_units;
+   uint32_tnum_good_cu_per_sh;
uint32_tnum_tcc_blocks;
uint32_tmax_se; /* shader engines */
uint32_tmax_sh_per_se; /* shader arrays per shader 
engine */
 
/* Render backends (color + depth blocks). */
uint32_tr300_num_gb_pipes;
uint32_tr300_num_z_pipes;
uint32_tr600_gb_backend_map; /* R600 harvest config 
*/
boolr600_gb_backend_map_valid;
uint32_tr600_num_banks;
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 780d9010abc..c9851ff3300 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -4921,23 +4921,21 @@ static void si_init_config(struct si_context *sctx)
 * but we don't use on-chip GS.
 */
si_pm4_set_reg(pm4, R_028A44_VGT_GS_ONCHIP_CNTL,
   S_028A44_ES_VERTS_PER_SUBGRP(64) |
 

[Mesa-dev] [PATCH 09/15] radeonsi: fix GPU hangs with bindless textures and LLVM 7.0

2018-08-29 Thread Marek Olšák
From: Marek Olšák 

---
 src/amd/common/ac_llvm_build.c| 52 +--
 src/amd/common/ac_llvm_build.h|  4 ++
 .../drivers/radeonsi/si_shader_internal.h |  3 +-
 src/gallium/drivers/radeonsi/si_shader_nir.c  | 12 -
 .../drivers/radeonsi/si_shader_tgsi_mem.c | 23 ++--
 5 files changed, 84 insertions(+), 10 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 1c8d944db74..1f5112e9929 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -828,70 +828,112 @@ ac_build_gep0(struct ac_llvm_context *ctx,
  LLVMValueRef base_ptr,
  LLVMValueRef index)
 {
LLVMValueRef indices[2] = {
ctx->i32_0,
index,
};
return LLVMBuildGEP(ctx->builder, base_ptr, indices, 2, "");
 }
 
+LLVMValueRef ac_build_pointer_add(struct ac_llvm_context *ctx, LLVMValueRef 
ptr,
+ LLVMValueRef index)
+{
+   return LLVMBuildPointerCast(ctx->builder,
+   ac_build_gep0(ctx, ptr, index),
+   LLVMTypeOf(ptr), "");
+}
+
 void
 ac_build_indexed_store(struct ac_llvm_context *ctx,
   LLVMValueRef base_ptr, LLVMValueRef index,
   LLVMValueRef value)
 {
LLVMBuildStore(ctx->builder, value,
   ac_build_gep0(ctx, base_ptr, index));
 }
 
 /**
  * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad.
  * It's equivalent to doing a load from &base_ptr[index].
  *
  * \param base_ptr  Where the array starts.
  * \param index The element index into the array.
  * \param uniform   Whether the base_ptr and index can be assumed to be
  *  dynamically uniform (i.e. load to an SGPR)
  * \param invariant Whether the load is invariant (no other opcodes affect it)
+ * \param no_unsigned_wraparound
+ *For all possible re-associations and re-distributions of an expression
+ *"base_ptr + index * elemsize" into "addr + offset" (excluding GEPs
+ *without inbounds in base_ptr), this parameter is true if "addr + offset"
+ *does not result in an unsigned integer wraparound. This is used for
+ *optimal code generation of 32-bit pointer arithmetic.
+ *
+ *For example, a 32-bit immediate offset that causes a 32-bit unsigned
+ *integer wraparound can't be an imm offset in s_load_dword, because
+ *the instruction performs "addr + offset" in 64 bits.
+ *
+ *Expected usage for bindless textures by chaining GEPs:
+ *  // possible unsigned wraparound, don't use InBounds:
+ *  ptr1 = LLVMBuildGEP(base_ptr, index);
+ *  image = load(ptr1); // becomes "s_load ptr1, 0"
+ *
+ *  ptr2 = LLVMBuildInBoundsGEP(ptr1, 32 / elemsize);
+ *  sampler = load(ptr2); // becomes "s_load ptr1, 32" thanks to InBounds
  */
 static LLVMValueRef
 ac_build_load_custom(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
-LLVMValueRef index, bool uniform, bool invariant)
+LLVMValueRef index, bool uniform, bool invariant,
+bool no_unsigned_wraparound)
 {
LLVMValueRef pointer, result;
+   LLVMValueRef indices[2] = {ctx->i32_0, index};
+
+   if (no_unsigned_wraparound &&
+   LLVMGetPointerAddressSpace(LLVMTypeOf(base_ptr)) == 
AC_CONST_32BIT_ADDR_SPACE)
+   pointer = LLVMBuildInBoundsGEP(ctx->builder, base_ptr, indices, 
2, "");
+   else
+   pointer = LLVMBuildGEP(ctx->builder, base_ptr, indices, 2, "");
 
-   pointer = ac_build_gep0(ctx, base_ptr, index);
if (uniform)
LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md);
result = LLVMBuildLoad(ctx->builder, pointer, "");
if (invariant)
LLVMSetMetadata(result, ctx->invariant_load_md_kind, 
ctx->empty_md);
return result;
 }
 
 LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
   LLVMValueRef index)
 {
-   return ac_build_load_custom(ctx, base_ptr, index, false, false);
+   return ac_build_load_custom(ctx, base_ptr, index, false, false, false);
 }
 
 LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx,
 LLVMValueRef base_ptr, LLVMValueRef index)
 {
-   return ac_build_load_custom(ctx, base_ptr, index, false, true);
+   return ac_build_load_custom(ctx, base_ptr, index, false, true, false);
 }
 
+/* This assumes that there is no unsigned integer wraparound during the address
+ * computation, excluding all GEPs within base_ptr. */
 LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
   LLVMValueRef base_ptr, LLVMValueRef index)
 {
-   return ac_build_load_custom(ctx, base_ptr, index, true, true);
+   return ac_build_load_custom(ctx, base_ptr, index, true, 

[Mesa-dev] [PATCH 07/15] ac: use iN_0/1 constants

2018-08-29 Thread Marek Olšák
From: Marek Olšák 

---
 src/amd/common/ac_llvm_build.c| 23 +--
 src/amd/common/ac_nir_to_llvm.c   |  4 ++--
 src/gallium/drivers/radeonsi/si_shader.c  |  4 ++--
 .../drivers/radeonsi/si_shader_tgsi_alu.c |  2 +-
 4 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 629cd2a7527..1c8d944db74 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -822,25 +822,24 @@ ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.mov",
  ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);
 }
 
 LLVMValueRef
 ac_build_gep0(struct ac_llvm_context *ctx,
  LLVMValueRef base_ptr,
  LLVMValueRef index)
 {
LLVMValueRef indices[2] = {
-   LLVMConstInt(ctx->i32, 0, 0),
+   ctx->i32_0,
index,
};
-   return LLVMBuildGEP(ctx->builder, base_ptr,
-   indices, 2, "");
+   return LLVMBuildGEP(ctx->builder, base_ptr, indices, 2, "");
 }
 
 void
 ac_build_indexed_store(struct ac_llvm_context *ctx,
   LLVMValueRef base_ptr, LLVMValueRef index,
   LLVMValueRef value)
 {
LLVMBuildStore(ctx->builder, value,
   ac_build_gep0(ctx, base_ptr, index));
 }
@@ -937,21 +936,21 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
 
if (inst_offset)
offset = LLVMBuildAdd(ctx->builder, offset,
  LLVMConstInt(ctx->i32, 
inst_offset, 0), "");
if (voffset)
offset = LLVMBuildAdd(ctx->builder, offset, voffset, 
"");
 
LLVMValueRef args[] = {
ac_to_float(ctx, vdata),
LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
-   LLVMConstInt(ctx->i32, 0, 0),
+   ctx->i32_0,
offset,
LLVMConstInt(ctx->i1, glc, 0),
LLVMConstInt(ctx->i1, slc, 0),
};
 
char name[256];
snprintf(name, sizeof(name), "llvm.amdgcn.buffer.store.%s",
 types[CLAMP(num_channels, 1, 3) - 1]);
 
ac_build_intrinsic(ctx, name, ctx->voidt,
@@ -965,22 +964,22 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
static const unsigned dfmt[] = {
V_008F0C_BUF_DATA_FORMAT_32,
V_008F0C_BUF_DATA_FORMAT_32_32,
V_008F0C_BUF_DATA_FORMAT_32_32_32,
V_008F0C_BUF_DATA_FORMAT_32_32_32_32
};
static const char *types[] = {"i32", "v2i32", "v4i32"};
LLVMValueRef args[] = {
vdata,
LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
-   LLVMConstInt(ctx->i32, 0, 0),
-   voffset ? voffset : LLVMConstInt(ctx->i32, 0, 0),
+   ctx->i32_0,
+   voffset ? voffset : ctx->i32_0,
soffset,
LLVMConstInt(ctx->i32, inst_offset, 0),
LLVMConstInt(ctx->i32, dfmt[num_channels - 1], 0),
LLVMConstInt(ctx->i32, V_008F0C_BUF_NUM_FORMAT_UINT, 0),
LLVMConstInt(ctx->i1, glc, 0),
LLVMConstInt(ctx->i1, slc, 0),
};
char name[256];
snprintf(name, sizeof(name), "llvm.amdgcn.tbuffer.store.%s",
 types[CLAMP(num_channels, 1, 3) - 1]);
@@ -998,21 +997,21 @@ ac_build_buffer_load_common(struct ac_llvm_context *ctx,
LLVMValueRef vindex,
LLVMValueRef voffset,
unsigned num_channels,
bool glc,
bool slc,
bool can_speculate,
bool use_format)
 {
LLVMValueRef args[] = {
LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
-   vindex ? vindex : LLVMConstInt(ctx->i32, 0, 0),
+   vindex ? vindex : ctx->i32_0,
voffset,
LLVMConstInt(ctx->i1, glc, 0),
LLVMConstInt(ctx->i1, slc, 0)
};
unsigned func = CLAMP(num_channels, 1, 3) - 1;
 
LLVMTypeRef types[] = {ctx->f32, ctx->v2f32, ctx->v4f32};
const char *type_names[] = {"f32", "v2f32", "v4f32"};
char name[256];
 
@@ -1093,21 +1092,21 @@ LLVMValueRef ac_build_buffer_load_format(struct 
ac_llvm_context *ctx,
 
 LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx,
   LLVMValueRef rsrc,
   LLVMValueRef vindex,
   

[Mesa-dev] [PATCH 05/15] ac: revert new LLVM 7.0 behavior for fdiv

2018-08-29 Thread Marek Olšák
From: Marek Olšák 

Cc: 18.1 18.2 
---
 src/amd/common/ac_llvm_build.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index c741a1ab62d..629cd2a7527 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -554,21 +554,28 @@ LLVMValueRef ac_build_expand_to_vec4(struct 
ac_llvm_context *ctx,
chan[num_channels++] = LLVMGetUndef(elemtype);
 
return ac_build_gather_values(ctx, chan, 4);
 }
 
 LLVMValueRef
 ac_build_fdiv(struct ac_llvm_context *ctx,
  LLVMValueRef num,
  LLVMValueRef den)
 {
-   LLVMValueRef ret = LLVMBuildFDiv(ctx->builder, num, den, "");
+   /* If we do (num / den), LLVM >= 7.0 does:
+*return num * v_rcp_f32(den * (fabs(den) > 0x1.0p+96f ? 0x1.0p-32f 
: 1.0f));
+*
+* If we do (num * (1 / den)), LLVM does:
+*return num * v_rcp_f32(den);
+*/
+   LLVMValueRef rcp = LLVMBuildFDiv(ctx->builder, ctx->f32_1, den, "");
+   LLVMValueRef ret = LLVMBuildFMul(ctx->builder, num, rcp, "");
 
/* Use v_rcp_f32 instead of precise division. */
if (!LLVMIsConstant(ret))
LLVMSetMetadata(ret, ctx->fpmath_md_kind, 
ctx->fpmath_md_2p5_ulp);
return ret;
 }
 
 /* Coordinates for cube map selection. sc, tc, and ma are as in Table 8.27
  * of the OpenGL 4.5 (Compatibility Profile) specification, except ma is
  * already multiplied by two. id is the cube face number.
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/15] radeonsi: fix printing a BO list into ddebug reports

2018-08-29 Thread Marek Olšák
From: Marek Olšák 

important for debugging

Cc: 18.1 18.2 
---
 src/gallium/drivers/radeonsi/si_gfx_cs.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c 
b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index c39564ecbe5..38b85ce6243 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -126,26 +126,27 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned 
flags,
 
/* Wait for draw calls to finish if needed. */
if (wait_flags) {
ctx->flags |= wait_flags;
si_emit_cache_flush(ctx);
}
ctx->gfx_last_ib_is_busy = wait_flags == 0;
 
if (ctx->current_saved_cs) {
si_trace_emit(ctx);
-   si_log_hw_flush(ctx);
 
/* Save the IB for debug contexts. */
si_save_cs(ws, cs, &ctx->current_saved_cs->gfx, true);
ctx->current_saved_cs->flushed = true;
ctx->current_saved_cs->time_flush = os_time_get_nano();
+
+   si_log_hw_flush(ctx);
}
 
/* Flush the CS. */
ws->cs_flush(cs, flags, &ctx->last_gfx_fence);
if (fence)
ws->fence_reference(fence, ctx->last_gfx_fence);
 
/* This must be after cs_flush returns, since the context's API
 * thread can concurrently read this value in si_fence_finish. */
ctx->num_gfx_cs_flushes++;
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/15] winsys/radeon: fix CMASK fast clear for NPOT textures with mipmapping on SI/CI

2018-08-29 Thread Marek Olšák
From: Marek Olšák 

Cc: 18.1 18.2 
---
 src/gallium/winsys/radeon/drm/radeon_drm_surface.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_surface.c 
b/src/gallium/winsys/radeon/drm/radeon_drm_surface.c
index fda1ba7870e..20cfc86ebe0 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_surface.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_surface.c
@@ -250,22 +250,22 @@ static void si_compute_cmask(const struct radeon_info 
*info,
cl_width = 64;
cl_height = 64;
break;
default:
assert(0);
return;
}
 
unsigned base_align = num_pipes * pipe_interleave_bytes;
 
-   unsigned width = align(config->info.width, cl_width*8);
-   unsigned height = align(config->info.height, cl_height*8);
+   unsigned width = align(surf->u.legacy.level[0].nblk_x, cl_width*8);
+   unsigned height = align(surf->u.legacy.level[0].nblk_y, cl_height*8);
unsigned slice_elements = (width * height) / (8*8);
 
/* Each element of CMASK is a nibble. */
unsigned slice_bytes = slice_elements / 2;
 
surf->u.legacy.cmask_slice_tile_max = (width * height) / (128*128);
if (surf->u.legacy.cmask_slice_tile_max)
surf->u.legacy.cmask_slice_tile_max -= 1;
 
unsigned num_layers;
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 00/15] RadeonSI Fixes and Cleanups

2018-08-29 Thread Marek Olšák
Hi,

There are HTILE and CMASK allocator fixes, a GPU hang fix for bindless
textures with LLVM 7.0, partial NIR support for bindless textures,
ddebug fix, and much more.

Please review.

Thanks,
Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/15] r600: fix HTILE for NPOT textures with mipmapping

2018-08-29 Thread Marek Olšák
From: Marek Olšák 

Cc: 18.1 18.2 
---
 src/gallium/drivers/r600/r600_texture.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_texture.c 
b/src/gallium/drivers/r600/r600_texture.c
index 08db6bab04c..d08c6e5637c 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -767,22 +767,22 @@ static void r600_texture_get_htile_size(struct 
r600_common_screen *rscreen,
break;
case 16:
cl_width = 128;
cl_height = 64;
break;
default:
assert(0);
return;
}
 
-   width = align(rtex->resource.b.b.width0, cl_width * 8);
-   height = align(rtex->resource.b.b.height0, cl_height * 8);
+   width = align(rtex->surface.u.legacy.level[0].nblk_x, cl_width * 8);
+   height = align(rtex->surface.u.legacy.level[0].nblk_y, cl_height * 8);
 
slice_elements = (width * height) / (8 * 8);
slice_bytes = slice_elements * 4;
 
pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;
base_align = num_pipes * pipe_interleave_bytes;
 
rtex->surface.htile_alignment = base_align;
rtex->surface.htile_size =
util_num_layers(&rtex->resource.b.b, 0) *
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] i965/vec4: Clamp indirect tes input array reads with 0x0fffffff

2018-08-29 Thread Ian Romanick
From: Ian Romanick 

Page 190 of "Volume 7: 3D Media GPGPU Engine (Haswell)" says the valid
range of the offset is [0, 0FFFFFFFh].

Signed-off-by: Ian Romanick 
Cc: mesa-sta...@lists.freedesktop.org
Cc: Kenneth Graunke 
---
 src/intel/compiler/brw_vec4_tes.cpp | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_vec4_tes.cpp 
b/src/intel/compiler/brw_vec4_tes.cpp
index 35aff0f4b78..cf1bff42aa9 100644
--- a/src/intel/compiler/brw_vec4_tes.cpp
+++ b/src/intel/compiler/brw_vec4_tes.cpp
@@ -185,9 +185,19 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr 
*instr)
  first_component /= 2;
 
   if (indirect_offset.file != BAD_FILE) {
+ src_reg clamped_indirect_offset = src_reg(this, 
glsl_type::uvec4_type);
+
+ /* Page 190 of "Volume 7: 3D Media GPGPU Engine (Haswell)" says the
+  * valid range of the offset is [0, 0FFFFFFFh].
+  */
+ emit_minmax(BRW_CONDITIONAL_L,
+ dst_reg(clamped_indirect_offset),
+ retype(indirect_offset, BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(0x0fffffffu));
+
  header = src_reg(this, glsl_type::uvec4_type);
  emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header),
-  input_read_header, indirect_offset);
+  input_read_header, clamped_indirect_offset);
   } else {
  /* Arbitrarily only push up to 24 vec4 slots worth of data,
   * which is 12 registers (since each holds 2 vec4 slots).
-- 
2.14.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] i965/vec4: Correctly handle uniform sources in generate_tes_add_indirect_urb_offset

2018-08-29 Thread Ian Romanick
From: Ian Romanick 

Fixes failure in the new piglit test
tes-patch-input-array-vec2-index-invalid-rd.shader_test.

Signed-off-by: Ian Romanick 
Cc: mesa-sta...@lists.freedesktop.org
Cc: Kenneth Graunke 
---
 src/intel/compiler/brw_vec4_generator.cpp | 15 ++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_vec4_generator.cpp 
b/src/intel/compiler/brw_vec4_generator.cpp
index d506b675776..888cb358fd1 100644
--- a/src/intel/compiler/brw_vec4_generator.cpp
+++ b/src/intel/compiler/brw_vec4_generator.cpp
@@ -929,8 +929,21 @@ generate_tes_add_indirect_urb_offset(struct brw_codegen *p,
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
 
brw_MOV(p, dst, header);
+
+   /* Uniforms will have a stride <0;4,1>, and we need to convert to <0;1,0>.
+* Other values get <4;1,0>.
+*/
+   struct brw_reg restrided_offset;
+   if (offset.vstride == BRW_VERTICAL_STRIDE_0 &&
+   offset.width == BRW_WIDTH_4 &&
+   offset.hstride == BRW_HORIZONTAL_STRIDE_1) {
+  restrided_offset = stride(offset, 0, 1, 0);
+   } else {
+  restrided_offset = stride(offset, 4, 1, 0);
+   }
+
/* m0.3-0.4: 128-bit-granular offsets into the URB from the handles */
-   brw_MOV(p, vec2(get_element_ud(dst, 3)), stride(offset, 4, 1, 0));
+   brw_MOV(p, vec2(get_element_ud(dst, 3)), restrided_offset);
 
brw_pop_insn_state(p);
 }
-- 
2.14.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 105731] linker error "fragment shader input ... has no matching output in the previous stage" when previous stage's output declaration in a separate shader object

2018-08-29 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=105731

--- Comment #3 from Mark Janes  ---
Vadym, can you make a piglit test for this bug?

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 107477] [DXVK] Setting high shader quality in GTA V results in LLVM error

2018-08-29 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107477

--- Comment #16 from Samuel Pitoiset  ---
Yeah, it was just a workaround for a weird issue but I fixed it since. I'm
improving the patch for supporting cull distances as well.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radv: fix passing clip distances from VS to FS

2018-08-29 Thread Samuel Pitoiset
CTS doesn't test input clip distances with the fragment shader
stage, which explains why it was broken. I wrote a simple test
locally that does pass now. I'm quite sure that cull distances
are broken as well but that can be fixed later.

This fixes a crash with GTA V and DXVK.

Cc: mesa-sta...@lists.freedesktop.org
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107477
Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_nir_to_llvm.c | 41 ---
 src/amd/vulkan/radv_pipeline.c| 15 +++
 src/amd/vulkan/radv_shader.h  |  1 +
 src/amd/vulkan/radv_shader_info.c | 11 +
 4 files changed, 65 insertions(+), 3 deletions(-)

diff --git a/src/amd/vulkan/radv_nir_to_llvm.c 
b/src/amd/vulkan/radv_nir_to_llvm.c
index 4940e3230f..80d991c904 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -2098,9 +2098,10 @@ handle_fs_input_decl(struct radv_shader_context *ctx,
int idx = variable->data.location;
unsigned attrib_count = glsl_count_attribute_slots(variable->type, 
false);
LLVMValueRef interp = NULL;
+   uint64_t mask;
 
variable->data.driver_location = idx * 4;
-   ctx->input_mask |= ((1ull << attrib_count) - 1) << 
variable->data.location;
+   mask = ((1ull << attrib_count) - 1) << variable->data.location;
 
if (glsl_get_base_type(glsl_without_array(variable->type)) == 
GLSL_TYPE_FLOAT) {
unsigned interp_type;
@@ -2121,6 +2122,16 @@ handle_fs_input_decl(struct radv_shader_context *ctx,
for (unsigned i = 0; i < attrib_count; ++i)
ctx->inputs[ac_llvm_reg_index_soa(idx + i, 0)] = interp;
 
+   if (idx == VARYING_SLOT_CLIP_DIST0) {
+   /* Do not account for the number of attribute slots because
+* we only want to know if clip distances are present.
+*/
+   mask = 1ull << VARYING_SLOT_CLIP_DIST0;
+   if (attrib_count > 4)
+   mask |= 1ull << VARYING_SLOT_CLIP_DIST1;
+   }
+
+   ctx->input_mask |= mask;
 }
 
 static void
@@ -2179,7 +2190,8 @@ handle_fs_inputs(struct radv_shader_context *ctx,
continue;
 
if (i >= VARYING_SLOT_VAR0 || i == VARYING_SLOT_PNTC ||
-   i == VARYING_SLOT_PRIMITIVE_ID || i == VARYING_SLOT_LAYER) {
+   i == VARYING_SLOT_PRIMITIVE_ID || i == VARYING_SLOT_LAYER ||
+   i == VARYING_SLOT_CLIP_DIST0 || i == 
VARYING_SLOT_CLIP_DIST1) {
interp_param = *inputs;
interp_fs_input(ctx, index, interp_param, 
ctx->abi.prim_mask,
inputs);
@@ -2238,7 +2250,21 @@ scan_shader_output_decl(struct radv_shader_context *ctx,
attrib_count = 2;
else
attrib_count = 1;
-   mask_attribs = 1ull << idx;
+
+   mask_attribs = 1ull << VARYING_SLOT_CLIP_DIST0;
+
+   /* Use CLIP_DIST1 when the number of components is > 4
+* because we have to export two parameters. Also it's
+* only for VS->PS because other stages don't need it.
+*/
+   if (((stage == MESA_SHADER_VERTEX &&
+!ctx->options->key.vs.as_ls &&
+!ctx->options->key.vs.as_es &&
+!ctx->is_gs_copy_shader) ||
+   (stage == MESA_SHADER_TESS_EVAL &&
+!ctx->options->key.tes.as_es)) &&
+   shader->info.clip_distance_array_size > 4)
+   mask_attribs |= 1ull << VARYING_SLOT_CLIP_DIST1;
}
}
 
@@ -2569,6 +2595,8 @@ handle_vs_outputs_post(struct radv_shader_context *ctx,
 
if (i != VARYING_SLOT_LAYER &&
i != VARYING_SLOT_PRIMITIVE_ID &&
+   i != VARYING_SLOT_CLIP_DIST0 &&
+   i != VARYING_SLOT_CLIP_DIST1 &&
i < VARYING_SLOT_VAR0)
continue;
 
@@ -2590,6 +2618,13 @@ handle_vs_outputs_post(struct radv_shader_context *ctx,
ctx->shader_info->info.gs.output_usage_mask[i];
}
 
+   if (i == VARYING_SLOT_CLIP_DIST0 ||
+   i == VARYING_SLOT_CLIP_DIST1) {
+   /* The output usage mask is wrong for clip distances.
+* Use all channels. */
+   output_usage_mask = 0xf;
+   }
+
radv_export_param(ctx, param_count, values, output_usage_mask);
 
outinfo->vs_output_param_offset[i] = param_count++;
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index e63c481d1e..21a8b0c5e9 100644
--- 

[Mesa-dev] [PATCH 1/5] i965/vec4: Silence unused parameter warnings in vec4 compiler tests

2018-08-29 Thread Ian Romanick
From: Ian Romanick 

src/intel/compiler/test_vec4_copy_propagation.cpp: In member function ‘virtual 
brw::dst_reg* copy_propagation_vec4_visitor::make_reg_for_system_value(int)’:
src/intel/compiler/test_vec4_copy_propagation.cpp:57:51: warning: unused 
parameter ‘location’ [-Wunused-parameter]
virtual dst_reg *make_reg_for_system_value(int location)
   ^~~~
src/intel/compiler/test_vec4_copy_propagation.cpp: In member function ‘virtual 
void copy_propagation_vec4_visitor::emit_urb_write_header(int)’:
src/intel/compiler/test_vec4_copy_propagation.cpp:77:43: warning: unused 
parameter ‘mrf’ [-Wunused-parameter]
virtual void emit_urb_write_header(int mrf)
   ^~~
src/intel/compiler/test_vec4_copy_propagation.cpp: In member function ‘virtual 
brw::vec4_instruction* 
copy_propagation_vec4_visitor::emit_urb_write_opcode(bool)’:
src/intel/compiler/test_vec4_copy_propagation.cpp:82:57: warning: unused 
parameter ‘complete’ [-Wunused-parameter]
virtual vec4_instruction *emit_urb_write_opcode(bool complete)
 ^~~~
src/intel/compiler/test_vec4_register_coalesce.cpp: In member function ‘virtual 
brw::dst_reg* register_coalesce_vec4_visitor::make_reg_for_system_value(int)’:
src/intel/compiler/test_vec4_register_coalesce.cpp:60:51: warning: unused 
parameter ‘location’ [-Wunused-parameter]
virtual dst_reg *make_reg_for_system_value(int location)
   ^~~~
src/intel/compiler/test_vec4_register_coalesce.cpp: In member function ‘virtual 
void register_coalesce_vec4_visitor::emit_urb_write_header(int)’:
src/intel/compiler/test_vec4_register_coalesce.cpp:80:43: warning: unused 
parameter ‘mrf’ [-Wunused-parameter]
virtual void emit_urb_write_header(int mrf)
   ^~~
src/intel/compiler/test_vec4_register_coalesce.cpp: In member function ‘virtual 
brw::vec4_instruction* 
register_coalesce_vec4_visitor::emit_urb_write_opcode(bool)’:
src/intel/compiler/test_vec4_register_coalesce.cpp:85:57: warning: unused 
parameter ‘complete’ [-Wunused-parameter]
virtual vec4_instruction *emit_urb_write_opcode(bool complete)
 ^~~~
src/intel/compiler/test_vec4_cmod_propagation.cpp: In member function ‘virtual 
brw::dst_reg* cmod_propagation_vec4_visitor::make_reg_for_system_value(int)’:
src/intel/compiler/test_vec4_cmod_propagation.cpp:60:51: warning: unused 
parameter ‘location’ [-Wunused-parameter]
virtual dst_reg *make_reg_for_system_value(int location)
   ^~~~
src/intel/compiler/test_vec4_cmod_propagation.cpp: In member function ‘virtual 
void cmod_propagation_vec4_visitor::emit_urb_write_header(int)’:
src/intel/compiler/test_vec4_cmod_propagation.cpp:85:43: warning: unused 
parameter ‘mrf’ [-Wunused-parameter]
virtual void emit_urb_write_header(int mrf)
   ^~~
src/intel/compiler/test_vec4_cmod_propagation.cpp: In member function ‘virtual 
brw::vec4_instruction* 
cmod_propagation_vec4_visitor::emit_urb_write_opcode(bool)’:
src/intel/compiler/test_vec4_cmod_propagation.cpp:90:57: warning: unused 
parameter ‘complete’ [-Wunused-parameter]
virtual vec4_instruction *emit_urb_write_opcode(bool complete)
 ^~~~

Signed-off-by: Ian Romanick 
---
 src/intel/compiler/test_vec4_cmod_propagation.cpp  | 6 +++---
 src/intel/compiler/test_vec4_copy_propagation.cpp  | 6 +++---
 src/intel/compiler/test_vec4_register_coalesce.cpp | 6 +++---
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/intel/compiler/test_vec4_cmod_propagation.cpp 
b/src/intel/compiler/test_vec4_cmod_propagation.cpp
index 8430924de63..02c6f85321d 100644
--- a/src/intel/compiler/test_vec4_cmod_propagation.cpp
+++ b/src/intel/compiler/test_vec4_cmod_propagation.cpp
@@ -57,7 +57,7 @@ public:
 
 protected:
/* Dummy implementation for pure virtual methods */
-   virtual dst_reg *make_reg_for_system_value(int location)
+   virtual dst_reg *make_reg_for_system_value(int /* location */)
{
   unreachable("Not reached");
}
@@ -82,12 +82,12 @@ protected:
   unreachable("Not reached");
}
 
-   virtual void emit_urb_write_header(int mrf)
+   virtual void emit_urb_write_header(int /* mrf */)
{
   unreachable("Not reached");
}
 
-   virtual vec4_instruction *emit_urb_write_opcode(bool complete)
+   virtual vec4_instruction *emit_urb_write_opcode(bool /* complete */)
{
   unreachable("Not reached");
}
diff --git a/src/intel/compiler/test_vec4_copy_propagation.cpp 
b/src/intel/compiler/test_vec4_copy_propagation.cpp
index f4f91d8c8c7..5f3f3e525f2 100644
--- a/src/intel/compiler/test_vec4_copy_propagation.cpp
+++ b/src/intel/compiler/test_vec4_copy_propagation.cpp

[Mesa-dev] [PATCH 5/5] i965/vec4: Propagate conditional modifiers from more compares to other compares

2018-08-29 Thread Ian Romanick
From: Ian Romanick 

If there is a CMP.NZ that compares a single component (via a .xxxx
swizzle, for example) with 0, it can propagate its conditional modifier
back to a previous CMP that writes only that component.  The specific
case that I saw was:

cmp.l.f0(8) g42<1>.xF   g61<4>.xF   (abs)g18<4>.zF
...
cmp.nz.f0(8)null<1>Dg42<4>.xD   0D

In this case we can just delete the second CMP.

No changes on Broadwell or Skylake because they do not use the vec4
backend.  Also no changes on GM45 or Iron Lake.

Sandy Bridge, Ivy Bridge, and Haswell had similar results. (Sandy Bridge shown)
total instructions in shared programs: 10427834 -> 10423577 (-0.04%)
instructions in affected programs: 226882 -> 222625 (-1.88%)
helped: 1305
HURT: 0
helped stats (abs) min: 1 max: 7 x̄: 3.26 x̃: 4
helped stats (rel) min: 0.11% max: 6.67% x̄: 1.94% x̃: 1.86%
95% mean confidence interval for instructions value: -3.37 -3.16
95% mean confidence interval for instructions %-change: -1.99% -1.89%
Instructions are helped.

total cycles in shared programs: 146154725 -> 146097503 (-0.04%)
cycles in affected programs: 2487836 -> 2430614 (-2.30%)
helped: 1098
HURT: 53
helped stats (abs) min: 2 max: 134 x̄: 52.27 x̃: 64
helped stats (rel) min: 0.12% max: 12.70% x̄: 3.46% x̃: 2.23%
HURT stats (abs)   min: 2 max: 16 x̄: 3.13 x̃: 2
HURT stats (rel)   min: 0.18% max: 0.83% x̄: 0.63% x̃: 0.71%
95% mean confidence interval for cycles value: -51.63 -47.80
95% mean confidence interval for cycles %-change: -3.44% -3.11%
Cycles are helped.

Signed-off-by: Ian Romanick 
---
 src/intel/compiler/brw_vec4_cmod_propagation.cpp | 103 ++-
 1 file changed, 100 insertions(+), 3 deletions(-)

diff --git a/src/intel/compiler/brw_vec4_cmod_propagation.cpp 
b/src/intel/compiler/brw_vec4_cmod_propagation.cpp
index a1d46dc8dca..760327d559d 100644
--- a/src/intel/compiler/brw_vec4_cmod_propagation.cpp
+++ b/src/intel/compiler/brw_vec4_cmod_propagation.cpp
@@ -47,7 +47,7 @@ writemasks_incompatible(const vec4_instruction *earlier,
 }
 
 static bool
-opt_cmod_propagation_local(bblock_t *block)
+opt_cmod_propagation_local(bblock_t *block, vec4_visitor *v)
 {
bool progress = false;
int ip = block->end_ip + 1;
@@ -146,12 +146,109 @@ opt_cmod_propagation_local(bblock_t *block)
  scan_inst->dst, scan_inst->size_written)) {
 if ((scan_inst->predicate && scan_inst->opcode != BRW_OPCODE_SEL) 
||
 scan_inst->dst.offset != inst->src[0].offset ||
-writemasks_incompatible(scan_inst, inst) ||
 scan_inst->exec_size != inst->exec_size ||
 scan_inst->group != inst->group) {
break;
 }
 
+/* If scan_inst is a CMP that produces a single value and inst is
+ * a CMP.NZ that consumes only that value, remove inst.
+ */
+if (inst->conditional_mod == BRW_CONDITIONAL_NZ &&
+(inst->src[0].type == BRW_REGISTER_TYPE_D ||
+ inst->src[0].type == BRW_REGISTER_TYPE_UD) &&
+(inst->opcode == BRW_OPCODE_CMP ||
+ inst->opcode == BRW_OPCODE_MOV) &&
+scan_inst->opcode == BRW_OPCODE_CMP &&
+((inst->src[0].swizzle == BRW_SWIZZLE_XXXX &&
+  scan_inst->dst.writemask == WRITEMASK_X) ||
+ (inst->src[0].swizzle == BRW_SWIZZLE_YYYY &&
+  scan_inst->dst.writemask == WRITEMASK_Y) ||
+ (inst->src[0].swizzle == BRW_SWIZZLE_ZZZZ &&
+  scan_inst->dst.writemask == WRITEMASK_Z) ||
+ (inst->src[0].swizzle == BRW_SWIZZLE_WWWW &&
+  scan_inst->dst.writemask == WRITEMASK_W))) {
+   if (inst->dst.writemask != scan_inst->dst.writemask) {
+  src_reg temp(v, glsl_type::vec4_type, 1);
+
+  /* Given a sequence like:
+   *
+   *cmp.ge.f0(8)  g21<1>.xF  g20<4>.xF  g18<4>.xF
+   *...
+   *cmp.nz.f0(8)  null<1>D   g21<4>.xD  0D
+   *
+   * Replace it with something like:
+   *
+   *cmp.ge.f0(8)  g22<1>Fg20<4>.xF  g18<4>.xF
+   *mov(8)g21<1>.xF  g22<1>.F
+   *
+   * The added MOV will most likely be removed later.  In the
+   * worst case, it should be cheaper to schedule.
+   */
+  temp.swizzle = inst->src[0].swizzle;
+  temp.type = scan_inst->src[0].type;
+
+  vec4_instruction *mov = v->MOV(scan_inst->dst, temp);
+
+  /* Modify the source swizzles on scan_inst.  If scan_inst
+   * was
+   *
+   *cmp.ge.f0(8)  g21<1>.zF  g20<4>.wzyxF   
g18<4>.yxwzF
+  

[Mesa-dev] [PATCH 2/5] i965/vec4/dce: Don't narrow the write mask if the flags are used

2018-08-29 Thread Ian Romanick
From: Ian Romanick 

In an instruction sequence like

cmp(8).ge.f0.0 vgrf17:D, vgrf2.:D, vgrf9.:D
(+f0.0) sel(8) vgrf1:UD, vgrf8.xyzw:UD, vgrf1.xyzw:UD

The other fields of vgrf17 may be unused, but the CMP still needs to
generate the other flag bits.

To my surprise, nothing in shader-db or any test suite appears to hit
this.  However, I have a change to brw_vec4_cmod_propagation that
creates cases where this can happen.  This fix prevents a couple dozen
regressions in that patch.

Signed-off-by: Ian Romanick 
---
 src/intel/Makefile.compiler.am |   5 +
 .../compiler/brw_vec4_dead_code_eliminate.cpp  |  47 --
 src/intel/compiler/meson.build |   3 +-
 .../compiler/test_vec4_dead_code_eliminate.cpp | 163 +
 4 files changed, 208 insertions(+), 10 deletions(-)
 create mode 100644 src/intel/compiler/test_vec4_dead_code_eliminate.cpp

diff --git a/src/intel/Makefile.compiler.am b/src/intel/Makefile.compiler.am
index 46711fe71b7..2d66883f35a 100644
--- a/src/intel/Makefile.compiler.am
+++ b/src/intel/Makefile.compiler.am
@@ -64,6 +64,7 @@ COMPILER_TESTS = \
compiler/test_vf_float_conversions \
compiler/test_vec4_cmod_propagation \
compiler/test_vec4_copy_propagation \
+   compiler/test_vec4_dead_code_eliminate \
compiler/test_vec4_register_coalesce
 
 TESTS += $(COMPILER_TESTS)
@@ -97,6 +98,10 @@ compiler_test_vec4_cmod_propagation_SOURCES = \
compiler/test_vec4_cmod_propagation.cpp
 compiler_test_vec4_cmod_propagation_LDADD = $(TEST_LIBS)
 
+compiler_test_vec4_dead_code_eliminate_SOURCES = \
+   compiler/test_vec4_dead_code_eliminate.cpp
+compiler_test_vec4_dead_code_eliminate_LDADD = $(TEST_LIBS)
+
 # Strictly speaking this is neither a C++ test nor using gtest - we can address
 # address that at a later point. Until then, this allows us a to simplify 
things.
 compiler_test_eu_compact_SOURCES = \
diff --git a/src/intel/compiler/brw_vec4_dead_code_eliminate.cpp 
b/src/intel/compiler/brw_vec4_dead_code_eliminate.cpp
index c09a3d7ebe9..99e4c9cacaf 100644
--- a/src/intel/compiler/brw_vec4_dead_code_eliminate.cpp
+++ b/src/intel/compiler/brw_vec4_dead_code_eliminate.cpp
@@ -81,17 +81,46 @@ vec4_visitor::dead_code_eliminate()
result_live[3] = result;
 }
 
-for (int c = 0; c < 4; c++) {
-   if (!result_live[c] && inst->dst.writemask & (1 << c)) {
-  inst->dst.writemask &= ~(1 << c);
+if (inst->writes_flag()) {
+   /* Independently calculate the usage of the flag components and
+* the destination value components.
+*/
+   uint8_t flag_mask = inst->dst.writemask;
+   uint8_t dest_mask = inst->dst.writemask;
+
+   for (int c = 0; c < 4; c++) {
+  if (!result_live[c] && dest_mask & (1 << c))
+ dest_mask &= ~(1 << c);
+
+  if (!BITSET_TEST(flag_live, c))
+ flag_mask &= ~(1 << c);
+   }
+
+   if (inst->dst.writemask != (flag_mask | dest_mask)) {
   progress = true;
+  inst->dst.writemask = flag_mask | dest_mask;
+   }
 
-  if (inst->dst.writemask == 0) {
- if (inst->writes_accumulator || inst->writes_flag()) {
-inst->dst = dst_reg(retype(brw_null_reg(), 
inst->dst.type));
- } else {
-inst->opcode = BRW_OPCODE_NOP;
-break;
+   /* If none of the destination components are read, replace the
+* destination register with the NULL register.
+*/
+   if (dest_mask == 0) {
+  progress = true;
+  inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type));
+   }
+} else {
+   for (int c = 0; c < 4; c++) {
+  if (!result_live[c] && inst->dst.writemask & (1 << c)) {
+ inst->dst.writemask &= ~(1 << c);
+ progress = true;
+
+ if (inst->dst.writemask == 0) {
+if (inst->writes_accumulator) {
+   inst->dst = dst_reg(retype(brw_null_reg(), 
inst->dst.type));
+} else {
+   inst->opcode = BRW_OPCODE_NOP;
+   break;
+}
  }
   }
}
diff --git a/src/intel/compiler/meson.build b/src/intel/compiler/meson.build
index 98860c94374..484f5a076b5 100644
--- a/src/intel/compiler/meson.build
+++ b/src/intel/compiler/meson.build
@@ -144,7 +144,8 @@ if with_tests
   foreach t : ['fs_cmod_propagation', 'fs_copy_propagation',
'fs_saturate_propagation', 'vf_float_conversions',

[Mesa-dev] [RESEND PATCH 0/5] i965: More cmod propagation

2018-08-29 Thread Ian Romanick
This is mostly a resend of a series that I originally sent out around
the end of June.  I updated some of the shader-db results, and I dropped
one patch (i965/fs: Allow Boolean conditions in CSEL generation).  I
decided that I want to try to accomplish that with a different method.
That's going to take a bit more work, and I didn't want to hold up the
rest of the series.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/5] i965/fs: Relax type matching rules in cmod propagation from MOV instructions

2018-08-29 Thread Ian Romanick
From: Ian Romanick 

To allow cmod propagation from a MOV in a sequence like:

and(16) g31<1>UD   g20<8,8,1>UD   g22<8,8,1>UD
mov.nz.f0(16)   null<1>F   g31<8,8,1>D

A similar change to the vec4 backend had no effect.

Somewhere between c1ec5820593 and 40fc4b5acd6 (1,094 commits) the
effectiveness of this patch diminished.  Applying this on c1ec5820593
used to help 20 shaders on Gen7+ platforms.  I did not investigate
this further.

The SIMD8 and SIMD16 shaders in two UE4 demos are helped.

Skylake, Ivy Bridge, and Sandy Bridge had similar results. (Skylake shown)
total instructions in shared programs: 14304235 -> 14304227 (<.01%)
instructions in affected programs: 1956 -> 1948 (-0.41%)
helped: 4
HURT: 0
helped stats (abs) min: 2 max: 2 x̄: 2.00 x̃: 2
helped stats (rel) min: 0.41% max: 0.41% x̄: 0.41% x̃: 0.41%
95% mean confidence interval for instructions value: -2.00 -2.00
95% mean confidence interval for instructions %-change: -0.41% -0.41%
Instructions are helped.

total cycles in shared programs: 527531092 -> 527530920 (<.01%)
cycles in affected programs: 92474 -> 92302 (-0.19%)
helped: 4
HURT: 0
helped stats (abs) min: 32 max: 54 x̄: 43.00 x̃: 43
helped stats (rel) min: 0.15% max: 0.21% x̄: 0.18% x̃: 0.18%
95% mean confidence interval for cycles value: -63.21 -22.79
95% mean confidence interval for cycles %-change: -0.24% -0.13%
Cycles are helped.

Haswell and Broadwell had similar results. (Broadwell shown)
total instructions in shared programs: 14615704 -> 14615700 (<.01%)
instructions in affected programs: 990 -> 986 (-0.40%)
helped: 2
HURT: 0

total cycles in shared programs: 554530624 -> 554530532 (<.01%)
cycles in affected programs: 42044 -> 41952 (-0.22%)
helped: 2
HURT: 0

No changes on Iron Lake or GM45.

Signed-off-by: Ian Romanick 
---
 src/intel/compiler/brw_fs_cmod_propagation.cpp | 23 +++
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/src/intel/compiler/brw_fs_cmod_propagation.cpp 
b/src/intel/compiler/brw_fs_cmod_propagation.cpp
index 5b74f267359..17abcf05d8a 100644
--- a/src/intel/compiler/brw_fs_cmod_propagation.cpp
+++ b/src/intel/compiler/brw_fs_cmod_propagation.cpp
@@ -248,10 +248,25 @@ opt_cmod_propagation_local(const gen_device_info 
*devinfo, bblock_t *block)
break;
 
 /* Comparisons operate differently for ints and floats */
-if (scan_inst->dst.type != inst->dst.type &&
-(scan_inst->dst.type == BRW_REGISTER_TYPE_F ||
- inst->dst.type == BRW_REGISTER_TYPE_F))
-   break;
+if (scan_inst->dst.type != inst->dst.type) {
+   /* We should propagate from a MOV to another instruction in a
+* sequence like:
+*
+*and(16) g31<1>UD   g20<8,8,1>UD   g22<8,8,1>UD
+*mov.nz.f0(16)   null<1>F   g31<8,8,1>D
+*/
+   if (inst->opcode == BRW_OPCODE_MOV) {
+  if ((inst->src[0].type != BRW_REGISTER_TYPE_D &&
+   inst->src[0].type != BRW_REGISTER_TYPE_UD) ||
+  (scan_inst->dst.type != BRW_REGISTER_TYPE_D &&
+   scan_inst->dst.type != BRW_REGISTER_TYPE_UD)) {
+ break;
+  }
+   } else if (scan_inst->dst.type == BRW_REGISTER_TYPE_F ||
+  inst->dst.type == BRW_REGISTER_TYPE_F) {
+  break;
+   }
+}
 
 /* If the instruction generating inst's source also wrote the
  * flag, and inst is doing a simple .nz comparison, then inst
-- 
2.14.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/5] i965/fs: Eliminate unary op on operand of compare-with-zero

2018-08-29 Thread Ian Romanick
From: Ian Romanick 

All Gen7+ platforms had similar results. (Broadwell shown)
total instructions in shared programs: 14715715 -> 14715709 (<.01%)
instructions in affected programs: 474 -> 468 (-1.27%)
helped: 6
HURT: 0
helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
helped stats (rel) min: 1.12% max: 1.35% x̄: 1.28% x̃: 1.35%
95% mean confidence interval for instructions value: -1.00 -1.00
95% mean confidence interval for instructions %-change: -1.40% -1.15%
Instructions are helped.

total cycles in shared programs: 559569911 -> 559569809 (<.01%)
cycles in affected programs: 5963 -> 5861 (-1.71%)
helped: 6
HURT: 0
helped stats (abs) min: 16 max: 18 x̄: 17.00 x̃: 17
helped stats (rel) min: 1.45% max: 1.88% x̄: 1.73% x̃: 1.85%
95% mean confidence interval for cycles value: -18.15 -15.85
95% mean confidence interval for cycles %-change: -1.95% -1.51%
Cycles are helped.

Iron Lake and Sandy Bridge had similar results. (Iron Lake shown)
total instructions in shared programs: 7780915 -> 7780913 (<.01%)
instructions in affected programs: 246 -> 244 (-0.81%)
helped: 2
HURT: 0

total cycles in shared programs: 177876108 -> 177876106 (<.01%)
cycles in affected programs: 3636 -> 3634 (-0.06%)
helped: 1
HURT: 0

GM45
total instructions in shared programs: 4799152 -> 4799151 (<.01%)
instructions in affected programs: 126 -> 125 (-0.79%)
helped: 1
HURT: 0

total cycles in shared programs: 122052654 -> 122052652 (<.01%)
cycles in affected programs: 3640 -> 3638 (-0.05%)
helped: 1
HURT: 0

Signed-off-by: Ian Romanick 
---
 src/intel/compiler/brw_fs.cpp   | 19 ++-
 src/intel/compiler/brw_vec4.cpp | 12 
 2 files changed, 14 insertions(+), 17 deletions(-)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 58736503f9a..9d7051e279f 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -2388,6 +2388,16 @@ fs_visitor::opt_algebraic()
foreach_block_and_inst(block, fs_inst, inst, cfg) {
   switch (inst->opcode) {
   case BRW_OPCODE_MOV:
+ if ((inst->conditional_mod == BRW_CONDITIONAL_Z ||
+  inst->conditional_mod == BRW_CONDITIONAL_NZ) &&
+ inst->dst.is_null() &&
+ (inst->src[0].abs || inst->src[0].negate)) {
+inst->src[0].abs = false;
+inst->src[0].negate = false;
+progress = true;
+break;
+ }
+
  if (inst->src[0].file != IMM)
 break;
 
@@ -2492,13 +2502,12 @@ fs_visitor::opt_algebraic()
  }
  break;
   case BRW_OPCODE_CMP:
- if (inst->conditional_mod == BRW_CONDITIONAL_GE &&
- inst->src[0].abs &&
- inst->src[0].negate &&
- inst->src[1].is_zero()) {
+ if ((inst->conditional_mod == BRW_CONDITIONAL_Z ||
+  inst->conditional_mod == BRW_CONDITIONAL_NZ) &&
+ inst->src[1].is_zero() &&
+ (inst->src[0].abs || inst->src[0].negate)) {
 inst->src[0].abs = false;
 inst->src[0].negate = false;
-inst->conditional_mod = BRW_CONDITIONAL_Z;
 progress = true;
 break;
  }
diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp
index 4e242e03032..3d5d4fb5b4d 100644
--- a/src/intel/compiler/brw_vec4.cpp
+++ b/src/intel/compiler/brw_vec4.cpp
@@ -871,18 +871,6 @@ vec4_visitor::opt_algebraic()
 progress = true;
 }
 break;
-  case BRW_OPCODE_CMP:
- if (inst->conditional_mod == BRW_CONDITIONAL_GE &&
- inst->src[0].abs &&
- inst->src[0].negate &&
- inst->src[1].is_zero()) {
-inst->src[0].abs = false;
-inst->src[0].negate = false;
-inst->conditional_mod = BRW_CONDITIONAL_Z;
-progress = true;
-break;
- }
- break;
   case SHADER_OPCODE_BROADCAST:
  if (is_uniform(inst->src[0]) ||
  inst->src[1].is_zero()) {
-- 
2.14.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] st/nine: do not double-close the fd on teardown

2018-08-29 Thread Axel Davy

Hi Emil,

This patch and the nine part of the second patch look fine.

Reviewed-by: Axel Davy 

for them.

Yours,

Axel

On 29/08/2018 19:13, Emil Velikov wrote:

From: Emil Velikov 

As the newly introduced comment says:
  The pipe loader takes ownership of the fd

Thus, there's no need to close it again.

Cc: Patrick Rudolph 
Cc: Axel Davy 
Cc: mesa-sta...@lists.freedesktop.org
Signed-off-by: Emil Velikov 
---
  src/gallium/targets/d3dadapter9/drm.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/targets/d3dadapter9/drm.c 
b/src/gallium/targets/d3dadapter9/drm.c
index 85b3e10633e..a2a36dbbda9 100644
--- a/src/gallium/targets/d3dadapter9/drm.c
+++ b/src/gallium/targets/d3dadapter9/drm.c
@@ -107,7 +107,7 @@ drm_destroy( struct d3dadapter9_context *ctx )
  if (drm->dev)
  pipe_loader_release(&drm->dev, 1);
  
-close(drm->fd);

+/* The pipe loader takes ownership of the fd */
  FREE(ctx);
  }
  



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] pipe-loader: move dup(fd) within pipe_loader_drm_probe_fd

2018-08-29 Thread Thomas Hellstrom

Hi, Emil,

On 08/29/2018 07:13 PM, Emil Velikov wrote:

From: Emil Velikov 

Currently pipe_loader_drm_probe_fd takes ownership of the fd given.
To match that, pipe_loader_release closes it.

Yet we have many instances which do not want the change of ownership,
and thus duplicate the fd before passing it to the pipe-loader.

Move the dup() within pipe-loader, explicitly document that and document
all the cases through the codebase.

A trivial git grep -2 pipe_loader_release makes things as obvious as it
gets ;-)

Cc: Leo Liu 
Cc: Thomas Hellstrom 
Cc: Axel Davy 
Cc: Patrick Rudolph 
Signed-off-by: Emil Velikov 
---
I'm 99% sure that the VAAPI/XA notes are correct, but I left it as CHECK
since it's been a while since I used those ;-)

Leo, Thomas, can you please confirm the respective CHECK notes?



Yes, The user of the xa API maintains ownership of the drm fd passed to XA.

For the xa part:
Reviewed-by: Thomas Hellstrom 


Thanks,
Thomas

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3] intel/tools: new i965_disasm tool

2018-08-29 Thread Matt Turner
Looks great!

Reviewed-by: Matt Turner 

and pushed. Thanks a bunch!
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: skip stringification in preprocessor if in unreachable branch

2018-08-29 Thread Ian Romanick
On 08/28/2018 06:36 PM, Timothy Arceri wrote:
> This fixes compilation of some "No Mans Sky" shaders where the stringification
> happens in branches intended for DX12.

I was going to complain that this would make things like the following
compile:

#version 110

#ifdef this_is_undefined
#is_this_valid_too
#endif

void main() { }

This already compiles.  Looking at the C99 spec and testing with GCC,
this is intentional.  We should probably add that as another
preprocessor test.

This patch is

Reviewed-by: Ian Romanick 

> ---
> 
>  Piglit tests: https://patchwork.freedesktop.org/series/48850/
> 
>  src/compiler/glsl/glcpp/glcpp-lex.l | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/src/compiler/glsl/glcpp/glcpp-lex.l 
> b/src/compiler/glsl/glcpp/glcpp-lex.l
> index 9cfcc120222..fe5845acd4e 100644
> --- a/src/compiler/glsl/glcpp/glcpp-lex.l
> +++ b/src/compiler/glsl/glcpp/glcpp-lex.l
> @@ -420,8 +420,10 @@ HEXADECIMAL_INTEGER  0[xX][0-9a-fA-F]+[uU]?
>  
>   /* This will catch any non-directive garbage after a HASH */
>  {NONSPACE} {
> - BEGIN INITIAL;
> - RETURN_TOKEN (GARBAGE);
> + if (!parser->skipping) {
> + BEGIN INITIAL;
> + RETURN_TOKEN (GARBAGE);
> + }
>  }
>  
>   /* An identifier immediately followed by '(' */
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3] intel/tools: new i965_disasm tool

2018-08-29 Thread Sagar Ghuge
Adds a new i965 instruction disassemble tool

v2: 1) fix a few nits (Matt Turner)
2) Remove i965_disasm header (Matt Turner)

v3: 1) Redirect output to correct file descriptors (Matt Turner)
2) Refactor code (Matt Turner)
3) Use better formatting style (Matt Turner)

Signed-off-by: Sagar Ghuge 
---
 src/intel/Makefile.tools.am   |  14 +++
 src/intel/tools/i965_disasm.c | 182 ++
 src/intel/tools/meson.build   |  11 ++
 3 files changed, 207 insertions(+)
 create mode 100644 src/intel/tools/i965_disasm.c

diff --git a/src/intel/Makefile.tools.am b/src/intel/Makefile.tools.am
index 30c8d3b3f7..4809962b18 100644
--- a/src/intel/Makefile.tools.am
+++ b/src/intel/Makefile.tools.am
@@ -22,6 +22,7 @@
 noinst_PROGRAMS += \
tools/aubinator \
tools/aubinator_error_decode \
+   tools/i965_disasm \
tools/error2aub
 
 
@@ -66,6 +67,19 @@ tools_aubinator_error_decode_CFLAGS = \
$(AM_CFLAGS) \
$(ZLIB_CFLAGS)
 
+tools_i965_disasm_SOURCES = \
+   tools/i965_disasm.c
+
+tools_i965_disasm_LDADD = \
+   common/libintel_common.la \
+   compiler/libintel_compiler.la \
+   dev/libintel_dev.la \
+   $(top_builddir)/src/util/libmesautil.la \
+   $(PTHREAD_LIBS)
+
+tools_i965_disasm_CFLAGS = \
+   $(AM_CFLAGS)
+
 
 tools_error2aub_SOURCES = \
tools/gen_context.h \
diff --git a/src/intel/tools/i965_disasm.c b/src/intel/tools/i965_disasm.c
new file mode 100644
index 00..73a6760fc1
--- /dev/null
+++ b/src/intel/tools/i965_disasm.c
@@ -0,0 +1,182 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include "compiler/brw_eu.h"
+#include "dev/gen_device_info.h"
+
+uint64_t INTEL_DEBUG;
+
+/* Return size of file in bytes pointed by fp */
+static size_t
+i965_disasm_get_file_size(FILE *fp)
+{
+   size_t size;
+
+   fseek(fp, 0L, SEEK_END);
+   size = ftell(fp);
+   fseek(fp, 0L, SEEK_SET);
+
+   return size;
+}
+
+static void *
+i965_disasm_read_binary(FILE *fp, size_t *end)
+{
+   void *assembly;
+
+   *end = i965_disasm_get_file_size(fp);
+
+   assembly = malloc(*end + 1);
+   if (assembly == NULL)
+  return NULL;
+
+   fread(assembly, *end, 1, fp);
+   fclose(fp);
+
+   return assembly;
+}
+
+static struct gen_device_info *
+i965_disasm_init(uint16_t pci_id)
+{
+   struct gen_device_info *devinfo;
+
+   devinfo = malloc(sizeof *devinfo);
+   if (devinfo == NULL)
+  return NULL;
+
+   if (!gen_get_device_info(pci_id, devinfo)) {
+  fprintf(stderr, "can't find device information: pci_id=0x%x\n",
+  pci_id);
+  exit(EXIT_FAILURE);
+   }
+
+   /* initialize compaction table in order to handle compacted instructions */
+   brw_init_compaction_tables(devinfo);
+
+   return devinfo;
+}
+
+static void
+print_help(const char *progname, FILE *file)
+{
+   fprintf(file,
+   "Usage: %s [OPTION]...\n"
+   "Disassemble i965 instructions from binary file.\n\n"
+   "  --help display this help and exit\n"
+   "  --binary-path=PATH read binary file from binary file PATH\n"
+   "  --gen=platform disassemble instructions for given \n"
+   " platform (3 letter platform name)\n",
+   progname);
+}
+
+int main(int argc, char *argv[])
+{
+   FILE *fp = NULL;
+   void *assembly = NULL;
+   char *binary_path = NULL;
+   size_t start = 0, end = 0;
+   uint16_t pci_id = 0;
+   int c, i;
+   struct gen_device_info *devinfo;
+
+   bool help = false;
+   const struct option i965_disasm_opts[] = {
+  { "help",  no_argument,   (int *) &help,  true },
+  { "binary-path",   required_argument, NULL,   'b' },
+  { "gen",   required_argument, NULL,   'g'},
+  { NULL,0, 

[Mesa-dev] [Bug 107477] [DXVK] Setting high shader quality in GTA V results in LLVM error

2018-08-29 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107477

--- Comment #15 from Clément Guérin  ---
I haven't found the time to test it yet. Is forcing `output_usage_mask` at line
71 intentional?

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 14/22] intel/compiler: Do image load/store lowering to NIR

2018-08-29 Thread Kenneth Graunke
On Wednesday, August 29, 2018 10:11:48 AM PDT Jason Ekstrand wrote:
> This commit moves our storage image format conversion codegen into NIR
> instead of doing it in the back-end.  This has the advantage of letting
> us run it through NIR's optimizer which is pretty effective at shrinking
> things down.  In the common case of rgba8, the number of instructions
> emitted after NIR is done with it is half of what it was with the
> lowering happening in the back-end.  On the downside, the back-end's
> lowering is able to directly use predicates and the NIR lowering has to
> use IFs.
> 
> Shader-db results on Kaby Lake:
> 
> total instructions in shared programs: 15166910 -> 15166872 (<.01%)
> instructions in affected programs: 5895 -> 5857 (-0.64%)
> helped: 15
> HURT: 0
> 
> Clearly, we don't have that much image_load_store happening in the
> shaders in shader-db

Looks like the untyped stride checks are back and the unnecessary typed
load checks (mistakenly copied from atomics) are gone.  Nice.

Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 21.5/22 (was 18/22)] intel: Use TXS for image_size when we have a typed surface

2018-08-29 Thread Kenneth Graunke
On Wednesday, August 29, 2018 10:12:59 AM PDT Jason Ekstrand wrote:
> ---
>  src/intel/compiler/brw_eu_defines.h   |  2 ++
>  src/intel/compiler/brw_fs_generator.cpp   | 23 +---
>  src/intel/compiler/brw_fs_nir.cpp | 35 +++
>  .../compiler/brw_nir_lower_image_load_store.c | 15 
>  src/intel/compiler/brw_shader.cpp |  3 ++
>  5 files changed, 74 insertions(+), 4 deletions(-)
> 
> diff --git a/src/intel/compiler/brw_eu_defines.h 
> b/src/intel/compiler/brw_eu_defines.h
> index 883616d6bab..52957882b10 100644
> --- a/src/intel/compiler/brw_eu_defines.h
> +++ b/src/intel/compiler/brw_eu_defines.h
> @@ -354,6 +354,8 @@ enum opcode {
> SHADER_OPCODE_SAMPLEINFO,
> SHADER_OPCODE_SAMPLEINFO_LOGICAL,
>  
> +   SHADER_OPCODE_IMAGE_SIZE,
> +
> /**
>  * Combines multiple sources of size 1 into a larger virtual GRF.
>  * For example, parameters for a send-from-GRF message.  Or, updating
> diff --git a/src/intel/compiler/brw_fs_generator.cpp 
> b/src/intel/compiler/brw_fs_generator.cpp
> index d40ce2ce0d7..cb402cd4e75 100644
> --- a/src/intel/compiler/brw_fs_generator.cpp
> +++ b/src/intel/compiler/brw_fs_generator.cpp
> @@ -958,6 +958,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg 
> dst, struct brw_reg src
>   }
>   break;
>case SHADER_OPCODE_TXS:
> +  case SHADER_OPCODE_IMAGE_SIZE:
>msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
>break;
>case SHADER_OPCODE_TXD:
> @@ -1126,10 +1127,19 @@ fs_generator::generate_tex(fs_inst *inst, struct 
> brw_reg dst, struct brw_reg src
>}
> }
>  
> -   uint32_t base_binding_table_index = (inst->opcode == SHADER_OPCODE_TG4 ||
> - inst->opcode == SHADER_OPCODE_TG4_OFFSET)
> - ? prog_data->binding_table.gather_texture_start
> - : prog_data->binding_table.texture_start;
> +   uint32_t base_binding_table_index;
> +   switch (inst->opcode) {
> +   case SHADER_OPCODE_TG4:
> +   case SHADER_OPCODE_TG4_OFFSET:
> +  base_binding_table_index = 
> prog_data->binding_table.gather_texture_start;
> +  break;
> +   case SHADER_OPCODE_IMAGE_SIZE:
> +  base_binding_table_index = prog_data->binding_table.image_start;
> +  break;
> +   default:
> +  base_binding_table_index = prog_data->binding_table.texture_start;
> +  break;
> +   }
>  
> if (surface_index.file == BRW_IMMEDIATE_VALUE &&
> sampler_index.file == BRW_IMMEDIATE_VALUE) {
> @@ -2114,6 +2124,11 @@ fs_generator::generate_code(const cfg_t *cfg, int 
> dispatch_width)
>case SHADER_OPCODE_SAMPLEINFO:
>generate_tex(inst, dst, src[0], src[1], src[2]);
>break;
> +
> +  case SHADER_OPCODE_IMAGE_SIZE:
> + generate_tex(inst, dst, src[0], src[1], brw_imm_ud(0));
> + break;
> +
>case FS_OPCODE_DDX_COARSE:
>case FS_OPCODE_DDX_FINE:
>   generate_ddx(inst, dst, src[0]);
> diff --git a/src/intel/compiler/brw_fs_nir.cpp 
> b/src/intel/compiler/brw_fs_nir.cpp
> index aaba0e2a693..2fef050f81a 100644
> --- a/src/intel/compiler/brw_fs_nir.cpp
> +++ b/src/intel/compiler/brw_fs_nir.cpp
> @@ -3918,6 +3918,41 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, 
> nir_intrinsic_instr *instr
>break;
> }
>  
> +   case nir_intrinsic_image_size: {
> +  /* Unlike the [un]typed load and store opcodes, the TXS that this turns
> +   * into will handle the binding table index for us in the generator.
> +   */
> +  fs_reg image = retype(get_nir_src_imm(instr->src[0]),
> +BRW_REGISTER_TYPE_UD);
> +  image = bld.emit_uniformize(image);
> +
> +  /* Since the image size is always uniform, we can just emit a SIMD8
> +   * query instruction and splat the result out.
> +   */
> +  const fs_builder ubld = bld.exec_all().group(8, 0);

Ah good, you remembered the exec_all().  Matt just reminded me about it.

Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] anv: Use separate MOCS settings for external BOs on gen8

2018-08-29 Thread Jason Ekstrand
On all other platforms, it's safe to use the usual PTE settings for both
internal and external BOs.  On Broadwell, however, we can't get the
right caching behavior for scanout without disabling eLLC and we really
don't want to do this on everything.

In order to do this, we add an anv-specific BO flag for "external" and
use that to distinguish between buffers which may be shared with other
processes and/or display and those which are entirely internal.  That,
together with an anv_mocs_for_bo helper, lets us choose the right MOCS
settings for each BO use.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99507
Cc: mesa-sta...@lists.freedesktop.org
---
 src/intel/vulkan/anv_allocator.c   | 12 ++--
 src/intel/vulkan/anv_batch_chain.c |  2 +-
 src/intel/vulkan/anv_blorp.c   | 15 ---
 src/intel/vulkan/anv_device.c  |  9 +++--
 src/intel/vulkan/anv_image.c   |  5 +++--
 src/intel/vulkan/anv_intel.c   |  2 +-
 src/intel/vulkan/anv_private.h | 20 
 src/intel/vulkan/gen7_cmd_buffer.c |  3 ++-
 src/intel/vulkan/gen8_cmd_buffer.c |  3 ++-
 src/intel/vulkan/genX_cmd_buffer.c | 18 +-
 src/intel/vulkan/genX_gpu_memcpy.c |  5 ++---
 src/intel/vulkan/genX_state.c  |  6 ++
 12 files changed, 71 insertions(+), 29 deletions(-)

diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c
index ab01d46cbeb..f62d48ae3fe 100644
--- a/src/intel/vulkan/anv_allocator.c
+++ b/src/intel/vulkan/anv_allocator.c
@@ -1253,7 +1253,8 @@ anv_bo_cache_lookup(struct anv_bo_cache *cache, uint32_t 
gem_handle)
(EXEC_OBJECT_WRITE | \
 EXEC_OBJECT_ASYNC | \
 EXEC_OBJECT_SUPPORTS_48B_ADDRESS | \
-EXEC_OBJECT_PINNED)
+EXEC_OBJECT_PINNED | \
+ANV_BO_EXTERNAL)
 
 VkResult
 anv_bo_cache_alloc(struct anv_device *device,
@@ -1311,6 +1312,7 @@ anv_bo_cache_import(struct anv_device *device,
 struct anv_bo **bo_out)
 {
assert(bo_flags == (bo_flags & ANV_BO_CACHE_SUPPORTED_FLAGS));
+   assert(bo_flags & ANV_BO_EXTERNAL);
 
pthread_mutex_lock(&cache->mutex);
 
@@ -1327,7 +1329,7 @@ anv_bo_cache_import(struct anv_device *device,
* client has imported a BO twice in different ways and they get what
* they have coming.
*/
-  uint64_t new_flags = 0;
+  uint64_t new_flags = ANV_BO_EXTERNAL;
   new_flags |= (bo->bo.flags | bo_flags) & EXEC_OBJECT_WRITE;
   new_flags |= (bo->bo.flags & bo_flags) & EXEC_OBJECT_ASYNC;
   new_flags |= (bo->bo.flags & bo_flags) & 
EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
@@ -1411,6 +1413,12 @@ anv_bo_cache_export(struct anv_device *device,
assert(anv_bo_cache_lookup(cache, bo_in->gem_handle) == bo_in);
struct anv_cached_bo *bo = (struct anv_cached_bo *)bo_in;
 
+   /* This BO must have been flagged external in order for us to be able
+* to export it.  This is done based on external options passed into
+* anv_AllocateMemory.
+*/
+   assert(bo->bo.flags & ANV_BO_EXTERNAL);
+
int fd = anv_gem_handle_to_fd(device, bo->bo.gem_handle);
if (fd < 0)
   return vk_error(VK_ERROR_TOO_MANY_OBJECTS);
diff --git a/src/intel/vulkan/anv_batch_chain.c 
b/src/intel/vulkan/anv_batch_chain.c
index 0f7c8325ea4..3e13553ac18 100644
--- a/src/intel/vulkan/anv_batch_chain.c
+++ b/src/intel/vulkan/anv_batch_chain.c
@@ -1088,7 +1088,7 @@ anv_execbuf_add_bo(struct anv_execbuf *exec,
   obj->relocs_ptr = 0;
   obj->alignment = 0;
   obj->offset = bo->offset;
-  obj->flags = bo->flags | extra_flags;
+  obj->flags = (bo->flags & ~ANV_BO_FLAG_MASK) | extra_flags;
   obj->rsvd1 = 0;
   obj->rsvd2 = 0;
}
diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index cd67cc636b2..9eb7aead8b0 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -155,7 +155,7 @@ get_blorp_surf_for_anv_buffer(struct anv_device *device,
   .addr = {
  .buffer = buffer->address.bo,
  .offset = buffer->address.offset + offset,
- .mocs = device->default_mocs,
+ .mocs = anv_mocs_for_bo(device, buffer->address.bo),
   },
};
 
@@ -208,7 +208,7 @@ get_blorp_surf_for_anv_image(const struct anv_device 
*device,
   .addr = {
  .buffer = image->planes[plane].address.bo,
  .offset = image->planes[plane].address.offset + surface->offset,
- .mocs = device->default_mocs,
+ .mocs = anv_mocs_for_bo(device, image->planes[plane].address.bo),
   },
};
 
@@ -218,7 +218,7 @@ get_blorp_surf_for_anv_image(const struct anv_device 
*device,
   blorp_surf->aux_addr = (struct blorp_address) {
  .buffer = image->planes[plane].address.bo,
  .offset = image->planes[plane].address.offset + aux_surface->offset,
- .mocs = device->default_mocs,
+ .mocs = anv_mocs_for_bo(device, image->planes[plane].address.bo),
   };
   blorp_surf->aux_usage = aux_usage;
 
@@ -663,12 +663,12 @@ void 

[Mesa-dev] [PATCH] anv: Re-emit vertex buffers when the pipeline changes

2018-08-29 Thread Jason Ekstrand
Some of the bits of VERTEX_BUFFER_STATE such as access type, instance
data step rate, and pitch come from the pipeline.

Cc: mesa-sta...@lists.freedesktop.org
---
 src/intel/vulkan/genX_cmd_buffer.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index b55acca4521..8cbc345aef9 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -2493,6 +2493,8 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer 
*cmd_buffer)
uint32_t *p;
 
uint32_t vb_emit = cmd_buffer->state.gfx.vb_dirty & pipeline->vb_used;
+   if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE)
+  vb_emit |= pipeline->vb_used;
 
assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0);
 
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   >