[Mesa-dev] [PATCH 07/11] freedreno/ir3: Use the separated dead write vars pass

2018-09-14 Thread Caio Marcelo de Oliveira Filho
No changes to shader-db expected.
---
 src/gallium/drivers/freedreno/ir3/ir3_nir.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir.c 
b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
index db1d74fdee7..d5f42f2a231 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
@@ -94,6 +94,7 @@ ir3_optimize_loop(nir_shader *s)
 
OPT_V(s, nir_lower_vars_to_ssa);
progress |= OPT(s, nir_opt_copy_prop_vars);
+   progress |= OPT(s, nir_opt_dead_write_vars);
progress |= OPT(s, nir_lower_alu_to_scalar);
progress |= OPT(s, nir_lower_phis_to_scalar);
 
-- 
2.19.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 09/11] nir: Add tests for copy propagation of derefs

2018-09-14 Thread Caio Marcelo de Oliveira Filho
Also add tests for removal of redundant loads, which we currently handle as
part of the copy propagation.
---
 src/compiler/nir/tests/vars_tests.cpp | 300 ++
 1 file changed, 300 insertions(+)

diff --git a/src/compiler/nir/tests/vars_tests.cpp 
b/src/compiler/nir/tests/vars_tests.cpp
index cdd2a17fe92..b1fa04b5cb9 100644
--- a/src/compiler/nir/tests/vars_tests.cpp
+++ b/src/compiler/nir/tests/vars_tests.cpp
@@ -140,11 +140,131 @@ nir_imm_ivec2(nir_builder *build, int x, int y)
 }
 
 /* Allow grouping the tests while still sharing the helpers. */
+class nir_redundant_load_vars_test : public nir_vars_test {};
 class nir_copy_prop_vars_test : public nir_vars_test {};
 class nir_dead_write_vars_test : public nir_vars_test {};
 
 } // namespace
 
+TEST_F(nir_redundant_load_vars_test, duplicated_load)
+{
+   /* Load a variable twice in the same block.  One should be removed. */
+
+   nir_variable *in = create_int(nir_var_shader_in, "in");
+   nir_variable **out = create_many_int(nir_var_shader_out, "out", 2);
+
+   nir_store_var(b, out[0], nir_load_var(b, in), 1);
+   nir_store_var(b, out[1], nir_load_var(b, in), 1);
+
+   nir_validate_shader(b->shader);
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
+
+   bool progress = nir_opt_copy_prop_vars(b->shader);
+   EXPECT_TRUE(progress);
+
+   nir_validate_shader(b->shader);
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
+}
+
+TEST_F(nir_redundant_load_vars_test, DISABLED_duplicated_load_in_two_blocks)
+{
+   /* Load a variable twice in different blocks.  One should be removed. */
+
+   nir_variable *in = create_int(nir_var_shader_in, "in");
+   nir_variable **out = create_many_int(nir_var_shader_out, "out", 2);
+
+   nir_store_var(b, out[0], nir_load_var(b, in), 1);
+
+   /* Forces the stores to be in different blocks. */
+   nir_pop_if(b, nir_push_if(b, nir_imm_int(b, 0)));
+
+   nir_store_var(b, out[1], nir_load_var(b, in), 1);
+
+   nir_validate_shader(b->shader);
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
+
+   bool progress = nir_opt_copy_prop_vars(b->shader);
+   EXPECT_TRUE(progress);
+
+   nir_validate_shader(b->shader);
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
+}
+
+TEST_F(nir_redundant_load_vars_test, DISABLED_invalidate_inside_if_block)
+{
+   /* Load variables, then write to some of them in different branches of the
+* if statement.  They should be invalidated accordingly.
+*/
+
+   nir_variable **g = create_many_int(nir_var_global, "g", 3);
+   nir_variable **out = create_many_int(nir_var_shader_out, "out", 3);
+
+   nir_load_var(b, g[0]);
+   nir_load_var(b, g[1]);
+   nir_load_var(b, g[2]);
+
+   nir_if *if_stmt = nir_push_if(b, nir_imm_int(b, 0));
+   nir_store_var(b, g[0], nir_imm_int(b, 10), 1);
+
+   nir_push_else(b, if_stmt);
+   nir_store_var(b, g[1], nir_imm_int(b, 20), 1);
+
+   nir_pop_if(b, if_stmt);
+
+   nir_store_var(b, out[0], nir_load_var(b, g[0]), 1);
+   nir_store_var(b, out[1], nir_load_var(b, g[1]), 1);
+   nir_store_var(b, out[2], nir_load_var(b, g[2]), 1);
+
+   nir_validate_shader(b->shader);
+
+   bool progress = nir_opt_copy_prop_vars(b->shader);
+   EXPECT_TRUE(progress);
+
+   /* There are 3 initial loads, plus 2 loads for the values invalidated
+* inside the if statement.
+*/
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 5);
+
+   /* We only load g[2] once. */
+   unsigned g2_load_count = 0;
+   nir_intrinsic_instr *load = NULL;
+   for (int i = 0; i < 5; i++) {
+  load = find_next_intrinsic(nir_intrinsic_load_deref, load);
+  if (nir_intrinsic_get_var(load, 0) == g[2])
+ g2_load_count++;
+   }
+   EXPECT_EQ(g2_load_count, 1);
+}
+
+TEST_F(nir_redundant_load_vars_test, invalidate_live_load_in_the_end_of_loop)
+{
+   /* Invalidating a load in the end of loop body will apply to the whole loop
+* body.
+*/
+
+   nir_variable *v = create_int(nir_var_shader_storage, "v");
+
+   nir_load_var(b, v);
+
+   nir_loop *loop = nir_push_loop(b);
+
+   nir_if *if_stmt = nir_push_if(b, nir_imm_int(b, 0));
+   nir_jump(b, nir_jump_break);
+   nir_pop_if(b, if_stmt);
+
+   nir_load_var(b, v);
+   nir_store_var(b, v, nir_imm_int(b, 10), 1);
+
+   nir_pop_loop(b, loop);
+
+   bool progress = nir_opt_copy_prop_vars(b->shader);
+   ASSERT_FALSE(progress);
+}
+
 TEST_F(nir_copy_prop_vars_test, simple_copies)
 {
nir_variable *in   = create_int(nir_var_shader_in,  "in");
@@ -199,6 +319,186 @@ TEST_F(nir_copy_prop_vars_test, simple_store_load)
}
 }
 
+TEST_F(nir_copy_prop_vars_test, store_store_load)
+{
+   nir_variable **v = create_many_ivec2(nir_var_local, "v", 2);
+   unsigned mask = 1 | 2;
+
+   nir_ssa_def *first_value = nir_imm_ivec2(b, 10, 20);
+   nir_store_var(b, v[0], first_value, mask);
+
+   nir_ssa_def *second_value = nir_imm_ivec2(b, 30, 40);
+   nir_store_var(b, v[0], second_value, mask);
+
+   nir_ssa_def *read_value = nir_load_var(b, v[0]);
+   nir_store_var(b, v[1], 

[Mesa-dev] [PATCH 08/11] nir: Remove handling of dead writes from copy_prop_vars

2018-09-14 Thread Caio Marcelo de Oliveira Filho
These are covered by another pass now.
---
 src/compiler/nir/nir_opt_copy_prop_vars.c | 84 +++
 1 file changed, 8 insertions(+), 76 deletions(-)

diff --git a/src/compiler/nir/nir_opt_copy_prop_vars.c 
b/src/compiler/nir/nir_opt_copy_prop_vars.c
index 9fecaf0eeec..5276aa176d8 100644
--- a/src/compiler/nir/nir_opt_copy_prop_vars.c
+++ b/src/compiler/nir/nir_opt_copy_prop_vars.c
@@ -38,10 +38,7 @@
  *  1) Copy-propagation on variables that have indirect access.  This includes
  * propagating from indirect stores into indirect loads.
  *
- *  2) Dead code elimination of store_var and copy_var intrinsics based on
- * killed destination values.
- *
- *  3) Removal of redundant load_deref intrinsics.  We can't trust regular CSE
+ *  2) Removal of redundant load_deref intrinsics.  We can't trust regular CSE
  * to do this because it isn't aware of variable writes that may alias the
  * value and make the former load invalid.
  *
@@ -51,6 +48,8 @@
  * rapidly get out of hand.  Fortunately, for anything that is only ever
  * accessed directly, we get SSA based copy-propagation which is extremely
  * powerful so this isn't that great a loss.
+ *
+ * Removal of dead writes to variables is handled by another pass.
  */
 
 struct value {
@@ -64,9 +63,6 @@ struct value {
 struct copy_entry {
struct list_head link;
 
-   nir_instr *store_instr[4];
-
-   unsigned comps_may_be_read;
struct value src;
 
nir_deref_instr *dst;
@@ -114,44 +110,6 @@ copy_entry_remove(struct copy_prop_var_state *state, 
struct copy_entry *entry)
list_add(>link, >copy_free_list);
 }
 
-static void
-remove_dead_writes(struct copy_prop_var_state *state,
-   struct copy_entry *entry, unsigned write_mask)
-{
-   /* We're overwriting another entry.  Some of it's components may not
-* have been read yet and, if that's the case, we may be able to delete
-* some instructions but we have to be careful.
-*/
-   unsigned dead_comps = write_mask & ~entry->comps_may_be_read;
-
-   for (unsigned mask = dead_comps; mask;) {
-  unsigned i = u_bit_scan();
-
-  nir_instr *instr = entry->store_instr[i];
-
-  /* We may have already deleted it on a previous iteration */
-  if (!instr)
- continue;
-
-  /* See if this instr is used anywhere that it's not dead */
-  bool keep = false;
-  for (unsigned j = 0; j < 4; j++) {
- if (entry->store_instr[j] == instr) {
-if (dead_comps & (1 << j)) {
-   entry->store_instr[j] = NULL;
-} else {
-   keep = true;
-}
- }
-  }
-
-  if (!keep) {
- nir_instr_remove(instr);
- state->progress = true;
-  }
-   }
-}
-
 static struct copy_entry *
 lookup_entry_for_deref(struct copy_prop_var_state *state,
nir_deref_instr *deref,
@@ -165,16 +123,6 @@ lookup_entry_for_deref(struct copy_prop_var_state *state,
return NULL;
 }
 
-static void
-mark_aliased_entries_as_read(struct copy_prop_var_state *state,
- nir_deref_instr *deref, unsigned components)
-{
-   list_for_each_entry(struct copy_entry, iter, >copies, link) {
-  if (nir_compare_derefs(iter->dst, deref) & nir_derefs_may_alias_bit)
- iter->comps_may_be_read |= components;
-   }
-}
-
 static struct copy_entry *
 get_entry_and_kill_aliases(struct copy_prop_var_state *state,
nir_deref_instr *deref,
@@ -191,11 +139,6 @@ get_entry_and_kill_aliases(struct copy_prop_var_state 
*state,
   }
 
   nir_deref_compare_result comp = nir_compare_derefs(iter->dst, deref);
-  /* This is a store operation.  If we completely overwrite some value, we
-   * want to delete any dead writes that may be present.
-   */
-  if (comp & nir_derefs_b_contains_a_bit)
- remove_dead_writes(state, iter, write_mask);
 
   if (comp & nir_derefs_equal_bit) {
  assert(entry == NULL);
@@ -228,25 +171,19 @@ apply_barrier_for_modes(struct copy_prop_var_state *state,
 
 static void
 store_to_entry(struct copy_prop_var_state *state, struct copy_entry *entry,
-   const struct value *value, unsigned write_mask,
-   nir_instr *store_instr)
+   const struct value *value, unsigned write_mask)
 {
-   entry->comps_may_be_read &= ~write_mask;
if (value->is_ssa) {
   entry->src.is_ssa = true;
   /* Only overwrite the written components */
   for (unsigned i = 0; i < 4; i++) {
- if (write_mask & (1 << i)) {
-entry->store_instr[i] = store_instr;
+ if (write_mask & (1 << i))
 entry->src.ssa[i] = value->ssa[i];
- }
   }
} else {
   /* Non-ssa stores always write everything */
   entry->src.is_ssa = false;
   entry->src.deref = value->deref;
-  for (unsigned i = 0; i < 4; i++)
- entry->store_instr[i] = store_instr;
}
 }
 
@@ -490,9 +427,6 @@ 

[Mesa-dev] [PATCH 11/11] nir: Copy propagation between blocks

2018-09-14 Thread Caio Marcelo de Oliveira Filho
Extend the pass to propagate the copies information along the control
flow graph.  It performs two walks: first it collects the vars
that were written inside each node, then it walks applying the copy
propagation using a list of copies previously available.  At each node
the list is invalidated according to results from the first walk.

This approach is simpler than a full data-flow analysis, but covers
various cases.  If derefs are used for operating on more memory
resources (e.g. SSBOs), the difference from a regular pass is expected
to be more visible -- as the SSA copy propagation pass won't apply to
those.

A full data-flow analysis would handle more scenarios: conditional
breaks in the control flow and merging equivalent effects from multiple
branches (e.g. using a phi node to merge the source for writes to the
same deref).  However, as previous commentary in the code stated, its
complexity 'rapidly get out of hand'.  The current patch is a good
intermediate step towards more complex analysis.

The 'copies' linked list was modified to use util_dynarray to make it
more convenient to clone it (to handle ifs/loops).

Annotated shader-db results for Skylake:

total instructions in shared programs: 15105796 -> 15105451 (<.01%)
instructions in affected programs: 152293 -> 151948 (-0.23%)
helped: 96
HURT: 17

All the HURTs and many HELPs are one instruction.  Looking
at pass by pass outputs, the copy prop kicks in removing a
bunch of loads correctly, which ends up altering which
other optimizations kick in.  In those cases the copies would be
propagated after lowering to SSA.

In a few HELPs we are actually doing more than was
possible previously, e.g. consolidating load_uniforms from
different blocks.  Most of those are from
shaders/dolphin/ubershaders/.

total cycles in shared programs: 566048861 -> 565954876 (-0.02%)
cycles in affected programs: 151461830 -> 151367845 (-0.06%)
helped: 2933
HURT: 2950

A lot of noise on both sides.

total loops in shared programs: 4603 -> 4603 (0.00%)
loops in affected programs: 0 -> 0
helped: 0
HURT: 0

total spills in shared programs: 11085 -> 11073 (-0.11%)
spills in affected programs: 23 -> 11 (-52.17%)
helped: 1
HURT: 0

The shaders/dolphin/ubershaders/12.shader_test was able to
pull a couple of loads from inside if statements and reuse
them.

total fills in shared programs: 23143 -> 23089 (-0.23%)
fills in affected programs: 2718 -> 2664 (-1.99%)
helped: 27
HURT: 0

All from shaders/dolphin/ubershaders/.

LOST:   0
GAINED: 0

The other generations follow the same overall shape.  The spills and
fills HURTs are all from the same game.

shader-db results for Broadwell.

total instructions in shared programs: 15402037 -> 15401841 (<.01%)
instructions in affected programs: 144386 -> 144190 (-0.14%)
helped: 86
HURT: 9

total cycles in shared programs: 600912755 -> 600902486 (<.01%)
cycles in affected programs: 185662820 -> 185652551 (<.01%)
helped: 2598
HURT: 3053

total loops in shared programs: 4579 -> 4579 (0.00%)
loops in affected programs: 0 -> 0
helped: 0
HURT: 0

total spills in shared programs: 80929 -> 80924 (<.01%)
spills in affected programs: 720 -> 715 (-0.69%)
helped: 1
HURT: 5

total fills in shared programs: 93057 -> 93013 (-0.05%)
fills in affected programs: 3398 -> 3354 (-1.29%)
helped: 27
HURT: 5

LOST:   0
GAINED: 2

shader-db results for Haswell:

total instructions in shared programs: 9231975 -> 9230357 (-0.02%)
instructions in affected programs: 44992 -> 43374 (-3.60%)
helped: 27
HURT: 69

total cycles in shared programs: 87760587 -> 87727502 (-0.04%)
cycles in affected programs: 7720673 -> 7687588 (-0.43%)
helped: 1609
HURT: 1416

total loops in shared programs: 1830 -> 1830 (0.00%)
loops in affected programs: 0 -> 0
helped: 0
HURT: 0

total spills in shared programs: 1988 -> 1692 (-14.89%)
spills in affected programs: 296 -> 0
helped: 1
HURT: 0

total fills in shared programs: 2103 -> 1668 (-20.68%)
fills in affected programs: 438 -> 3 (-99.32%)
helped: 4
HURT: 0

LOST:   0
GAINED: 1
---
 src/compiler/nir/nir_opt_copy_prop_vars.c | 394 +-
 1 file changed, 317 insertions(+), 77 deletions(-)

diff --git a/src/compiler/nir/nir_opt_copy_prop_vars.c 
b/src/compiler/nir/nir_opt_copy_prop_vars.c
index f58abfbb69f..966ccbdec53 100644
--- a/src/compiler/nir/nir_opt_copy_prop_vars.c
+++ b/src/compiler/nir/nir_opt_copy_prop_vars.c
@@ -26,6 +26,7 @@
 #include "nir_deref.h"
 
 #include "util/bitscan.h"
+#include "util/u_dynarray.h"
 
 /**
  * Variable-based copy propagation
@@ -42,16 +43,21 @@
  * to do this because it isn't aware of variable writes that may 

[Mesa-dev] [PATCH 06/11] intel/nir: Use the separated dead write vars pass

2018-09-14 Thread Caio Marcelo de Oliveira Filho
No changes to shader-db.
---
 src/intel/compiler/brw_nir.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index b38c3ba383d..77938efae31 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -553,6 +553,7 @@ brw_nir_optimize(nir_shader *nir, const struct brw_compiler 
*compiler,
  OPT(nir_opt_find_array_copies);
   }
   OPT(nir_opt_copy_prop_vars);
+  OPT(nir_opt_dead_write_vars);
 
   if (is_scalar) {
  OPT(nir_lower_alu_to_scalar);
-- 
2.19.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/11] nir: Separate dead write removal into its own pass

2018-09-14 Thread Caio Marcelo de Oliveira Filho
Instead of doing this as part of the existing copy_prop_vars pass.

Separation makes it easier to expand the scope of both passes to be more
than per-block.  For copy propagation, the information about valid
copies comes from previous instructions; while the dead write removal
depends on information from later instructions ("has any instruction
used this deref before it is overwritten?").

Also change the tests to use this pass (instead of copy prop vars).
Note that the disabled tests continue to fail, since the standalone
pass is still per-block.

v2: Remove entries from dynarray instead of marking items as
deleted.  Use foreach_reverse. (Caio)

(all from Jason)
Do not cache nir_deref_path.  Not worth it for this patch.
Clear unused writes when hitting a call instruction.
Clean up enumeration of modes for barriers.
Move metadata calls to the inner function.
---
 src/compiler/Makefile.sources  |   1 +
 src/compiler/nir/meson.build   |   1 +
 src/compiler/nir/nir.h |   2 +
 src/compiler/nir/nir_opt_dead_write_vars.c | 216 +
 src/compiler/nir/tests/vars_tests.cpp  |   3 -
 5 files changed, 220 insertions(+), 3 deletions(-)
 create mode 100644 src/compiler/nir/nir_opt_dead_write_vars.c

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index d3b06564832..b65bb9b80b9 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -274,6 +274,7 @@ NIR_FILES = \
nir/nir_opt_cse.c \
nir/nir_opt_dce.c \
nir/nir_opt_dead_cf.c \
+   nir/nir_opt_dead_write_vars.c \
nir/nir_opt_find_array_copies.c \
nir/nir_opt_gcm.c \
nir/nir_opt_global_to_local.c \
diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index 1a7fa2d3327..d8f65640004 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -158,6 +158,7 @@ files_libnir = files(
   'nir_opt_cse.c',
   'nir_opt_dce.c',
   'nir_opt_dead_cf.c',
+  'nir_opt_dead_write_vars.c',
   'nir_opt_find_array_copies.c',
   'nir_opt_gcm.c',
   'nir_opt_global_to_local.c',
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 599f469a714..80d145cac1e 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3030,6 +3030,8 @@ bool nir_opt_dce(nir_shader *shader);
 
 bool nir_opt_dead_cf(nir_shader *shader);
 
+bool nir_opt_dead_write_vars(nir_shader *shader);
+
 bool nir_opt_find_array_copies(nir_shader *shader);
 
 bool nir_opt_gcm(nir_shader *shader, bool value_number);
diff --git a/src/compiler/nir/nir_opt_dead_write_vars.c 
b/src/compiler/nir/nir_opt_dead_write_vars.c
new file mode 100644
index 000..5a3145875cb
--- /dev/null
+++ b/src/compiler/nir/nir_opt_dead_write_vars.c
@@ -0,0 +1,216 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+#include "nir_deref.h"
+
+#include "util/u_dynarray.h"
+
+/**
+ * Elimination of dead writes based on derefs.
+ *
+ * Dead writes are stores and copies that write to a deref, which then gets
+ * another write before it was used (read or sourced for a copy).  Those
+ * writes can be removed since they don't affect anything.
+ *
+ * For derefs that refer to a memory area that can be read after the program,
+ * the last write is considered used.  The presence of certain instructions
+ * may also cause writes to be considered used, e.g. memory barrier (in this 
case
+ * the value must be written as other thread might use it).
+ *
+ * The write mask for store instructions is considered, so it is possible that
+ * a store is removed because of the combination of other stores overwriting
+ * its value.
+ */
+
+/* Entry for unused_writes arrays. */
+struct write_entry {
+   /* If NULL indicates the entry is free to be reused. */
+   nir_intrinsic_instr 

[Mesa-dev] [PATCH 10/11] nir: Take call instruction into account in copy_prop_vars

2018-09-14 Thread Caio Marcelo de Oliveira Filho
Calls are not used yet (functions are inlined), but since new code is
already taking them into account, do it here too.  The convention here
and in other places is that neither writable memory nor global
variables are assumed to remain unchanged across a call.

Also, explicitly state the modes affected (instead of using the
reverse logic) in one of the apply_barrier_for_modes calls.

Suggested (indirectly) by Jason.
---

Jason suggested this for the other pass, so doing this here too.

 src/compiler/nir/nir_opt_copy_prop_vars.c | 17 +++--
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/compiler/nir/nir_opt_copy_prop_vars.c 
b/src/compiler/nir/nir_opt_copy_prop_vars.c
index 5276aa176d8..f58abfbb69f 100644
--- a/src/compiler/nir/nir_opt_copy_prop_vars.c
+++ b/src/compiler/nir/nir_opt_copy_prop_vars.c
@@ -404,6 +404,14 @@ copy_prop_vars_block(struct copy_prop_var_state *state,
   copy_entry_remove(state, iter);
 
nir_foreach_instr_safe(instr, block) {
+  if (instr->type == nir_instr_type_call) {
+ apply_barrier_for_modes(copies, nir_var_shader_out |
+ nir_var_global |
+ nir_var_shader_storage |
+ nir_var_shared);
+ continue;
+  }
+
   if (instr->type != nir_instr_type_intrinsic)
  continue;
 
@@ -411,12 +419,9 @@ copy_prop_vars_block(struct copy_prop_var_state *state,
   switch (intrin->intrinsic) {
   case nir_intrinsic_barrier:
   case nir_intrinsic_memory_barrier:
- /* If we hit a barrier, we need to trash everything that may possibly
-  * be accessible to another thread.  Locals, globals, and things of
-  * the like are safe, however.
-  */
- apply_barrier_for_modes(state, ~(nir_var_local | nir_var_global |
-  nir_var_shader_in | 
nir_var_uniform));
+ apply_barrier_for_modes(copies, nir_var_shader_out |
+ nir_var_shader_storage |
+ nir_var_shared);
  break;
 
   case nir_intrinsic_emit_vertex:
-- 
2.19.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/11] util: Add macro to get number of elements in dynarray

2018-09-14 Thread Caio Marcelo de Oliveira Filho
---

I've ended up not using this macro in this series, but it is useful
for other cases, so kept it here.

 src/util/u_dynarray.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/util/u_dynarray.h b/src/util/u_dynarray.h
index f74bfc7080b..53dde9241bb 100644
--- a/src/util/u_dynarray.h
+++ b/src/util/u_dynarray.h
@@ -149,6 +149,7 @@ util_dynarray_trim(struct util_dynarray *buf)
 #define util_dynarray_element(buf, type, idx) ((type*)(buf)->data + (idx))
 #define util_dynarray_begin(buf) ((buf)->data)
 #define util_dynarray_end(buf) ((void*)util_dynarray_element((buf), char, 
(buf)->size))
+#define util_dynarray_num_elements(buf, type) ((buf)->size / sizeof(type))
 
 #define util_dynarray_foreach(buf, type, elem) \
for (type *elem = (type *)(buf)->data; \
-- 
2.19.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/11] nir: Add tests for dead write elimination

2018-09-14 Thread Caio Marcelo de Oliveira Filho
Note at the moment the pass called is nir_opt_copy_prop_vars, because
dead write elimination is implemented there.

Also added tests that involve identifying dead writes in multiple
blocks (e.g. the overwrite happens in another block).  Those currently
fail as expected, so are marked to be skipped.
---
 src/compiler/nir/tests/vars_tests.cpp | 241 ++
 1 file changed, 241 insertions(+)

diff --git a/src/compiler/nir/tests/vars_tests.cpp 
b/src/compiler/nir/tests/vars_tests.cpp
index 7fbdb514349..dd913f04429 100644
--- a/src/compiler/nir/tests/vars_tests.cpp
+++ b/src/compiler/nir/tests/vars_tests.cpp
@@ -26,6 +26,9 @@
 #include "nir.h"
 #include "nir_builder.h"
 
+/* This optimization is done together with copy propagation. */
+#define nir_opt_dead_write_vars nir_opt_copy_prop_vars
+
 namespace {
 
 class nir_vars_test : public ::testing::Test {
@@ -141,6 +144,7 @@ nir_imm_ivec2(nir_builder *build, int x, int y)
 
 /* Allow grouping the tests while still sharing the helpers. */
 class nir_copy_prop_vars_test : public nir_vars_test {};
+class nir_dead_write_vars_test : public nir_vars_test {};
 
 } // namespace
 
@@ -197,3 +201,240 @@ TEST_F(nir_copy_prop_vars_test, simple_store_load)
   EXPECT_EQ(store->src[1].ssa, stored_value);
}
 }
+
+TEST_F(nir_dead_write_vars_test, no_dead_writes_in_block)
+{
+   nir_variable **v = create_many_int(nir_var_shader_storage, "v", 2);
+
+   nir_store_var(b, v[0], nir_load_var(b, v[1]), 1);
+
+   bool progress = nir_opt_dead_write_vars(b->shader);
+   ASSERT_FALSE(progress);
+}
+
+TEST_F(nir_dead_write_vars_test, no_dead_writes_different_components_in_block)
+{
+   nir_variable **v = create_many_ivec2(nir_var_shader_storage, "v", 3);
+
+   nir_store_var(b, v[0], nir_load_var(b, v[1]), 1 << 0);
+   nir_store_var(b, v[0], nir_load_var(b, v[2]), 1 << 1);
+
+   bool progress = nir_opt_dead_write_vars(b->shader);
+   ASSERT_FALSE(progress);
+}
+
+TEST_F(nir_dead_write_vars_test, no_dead_writes_in_if_statement)
+{
+   nir_variable **v = create_many_int(nir_var_shader_storage, "v", 6);
+
+   nir_store_var(b, v[2], nir_load_var(b, v[0]), 1);
+   nir_store_var(b, v[3], nir_load_var(b, v[1]), 1);
+
+   /* Each arm of the if statement will overwrite one store. */
+   nir_if *if_stmt = nir_push_if(b, nir_imm_int(b, 0));
+   nir_store_var(b, v[2], nir_load_var(b, v[4]), 1);
+
+   nir_push_else(b, if_stmt);
+   nir_store_var(b, v[3], nir_load_var(b, v[5]), 1);
+
+   nir_pop_if(b, if_stmt);
+
+   bool progress = nir_opt_dead_write_vars(b->shader);
+   ASSERT_FALSE(progress);
+}
+
+TEST_F(nir_dead_write_vars_test, no_dead_writes_in_loop_statement)
+{
+   nir_variable **v = create_many_int(nir_var_shader_storage, "v", 3);
+
+   nir_store_var(b, v[0], nir_load_var(b, v[1]), 1);
+
+   /* Loop will write other value.  Since it might not be executed, it doesn't
+* kill the first write.
+*/
+   nir_loop *loop = nir_push_loop(b);
+
+   nir_if *if_stmt = nir_push_if(b, nir_imm_int(b, 0));
+   nir_jump(b, nir_jump_break);
+   nir_pop_if(b, if_stmt);
+
+   nir_store_var(b, v[0], nir_load_var(b, v[2]), 1);
+   nir_pop_loop(b, loop);
+
+   bool progress = nir_opt_dead_write_vars(b->shader);
+   ASSERT_FALSE(progress);
+}
+
+TEST_F(nir_dead_write_vars_test, dead_write_in_block)
+{
+   nir_variable **v = create_many_int(nir_var_shader_storage, "v", 3);
+
+   nir_store_var(b, v[0], nir_load_var(b, v[1]), 1);
+   nir_ssa_def *load_v2 = nir_load_var(b, v[2]);
+   nir_store_var(b, v[0], load_v2, 1);
+
+   bool progress = nir_opt_dead_write_vars(b->shader);
+   ASSERT_TRUE(progress);
+
+   EXPECT_EQ(1, count_intrinsics(nir_intrinsic_store_deref));
+
+   nir_intrinsic_instr *store = find_next_intrinsic(nir_intrinsic_store_deref, 
NULL);
+   ASSERT_TRUE(store->src[1].is_ssa);
+   EXPECT_EQ(store->src[1].ssa, load_v2);
+}
+
+TEST_F(nir_dead_write_vars_test, dead_write_components_in_block)
+{
+   nir_variable **v = create_many_ivec2(nir_var_shader_storage, "v", 3);
+
+   nir_store_var(b, v[0], nir_load_var(b, v[1]), 1 << 0);
+   nir_ssa_def *load_v2 = nir_load_var(b, v[2]);
+   nir_store_var(b, v[0], load_v2, 1 << 0);
+
+   bool progress = nir_opt_dead_write_vars(b->shader);
+   ASSERT_TRUE(progress);
+
+   EXPECT_EQ(1, count_intrinsics(nir_intrinsic_store_deref));
+
+   nir_intrinsic_instr *store = find_next_intrinsic(nir_intrinsic_store_deref, 
NULL);
+   ASSERT_TRUE(store->src[1].is_ssa);
+   EXPECT_EQ(store->src[1].ssa, load_v2);
+}
+
+
+/* TODO: The DISABLED tests below depend on the dead write removal being able to
+ * identify dead writes between multiple blocks.  This is still not
+ * implemented.
+ */
+
+TEST_F(nir_dead_write_vars_test, DISABLED_dead_write_in_two_blocks)
+{
+   nir_variable **v = create_many_int(nir_var_shader_storage, "v", 3);
+
+   nir_store_var(b, v[0], nir_load_var(b, v[1]), 1);
+   nir_ssa_def *load_v2 = nir_load_var(b, v[2]);
+
+   /* Causes the stores to be in different blocks. */
+   nir_pop_if(b, nir_push_if(b, nir_imm_int(b, 0)));
+
+   

[Mesa-dev] [PATCH 03/11] nir: Add test file for vars related passes

2018-09-14 Thread Caio Marcelo de Oliveira Filho
Add basic helpers for doing tests on the vars related optimization
passes.  The main goal is to lower the barrier to create tests during
development and debugging of the passes.  Full coverage is not a
requirement.
---
 src/compiler/Makefile.nir.am  |  34 +++--
 src/compiler/nir/meson.build  |  11 ++
 src/compiler/nir/tests/vars_tests.cpp | 199 ++
 3 files changed, 233 insertions(+), 11 deletions(-)
 create mode 100644 src/compiler/nir/tests/vars_tests.cpp

diff --git a/src/compiler/Makefile.nir.am b/src/compiler/Makefile.nir.am
index 4ccd7f36be9..c646c6bdc1e 100644
--- a/src/compiler/Makefile.nir.am
+++ b/src/compiler/Makefile.nir.am
@@ -60,25 +60,37 @@ nir/nir_opt_algebraic.c: nir/nir_opt_algebraic.py 
nir/nir_algebraic.py
$(MKDIR_GEN)
$(PYTHON_GEN) $(srcdir)/nir/nir_opt_algebraic.py > $@ || ($(RM) $@; 
false)
 
-check_PROGRAMS += nir/tests/control_flow_tests
+check_PROGRAMS += \
+   nir/tests/control_flow_tests \
+   nir/tests/vars_tests
 
-nir_tests_control_flow_tests_CPPFLAGS = \
+NIR_TESTS_CPPFLAGS = \
$(AM_CPPFLAGS) \
-I$(top_builddir)/src/compiler/nir \
-I$(top_srcdir)/src/compiler/nir
-
-nir_tests_control_flow_tests_SOURCES = \
-   nir/tests/control_flow_tests.cpp
-nir_tests_control_flow_tests_CFLAGS =  \
+NIR_TESTS_CFLAGS = \
$(PTHREAD_CFLAGS)
-nir_tests_control_flow_tests_LDADD =   \
-   $(top_builddir)/src/gtest/libgtest.la   \
-   nir/libnir.la   \
-   $(top_builddir)/src/util/libmesautil.la \
+NIR_TESTS_LDADD = \
+   $(top_builddir)/src/gtest/libgtest.la \
+   nir/libnir.la \
+   $(top_builddir)/src/util/libmesautil.la \
$(PTHREAD_LIBS)
 
 
-TESTS += nir/tests/control_flow_tests
+nir_tests_control_flow_tests_CPPFLAGS = $(NIR_TESTS_CPPFLAGS)
+nir_tests_control_flow_tests_SOURCES = nir/tests/control_flow_tests.cpp
+nir_tests_control_flow_tests_CFLAGS = $(NIR_TESTS_CFLAGS)
+nir_tests_control_flow_tests_LDADD = $(NIR_TESTS_LDADD)
+
+nir_tests_vars_tests_CPPFLAGS = $(NIR_TESTS_CPPFLAGS)
+nir_tests_vars_tests_SOURCES = nir/tests/vars_tests.cpp
+nir_tests_vars_tests_CFLAGS = $(NIR_TESTS_CFLAGS)
+nir_tests_vars_tests_LDADD = $(NIR_TESTS_LDADD)
+
+
+TESTS += \
+nir/tests/control_flow_tests \
+nir/tests/vars_tests
 
 
 BUILT_SOURCES += \
diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index 090aa7a628f..1a7fa2d3327 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -245,4 +245,15 @@ if with_tests
   link_with : libmesa_util,
 )
   )
+  test(
+'nir_vars',
+executable(
+  'nir_vars_test',
+  files('tests/vars_tests.cpp'),
+  cpp_args : [cpp_vis_args, cpp_msvc_compat_args],
+  include_directories : [inc_common],
+  dependencies : [dep_thread, idep_gtest, idep_nir],
+  link_with : libmesa_util,
+)
+  )
 endif
diff --git a/src/compiler/nir/tests/vars_tests.cpp 
b/src/compiler/nir/tests/vars_tests.cpp
new file mode 100644
index 000..7fbdb514349
--- /dev/null
+++ b/src/compiler/nir/tests/vars_tests.cpp
@@ -0,0 +1,199 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include 
+
+#include "nir.h"
+#include "nir_builder.h"
+
+namespace {
+
+class nir_vars_test : public ::testing::Test {
+protected:
+   nir_vars_test();
+   ~nir_vars_test();
+
+   nir_variable *create_int(nir_variable_mode mode, const char *name) {
+  if (mode == nir_var_local)
+ return nir_local_variable_create(b->impl, glsl_int_type(), name);
+  return nir_variable_create(b->shader, mode, glsl_int_type(), name);
+   }
+
+   nir_variable *create_ivec2(nir_variable_mode mode, const char *name) {
+  const glsl_type *var_type = glsl_vector_type(GLSL_TYPE_INT, 2);
+  if (mode == nir_var_local)
+   

[Mesa-dev] [PATCH 00/11] NIR Copy Propagation between blocks

2018-09-14 Thread Caio Marcelo de Oliveira Filho
This series supersedes the "Global dead write vars removal pass".

The goal here is to perform copy propagation among values in different
blocks.  While this currently has small benefits (it effectively
helped some cases with uniforms), as we move other resources to be
addressed with derefs (e.g. SSBOs), we expect it to be more useful.
In particular with compute shaders.

To be able to do this I had to extract the dead write removal from the
copy propagation pass.  When performing more than per-block, the
information flows in a different way for that optimization (backwards),
so it helps to keep them separated.

The pass uses an approach similar to what we do in GLSL copy prop.  We
propagate values forward following the control flow graph.  It doesn't
try to merge values from different branches or handle more detailed
control flow.  I think this approach is a good intermediate step.

I've experimented with various approaches to implement a full
data-flow analysis, but all of them ended up either too complex or too
messy.  Some factors to that were: (a) we have load/stores and copies,
so a value in ACP needs to be "broken up into pieces", (b) copies with
wildcards force us to take into consideration whether derefs are
contained or not, at many levels, (c) we have writemasks (for the
vectors) associated.

In particular (b) made the deref_map tree-based structure I've
discussed elsewhere not as good as I've expected.  Because we want to
keep track of "a[*].x", "a[1].x" and "a[indirect].x", the walk on the
tree is not linear on the size of the deref.

A future idea I'll explore is trying to split the problem in different
pieces, directed by the inputs we see. E.g. maybe a data-flow analysis
only of the copies, or only the fully qualified load/stores, or handle
only scalars (after a vec to scalar pass).

For now, I've shelved the global optimization for dead write removal.
It wasn't helping any cases, so will wait until we have more derefs
around to see the difference.

Caio Marcelo de Oliveira Filho (11):
  util: Add foreach_reverse for dynarray
  util: Add macro to get number of elements in dynarray
  nir: Add test file for vars related passes
  nir: Add tests for dead write elimination
  nir: Separate dead write removal into its own pass
  intel/nir: Use the separated dead write vars pass
  freedreno/ir3: Use the separated dead write vars pass
  nir: Remove handling of dead writes from copy_prop_vars
  nir: Add tests for copy propagation of derefs
  nir: Take call instruction into account in copy_prop_vars
  nir: Copy propagation between blocks

 src/compiler/Makefile.nir.am|  34 +-
 src/compiler/Makefile.sources   |   1 +
 src/compiler/nir/meson.build|  12 +
 src/compiler/nir/nir.h  |   2 +
 src/compiler/nir/nir_opt_copy_prop_vars.c   | 481 +
 src/compiler/nir/nir_opt_dead_write_vars.c  | 216 ++
 src/compiler/nir/tests/vars_tests.cpp   | 737 
 src/gallium/drivers/freedreno/ir3/ir3_nir.c |   1 +
 src/intel/compiler/brw_nir.c|   1 +
 src/util/u_dynarray.h   |   7 +
 10 files changed, 1329 insertions(+), 163 deletions(-)
 create mode 100644 src/compiler/nir/nir_opt_dead_write_vars.c
 create mode 100644 src/compiler/nir/tests/vars_tests.cpp

-- 
2.19.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/11] util: Add foreach_reverse for dynarray

2018-09-14 Thread Caio Marcelo de Oliveira Filho
Useful to walk the array removing elements by swapping them with the
last element.
---
 src/util/u_dynarray.h | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/util/u_dynarray.h b/src/util/u_dynarray.h
index 6bea481d44b..f74bfc7080b 100644
--- a/src/util/u_dynarray.h
+++ b/src/util/u_dynarray.h
@@ -154,6 +154,12 @@ util_dynarray_trim(struct util_dynarray *buf)
for (type *elem = (type *)(buf)->data; \
 elem < (type *)((char *)(buf)->data + (buf)->size); elem++)
 
+#define util_dynarray_foreach_reverse(buf, type, elem)  \
+   if ((buf)->size > 0) \
+  for (type *elem = util_dynarray_top_ptr(buf, type);   \
+   elem >= (type *)(buf)->data; \
+   elem--)
+
 #define util_dynarray_delete_unordered(buf, type, v)\
do { \
   unsigned num_elements = (buf)->size / sizeof(type);   \
-- 
2.19.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Adding support for EXT_sRGB for Opengl ES

2018-09-14 Thread Jacob Lifshay
Any progress on adding EXT_sRGB support to Mesa?

Jacob Lifshay
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 78123] svga prints out command errors

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=78123

--- Comment #7 from Brian Paul  ---
John, is this still an issue?

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/5] anv/query: Add an emit_srm helper

2018-09-14 Thread Jason Ekstrand
---
 src/intel/vulkan/genX_query.c | 53 ++-
 1 file changed, 21 insertions(+), 32 deletions(-)

diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c
index 4ccbe2975de..7533ec05095 100644
--- a/src/intel/vulkan/genX_query.c
+++ b/src/intel/vulkan/genX_query.c
@@ -310,6 +310,22 @@ VkResult genX(GetQueryPoolResults)(
return status;
 }
 
+static void
+emit_srm32(struct anv_batch *batch, struct anv_address addr, uint32_t reg)
+{
+   anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), srm) {
+  srm.MemoryAddress= addr;
+  srm.RegisterAddress  = reg;
+   }
+}
+
+static void
+emit_srm64(struct anv_batch *batch, struct anv_address addr, uint32_t reg)
+{
+   emit_srm32(batch, anv_address_add(addr, 0), reg + 0);
+   emit_srm32(batch, anv_address_add(addr, 4), reg + 4);
+}
+
 static void
 emit_ps_depth_count(struct anv_cmd_buffer *cmd_buffer,
 struct anv_address addr)
@@ -394,16 +410,7 @@ emit_pipeline_stat(struct anv_cmd_buffer *cmd_buffer, 
uint32_t stat,
  (1 << ARRAY_SIZE(vk_pipeline_stat_to_reg)) - 1);
 
assert(stat < ARRAY_SIZE(vk_pipeline_stat_to_reg));
-   uint32_t reg = vk_pipeline_stat_to_reg[stat];
-
-   anv_batch_emit(_buffer->batch, GENX(MI_STORE_REGISTER_MEM), lrm) {
-  lrm.RegisterAddress  = reg;
-  lrm.MemoryAddress= anv_address_add(addr, 0);
-   }
-   anv_batch_emit(_buffer->batch, GENX(MI_STORE_REGISTER_MEM), lrm) {
-  lrm.RegisterAddress  = reg + 4;
-  lrm.MemoryAddress= anv_address_add(addr, 4);
-   }
+   emit_srm64(_buffer->batch, addr, vk_pipeline_stat_to_reg[stat]);
 }
 
 void genX(CmdBeginQuery)(
@@ -515,14 +522,7 @@ void genX(CmdWriteTimestamp)(
 
switch (pipelineStage) {
case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT:
-  anv_batch_emit(_buffer->batch, GENX(MI_STORE_REGISTER_MEM), srm) {
- srm.RegisterAddress  = TIMESTAMP;
- srm.MemoryAddress= anv_address_add(query_addr, 8);
-  }
-  anv_batch_emit(_buffer->batch, GENX(MI_STORE_REGISTER_MEM), srm) {
- srm.RegisterAddress  = TIMESTAMP + 4;
- srm.MemoryAddress= anv_address_add(query_addr, 12);
-  }
+  emit_srm64(_buffer->batch, anv_address_add(query_addr, 8), 
TIMESTAMP);
   break;
 
default:
@@ -689,21 +689,10 @@ gpu_write_query_result(struct anv_batch *batch,
VkQueryResultFlags flags,
uint32_t value_index, uint32_t reg)
 {
-   if (flags & VK_QUERY_RESULT_64_BIT)
-  dst_addr = anv_address_add(dst_addr, value_index * 8);
-   else
-  dst_addr = anv_address_add(dst_addr, value_index * 4);
-
-   anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), srm) {
-  srm.RegisterAddress  = reg;
-  srm.MemoryAddress= anv_address_add(dst_addr, 0);
-   }
-
if (flags & VK_QUERY_RESULT_64_BIT) {
-  anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), srm) {
- srm.RegisterAddress  = reg + 4;
- srm.MemoryAddress= anv_address_add(dst_addr, 4);
-  }
+  emit_srm64(batch, anv_address_add(dst_addr, value_index * 8), reg);
+   } else {
+  emit_srm32(batch, anv_address_add(dst_addr, value_index * 4), reg);
}
 }
 
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/5] anv/query: Use anv_address everywhere

2018-09-14 Thread Jason Ekstrand
Instead of passing around BOs and offsets, use addresses which are anv's
GPU equivalent of pointers.
---
 src/intel/vulkan/genX_query.c | 121 ++
 1 file changed, 64 insertions(+), 57 deletions(-)

diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c
index 817a3a3c4e2..56d18e021e4 100644
--- a/src/intel/vulkan/genX_query.c
+++ b/src/intel/vulkan/genX_query.c
@@ -139,6 +139,15 @@ void genX(DestroyQueryPool)(
vk_free2(>alloc, pAllocator, pool);
 }
 
+static struct anv_address
+anv_query_address(struct anv_query_pool *pool, uint32_t query)
+{
+   return (struct anv_address) {
+  .bo = >bo,
+  .offset = query * pool->stride,
+   };
+}
+
 static void
 cpu_write_query_result(void *dst_slot, VkQueryResultFlags flags,
uint32_t value_index, uint64_t result)
@@ -303,13 +312,13 @@ VkResult genX(GetQueryPoolResults)(
 
 static void
 emit_ps_depth_count(struct anv_cmd_buffer *cmd_buffer,
-struct anv_bo *bo, uint32_t offset)
+struct anv_address addr)
 {
anv_batch_emit(_buffer->batch, GENX(PIPE_CONTROL), pc) {
   pc.DestinationAddressType  = DAT_PPGTT;
   pc.PostSyncOperation   = WritePSDepthCount;
   pc.DepthStallEnable= true;
-  pc.Address = (struct anv_address) { bo, offset };
+  pc.Address = addr;
 
   if (GEN_GEN == 9 && cmd_buffer->device->info.gt == 4)
  pc.CommandStreamerStallEnable = true;
@@ -318,12 +327,12 @@ emit_ps_depth_count(struct anv_cmd_buffer *cmd_buffer,
 
 static void
 emit_query_availability(struct anv_cmd_buffer *cmd_buffer,
-struct anv_bo *bo, uint32_t offset)
+struct anv_address addr)
 {
anv_batch_emit(_buffer->batch, GENX(PIPE_CONTROL), pc) {
   pc.DestinationAddressType  = DAT_PPGTT;
   pc.PostSyncOperation   = WriteImmediateData;
-  pc.Address = (struct anv_address) { bo, offset };
+  pc.Address = addr;
   pc.ImmediateData   = 1;
}
 }
@@ -340,20 +349,19 @@ emit_zero_queries(struct anv_cmd_buffer *cmd_buffer,
const uint32_t num_elements = pool->stride / sizeof(uint64_t);
 
for (uint32_t i = 0; i < num_queries; i++) {
-  uint32_t slot_offset = (first_index + i) * pool->stride;
+  struct anv_address slot_addr =
+ anv_query_address(pool, first_index + i);
   for (uint32_t j = 1; j < num_elements; j++) {
  anv_batch_emit(_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
-sdi.Address.bo = >bo;
-sdi.Address.offset = slot_offset + j * sizeof(uint64_t);
+sdi.Address = anv_address_add(slot_addr, j * sizeof(uint64_t));
 sdi.ImmediateData = 0ull;
  }
  anv_batch_emit(_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
-sdi.Address.bo = >bo;
-sdi.Address.offset = slot_offset + j * sizeof(uint64_t) + 4;
+sdi.Address = anv_address_add(slot_addr, j * sizeof(uint64_t) + 4);
 sdi.ImmediateData = 0ull;
  }
   }
-  emit_query_availability(cmd_buffer, >bo, slot_offset);
+  emit_query_availability(cmd_buffer, slot_addr);
}
 }
 
@@ -368,10 +376,7 @@ void genX(CmdResetQueryPool)(
 
for (uint32_t i = 0; i < queryCount; i++) {
   anv_batch_emit(_buffer->batch, GENX(MI_STORE_DATA_IMM), sdm) {
- sdm.Address = (struct anv_address) {
-.bo = >bo,
-.offset = (firstQuery + i) * pool->stride,
- };
+ sdm.Address = anv_query_address(pool, firstQuery + i);
  sdm.ImmediateData = 0;
   }
}
@@ -393,7 +398,7 @@ static const uint32_t vk_pipeline_stat_to_reg[] = {
 
 static void
 emit_pipeline_stat(struct anv_cmd_buffer *cmd_buffer, uint32_t stat,
-   struct anv_bo *bo, uint32_t offset)
+   struct anv_address addr)
 {
STATIC_ASSERT(ANV_PIPELINE_STATISTICS_MASK ==
  (1 << ARRAY_SIZE(vk_pipeline_stat_to_reg)) - 1);
@@ -402,12 +407,12 @@ emit_pipeline_stat(struct anv_cmd_buffer *cmd_buffer, 
uint32_t stat,
uint32_t reg = vk_pipeline_stat_to_reg[stat];
 
anv_batch_emit(_buffer->batch, GENX(MI_STORE_REGISTER_MEM), lrm) {
-  lrm.RegisterAddress  = reg,
-  lrm.MemoryAddress= (struct anv_address) { bo, offset };
+  lrm.RegisterAddress  = reg;
+  lrm.MemoryAddress= anv_address_add(addr, 0);
}
anv_batch_emit(_buffer->batch, GENX(MI_STORE_REGISTER_MEM), lrm) {
-  lrm.RegisterAddress  = reg + 4,
-  lrm.MemoryAddress= (struct anv_address) { bo, offset + 4 };
+  lrm.RegisterAddress  = reg + 4;
+  lrm.MemoryAddress= anv_address_add(addr, 4);
}
 }
 
@@ -419,10 +424,11 @@ void genX(CmdBeginQuery)(
 {
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
+   struct anv_address query_addr = anv_query_address(pool, 

[Mesa-dev] [PATCH 2/5] anv/query: Write both dwords in emit_zero_queries

2018-09-14 Thread Jason Ekstrand
Each query slot is a uint64_t and we were only zeroing half of it.

Fixes: 7ec6e4e68980 "anv/query: implement multiview interactions"
---
 src/intel/vulkan/genX_query.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c
index 1b26401c9ff..817a3a3c4e2 100644
--- a/src/intel/vulkan/genX_query.c
+++ b/src/intel/vulkan/genX_query.c
@@ -347,6 +347,11 @@ emit_zero_queries(struct anv_cmd_buffer *cmd_buffer,
 sdi.Address.offset = slot_offset + j * sizeof(uint64_t);
 sdi.ImmediateData = 0ull;
  }
+ anv_batch_emit(_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
+sdi.Address.bo = >bo;
+sdi.Address.offset = slot_offset + j * sizeof(uint64_t) + 4;
+sdi.ImmediateData = 0ull;
+ }
   }
   emit_query_availability(cmd_buffer, >bo, slot_offset);
}
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/5] anv: Add a mi_memset and use it for zeroing queries

2018-09-14 Thread Jason Ekstrand
---
 src/intel/vulkan/anv_genX.h|  4 
 src/intel/vulkan/genX_gpu_memcpy.c | 17 +
 src/intel/vulkan/genX_query.c  | 14 ++
 3 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h
index bef9b5bde4e..7921e0674a0 100644
--- a/src/intel/vulkan/anv_genX.h
+++ b/src/intel/vulkan/anv_genX.h
@@ -80,5 +80,9 @@ void genX(cmd_buffer_mi_memcpy)(struct anv_cmd_buffer 
*cmd_buffer,
 struct anv_address dst, struct anv_address src,
 uint32_t size);
 
+void genX(cmd_buffer_mi_memset)(struct anv_cmd_buffer *cmd_buffer,
+struct anv_address dst, uint32_t value,
+uint32_t size);
+
 void genX(blorp_exec)(struct blorp_batch *batch,
   const struct blorp_params *params);
diff --git a/src/intel/vulkan/genX_gpu_memcpy.c 
b/src/intel/vulkan/genX_gpu_memcpy.c
index 2b39f2fc009..fd78f4d125b 100644
--- a/src/intel/vulkan/genX_gpu_memcpy.c
+++ b/src/intel/vulkan/genX_gpu_memcpy.c
@@ -108,6 +108,23 @@ genX(cmd_buffer_mi_memcpy)(struct anv_cmd_buffer 
*cmd_buffer,
return;
 }
 
+void
+genX(cmd_buffer_mi_memset)(struct anv_cmd_buffer *cmd_buffer,
+   struct anv_address dst, uint32_t value,
+   uint32_t size)
+{
+   /* This memset operates in units of dwords. */
+   assert(size % 4 == 0);
+   assert(dst.offset % 4 == 0);
+
+   for (uint32_t i = 0; i < size; i += 4) {
+  anv_batch_emit(_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
+ sdi.Address = anv_address_add(dst, i);
+ sdi.ImmediateData = value;
+  }
+   }
+}
+
 void
 genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
struct anv_address dst, struct anv_address src,
diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c
index 56d18e021e4..4ccbe2975de 100644
--- a/src/intel/vulkan/genX_query.c
+++ b/src/intel/vulkan/genX_query.c
@@ -346,21 +346,11 @@ emit_zero_queries(struct anv_cmd_buffer *cmd_buffer,
   struct anv_query_pool *pool,
   uint32_t first_index, uint32_t num_queries)
 {
-   const uint32_t num_elements = pool->stride / sizeof(uint64_t);
-
for (uint32_t i = 0; i < num_queries; i++) {
   struct anv_address slot_addr =
  anv_query_address(pool, first_index + i);
-  for (uint32_t j = 1; j < num_elements; j++) {
- anv_batch_emit(_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
-sdi.Address = anv_address_add(slot_addr, j * sizeof(uint64_t));
-sdi.ImmediateData = 0ull;
- }
- anv_batch_emit(_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
-sdi.Address = anv_address_add(slot_addr, j * sizeof(uint64_t) + 4);
-sdi.ImmediateData = 0ull;
- }
-  }
+  genX(cmd_buffer_mi_memset)(cmd_buffer, anv_address_add(slot_addr, 8),
+ 0, pool->stride - 8);
   emit_query_availability(cmd_buffer, slot_addr);
}
 }
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/5] anv/query: Increment an index while writing results

2018-09-14 Thread Jason Ekstrand
Instead of computing an index at the end which we hope maps to the
number of things written, just count the number of things as we go.
---
 src/intel/vulkan/genX_query.c | 67 ---
 1 file changed, 31 insertions(+), 36 deletions(-)

diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c
index 011db549c08..1b26401c9ff 100644
--- a/src/intel/vulkan/genX_query.c
+++ b/src/intel/vulkan/genX_query.c
@@ -249,18 +249,19 @@ VkResult genX(GetQueryPoolResults)(
*/
   bool write_results = available || (flags & VK_QUERY_RESULT_PARTIAL_BIT);
 
-  if (write_results) {
- switch (pool->type) {
- case VK_QUERY_TYPE_OCCLUSION: {
-cpu_write_query_result(pData, flags, 0, slot[2] - slot[1]);
-break;
- }
+  uint32_t idx = 0;
+  switch (pool->type) {
+  case VK_QUERY_TYPE_OCCLUSION:
+ if (write_results)
+cpu_write_query_result(pData, flags, idx, slot[2] - slot[1]);
+ idx++;
+ break;
 
- case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
-uint32_t statistics = pool->pipeline_statistics;
-uint32_t idx = 0;
-while (statistics) {
-   uint32_t stat = u_bit_scan();
+  case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
+ uint32_t statistics = pool->pipeline_statistics;
+ while (statistics) {
+uint32_t stat = u_bit_scan();
+if (write_results) {
uint64_t result = slot[idx * 2 + 2] - slot[idx * 2 + 1];
 
/* WaDividePSInvocationCountBy4:HSW,BDW */
@@ -269,29 +270,28 @@ VkResult genX(GetQueryPoolResults)(
   result >>= 2;
 
cpu_write_query_result(pData, flags, idx, result);
-
-   idx++;
 }
-assert(idx == util_bitcount(pool->pipeline_statistics));
-break;
+idx++;
  }
+ assert(idx == util_bitcount(pool->pipeline_statistics));
+ break;
+  }
 
- case VK_QUERY_TYPE_TIMESTAMP: {
-cpu_write_query_result(pData, flags, 0, slot[1]);
-break;
- }
- default:
-unreachable("invalid pool type");
- }
-  } else {
- status = VK_NOT_READY;
+  case VK_QUERY_TYPE_TIMESTAMP:
+ if (write_results)
+cpu_write_query_result(pData, flags, idx, slot[1]);
+ idx++;
+ break;
+
+  default:
+ unreachable("invalid pool type");
   }
 
-  if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
- uint32_t idx = (pool->type == VK_QUERY_TYPE_PIPELINE_STATISTICS) ?
-util_bitcount(pool->pipeline_statistics) : 1;
+  if (!write_results)
+ status = VK_NOT_READY;
+
+  if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
  cpu_write_query_result(pData, flags, idx, available);
-  }
 
   pData += stride;
   if (pData >= data_end)
@@ -749,17 +749,17 @@ void genX(CmdCopyQueryPoolResults)(
 
for (uint32_t i = 0; i < queryCount; i++) {
   slot_offset = (firstQuery + i) * pool->stride;
+  uint32_t idx = 0;
   switch (pool->type) {
   case VK_QUERY_TYPE_OCCLUSION:
  compute_query_result(_buffer->batch, MI_ALU_REG2,
   >bo, slot_offset + 8);
  gpu_write_query_result(_buffer->batch, buffer, destOffset,
-flags, 0, CS_GPR(2));
+flags, idx++, CS_GPR(2));
  break;
 
   case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
  uint32_t statistics = pool->pipeline_statistics;
- uint32_t idx = 0;
  while (statistics) {
 uint32_t stat = u_bit_scan();
 
@@ -774,9 +774,7 @@ void genX(CmdCopyQueryPoolResults)(
 }
 
 gpu_write_query_result(_buffer->batch, buffer, destOffset,
-   flags, idx, CS_GPR(0));
-
-idx++;
+   flags, idx++, CS_GPR(0));
  }
  assert(idx == util_bitcount(pool->pipeline_statistics));
  break;
@@ -794,9 +792,6 @@ void genX(CmdCopyQueryPoolResults)(
   }
 
   if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
- uint32_t idx = (pool->type == VK_QUERY_TYPE_PIPELINE_STATISTICS) ?
-util_bitcount(pool->pipeline_statistics) : 1;
-
  emit_load_alu_reg_u64(_buffer->batch, CS_GPR(0),
>bo, slot_offset);
  gpu_write_query_result(_buffer->batch, buffer, destOffset,
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 91098] vmwgfx null ptr dereference at vmw_screen_ioctl.c:76 due to ioctl failure

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=91098

Brian Paul  changed:

   What|Removed |Added

 Resolution|--- |WONTFIX
 Status|NEW |RESOLVED

--- Comment #2 from Brian Paul  ---
There's been no follow-up from the original poster.  Closing.  Re-open if it's
still an issue for you.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 48143] [vmwgfx] src/gallium/drivers/svga/svga_tgsi_insn.c:273:get_temp: Assertion `i < 32' failed.

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=48143

Brian Paul  changed:

   What|Removed |Added

 Resolution|--- |WONTFIX
 Status|NEW |RESOLVED

--- Comment #1 from Brian Paul  ---
This should not be an issue with the current driver.  We support much more than
32 temps regs now.

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 92983] [vmwgfx] SIGABRT vmw_screen_ioctl.c:461

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=92983

Brian Paul  changed:

   What|Removed |Added

 CC||v...@freedesktop.org

--- Comment #1 from Brian Paul  ---
Vinson, this is a pretty old bug.  Do you want to re-test it?

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 107878] Artifacting Hair on Overwatch vega56

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107878

--- Comment #6 from Timothy Arceri  ---
(In reply to gloriouseggroll from comment #5)
> can confirm fixed on llvm8 for me as well. (still broken with current mesa
> git and llvm 7)

If you want llvm7 to work you might need to do a git bisect to see what fixed
it and see if the fix can be backported to llvm 7 (assuming whoever fixed it
isn't trying to get it included already).

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 93970] Second Life - Advanced Lighting Model shader fails to compile on Radeon SI driver

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=93970

Timothy Arceri  changed:

   What|Removed |Added

 Resolution|--- |NOTOURBUG
 Status|NEEDINFO|RESOLVED

--- Comment #5 from Timothy Arceri  ---
I downloaded the latest Second Life viewer (which is apparently no longer
supported for Linux) and Advanced Lighting Model seems to work without any
workarounds, so it seems this was fixed in the game at some point. Closing.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 96542] Tonga Unreal elemental black lava since radeonsi: enable OpenGL 4.3

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=96542

Timothy Arceri  changed:

   What|Removed |Added

 Resolution|--- |NOTOURBUG
 Status|NEW |RESOLVED

--- Comment #3 from Timothy Arceri  ---
As per comment #1, when running on the Nvidia blob it always takes a GL 3.2
path. When I replay an apitrace taken on radeonsi the Nvidia blob also renders
the lava black, so this does indeed seem to be a bug in the demo. Closing.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 107878] Artifacting Hair on Overwatch vega56

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107878

--- Comment #5 from gloriouseggr...@gmail.com ---
can confirm fixed on llvm8 for me as well. (still broken with current mesa git
and llvm 7)

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium/util: don't let children of fork & exec inherit our thread affinity

2018-09-14 Thread Marek Olšák
Since only Blender and Firefox experience problems, they can be
blacklisted for this optimization, and then we can expand the
blacklist as we go.

Marek

On Fri, Sep 14, 2018 at 9:04 PM, Marek Olšák  wrote:
> On Fri, Sep 14, 2018 at 4:53 AM, Michel Dänzer  wrote:
>> On 2018-09-13 8:56 p.m., Marek Olšák wrote:
>>> On Thu, Sep 13, 2018 at 11:48 AM, Michel Dänzer  wrote:
 On 2018-09-13 2:40 a.m., Marek Olšák wrote:
> From: Marek Olšák 
>
> [...]
>
>  static void
> -util_init_cache_number(void)
> +util_init_thread_pinning(void)
>  {
> /* Get a semi-random number. */
> int64_t t = os_time_get_nano();
> L3_cache_number = (t ^ (t >> 8) ^ (t >> 16));
> +
> +   /* Reset thread affinity for all children of fork and exec to prevent

 I don't think exec (which doesn't spawn a child, it replaces the current
 process "image" with a new one) has anything to do with this.


> +* spawned processes and threads from inheriting the current thread's
> +* affinity.

 As the name implies, pthread_atfork only affects child processes spawned
 with fork(), not new threads. As such, I'm afraid this won't help at
 least for blender, which AFAICT doesn't call fork, it only spawns threads.
>>>
>>> All created threads and processes are just some variants of fork.
>>
>> fork(2) spawns a new process, not a new thread in the same process. Its
>> current implementation in glibc uses clone(2), which is also used for
>> spawning threads, but that's an implementation detail. The kernel still
>> has the dedicated fork system calls.
>>
>> pthread_atfork only affects new processes created with fork(2), not new
>> threads created in the same process.
>>
>>
>>> This patch is enough to stop inheriting thread affinity from X and
>>> gnome-shell to GL apps, to gcc run within an X terminal, etc.
>>> Everything within X inherits the thread affinity of X or gnome-shell,
>>> including gcc.
>>
>> FWIW, X clients are not descendants of the X server, so they must have
>> inherited it from something else.
>>
>> Anyway, now I understand the scope of this patch, thanks. But the commit
>> log and comment need to be fixed not to be misleading by talking about
>> exec and spawned threads. E.g.:
>>
>>  gallium/util: don't let child processes inherit our thread affinity
>>
>>/* Prevent child processes from inheriting the current thread's
>> * affinity.
>>
>>
>> That leaves:
>>
>>> +* What happens if a driver is unloaded and the app creates a thread?
>>
>> I suppose the child process will likely crash, because the memory
>> address where util_set_full_cpu_affinity was located will either be
>> unmapped or have random other contents?
>>
>> At least in theory, there could also be an issue where the application
>> might have set its own thread affinity before calling fork, which would
>> be clobbered by util_set_full_cpu_affinity in the child process.
>>
>>
>> Last but not least, this doesn't solve the issue of apps such as
>> blender, which spawn their own worker threads after initializing OpenGL
>> (possibly not themselves directly, but via the toolkit or another
>> library; e.g. GTK+4 uses OpenGL by default), inheriting the thread affinity.
>>
>>
>> Due to these issues, setting the thread affinity needs to be disabled by
>> default, and only white-listed for applications where it's known safe
>> and beneficial. This sucks, but I'm afraid that's the reality until
>> there's better API available which allows solving these issues.
>
> We don't have the bandwidth to maintain whitelists. This will either
> have to be always on or always off.
>
> On the positive side, only Ryzens with multiple CCXs get all the
> benefits and disadvantages.
>
> Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 106922] Tangrams demo: LLVM ERROR: Cannot select: 0x7e8d8750: i16 = bitcast 0x7e8d8af8

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=106922

--- Comment #7 from Christoph Haag  ---
Created attachment 141569
  --> https://bugs.freedesktop.org/attachment.cgi?id=141569&action=edit
corruption at small resolutions

Nice, with the patches it runs without crashing.

At 1920x1080 it looks good, but when choosing smaller resolutions, corruption
appears. In the default 720x400 it's pretty bad. I have no idea if the
corruption is in any way related to the 16 bit support though.

RX 480, llvm 8.0.0svn_r169421, latest radv git + patches

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium/util: don't let children of fork & exec inherit our thread affinity

2018-09-14 Thread Marek Olšák
On Fri, Sep 14, 2018 at 4:53 AM, Michel Dänzer  wrote:
> On 2018-09-13 8:56 p.m., Marek Olšák wrote:
>> On Thu, Sep 13, 2018 at 11:48 AM, Michel Dänzer  wrote:
>>> On 2018-09-13 2:40 a.m., Marek Olšák wrote:
 From: Marek Olšák 

 [...]

  static void
 -util_init_cache_number(void)
 +util_init_thread_pinning(void)
  {
 /* Get a semi-random number. */
 int64_t t = os_time_get_nano();
 L3_cache_number = (t ^ (t >> 8) ^ (t >> 16));
 +
 +   /* Reset thread affinity for all children of fork and exec to prevent
>>>
>>> I don't think exec (which doesn't spawn a child, it replaces the current
>>> process "image" with a new one) has anything to do with this.
>>>
>>>
 +* spawned processes and threads from inheriting the current thread's
 +* affinity.
>>>
>>> As the name implies, pthread_atfork only affects child processes spawned
>>> with fork(), not new threads. As such, I'm afraid this won't help at
>>> least for blender, which AFAICT doesn't call fork, it only spawns threads.
>>
>> All created threads and processes are just some variants of fork.
>
> fork(2) spawns a new process, not a new thread in the same process. Its
> current implementation in glibc uses clone(2), which is also used for
> spawning threads, but that's an implementation detail. The kernel still
> has the dedicated fork system calls.
>
> pthread_atfork only affects new processes created with fork(2), not new
> threads created in the same process.
>
>
>> This patch is enough to stop inheriting thread affinity from X and
>> gnome-shell to GL apps, to gcc run within an X terminal, etc.
>> Everything within X inherits the thread affinity of X or gnome-shell,
>> including gcc.
>
> FWIW, X clients are not descendants of the X server, so they must have
> inherited it from something else.
>
> Anyway, now I understand the scope of this patch, thanks. But the commit
> log and comment need to be fixed not to be misleading by talking about
> exec and spawned threads. E.g.:
>
>  gallium/util: don't let child processes inherit our thread affinity
>
>/* Prevent child processes from inheriting the current thread's
> * affinity.
>
>
> That leaves:
>
>> +* What happens if a driver is unloaded and the app creates a thread?
>
> I suppose the child process will likely crash, because the memory
> address where util_set_full_cpu_affinity was located will either be
> unmapped or have random other contents?
>
> At least in theory, there could also be an issue where the application
> might have set its own thread affinity before calling fork, which would
> be clobbered by util_set_full_cpu_affinity in the child process.
>
>
> Last but not least, this doesn't solve the issue of apps such as
> blender, which spawn their own worker threads after initializing OpenGL
> (possibly not themselves directly, but via the toolkit or another
> library; e.g. GTK+4 uses OpenGL by default), inheriting the thread affinity.
>
>
> Due to these issues, setting the thread affinity needs to be disabled by
> default, and only white-listed for applications where it's known safe
> and beneficial. This sucks, but I'm afraid that's the reality until
> there's better API available which allows solving these issues.

We don't have the bandwidth to maintain whitelists. This will either
have to be always on or always off.

On the positive side, only Ryzens with multiple CCXs get all the
benefits and disadvantages.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium/util: don't let children of fork & exec inherit our thread affinity

2018-09-14 Thread Marek Olšák
On Fri, Sep 14, 2018 at 12:54 PM, Nicholas Miell  wrote:
> On 09/12/2018 05:40 PM, Marek Olšák wrote:
>> +static void
>> +util_set_full_cpu_affinity(void)
>> +{
>> +   cpu_set_t cpuset;
>> +
>> +   CPU_ZERO(&cpuset);
>> +   for (unsigned i = 0; i < CPU_SETSIZE; i++)
>> +  CPU_SET(i, &cpuset);
>> +
>> +   pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
>> +}
>>
>>  static void
>> -util_init_cache_number(void)
>> +util_init_thread_pinning(void)
>>  {
>> /* Get a semi-random number. */
>> int64_t t = os_time_get_nano();
>> L3_cache_number = (t ^ (t >> 8) ^ (t >> 16));
>> +
>> +   /* Reset thread affinity for all children of fork and exec to prevent
>> +* spawned processes and threads from inheriting the current thread's
>> +* affinity.
>> +*
>> +* What happens if a driver is unloaded and the app creates a thread?
>> +*/
>> +   pthread_atfork(NULL, NULL, util_set_full_cpu_affinity);
>>  }
>>
>
> You should probably save and restore the application's affinity mask
> rather than assuming the mask is set to all CPUs.

The affinity mask references a random CCX for each OpenGL context, and
we don't know which thread called fork in the child.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 104681] Einstein@Home BOINC FGRPB1G GPU app crash

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=104681

Timothy Arceri  changed:

   What|Removed |Added

  Component|Mesa core   |Gallium/StateTracker/Clover

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 106833] glLinkProgram is expected to fail when vertex attribute aliasing happens on ES3.0 context or later

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=106833

Timothy Arceri  changed:

   What|Removed |Added

 QA Contact|mesa-dev@lists.freedesktop. |intel-3d-bugs@lists.freedes
   |org |ktop.org
  Component|Mesa core   |glsl-compiler

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 106996] Compute shader compiling fails for invalid input layout qualifier used

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=106996

Timothy Arceri  changed:

   What|Removed |Added

 QA Contact|mesa-dev@lists.freedesktop. |intel-3d-bugs@lists.freedes
   |org |ktop.org
  Component|Mesa core   |glsl-compiler

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 78123] svga prints out command errors

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=78123

Timothy Arceri  changed:

   What|Removed |Added

  Component|Other   |Drivers/Gallium/vmwgfx

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 106677] vmwgfx: atom (electron-based app) causes corruption, hangs

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=106677

Timothy Arceri  changed:

   What|Removed |Added

  Component|Other   |Drivers/Gallium/vmwgfx

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 100037] [vmwgfx] Invalid SVGA3D command: 1202

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=100037

Timothy Arceri  changed:

   What|Removed |Added

  Component|Other   |Drivers/Gallium/vmwgfx

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 91098] vmwgfx null ptr dereference at vmw_screen_ioctl.c:76 due to ioctl failure

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=91098

Timothy Arceri  changed:

   What|Removed |Added

  Component|Other   |Drivers/Gallium/vmwgfx

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 48143] [vmwgfx] src/gallium/drivers/svga/svga_tgsi_insn.c:273:get_temp: Assertion `i < 32' failed.

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=48143

Timothy Arceri  changed:

   What|Removed |Added

  Component|Other   |Drivers/Gallium/vmwgfx

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 92983] [vmwgfx] SIGABRT vmw_screen_ioctl.c:461

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=92983

Timothy Arceri  changed:

   What|Removed |Added

  Component|Mesa core   |Drivers/Gallium/vmwgfx

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 101405] x.org/wiki/GalliumStatus/: Add description for DEPRECATED

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=101405

--- Comment #2 from David Hedlund  ---
(In reply to Timothy Arceri from comment #1)
> I don't think there is much point fixing this; that page is out of date
> (hasn't been updated in over 5 years) and its usefulness is questionable.

Thank you. Can you please add a disclaimer on
https://www.x.org/wiki/GalliumStatus/ to make this clear?

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 101405] x.org/wiki/GalliumStatus/: Add description for DEPRECATED

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=101405

Timothy Arceri  changed:

   What|Removed |Added

 Resolution|--- |WONTFIX
 Status|REOPENED|RESOLVED

--- Comment #1 from Timothy Arceri  ---
I don't think there is much point fixing this; that page is out of date (hasn't
been updated in over 5 years) and its usefulness is questionable.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] glsl: Avoid propagating incompatible type of initializer

2018-09-14 Thread Timothy Arceri

Series:

Reviewed-by: Timothy Arceri 

Are there piglit tests to go with this?

On 15/8/18 10:46 pm, Danylo Piliaiev wrote:

do_assignment validated assignment but when rhs type was not compatible
it proceeded without issues and returned error_emitted = false.
On the other hand process_initializer expected do_assignment to always
return compatible type and never fail.

As a result when variable was initialized with incompatible type
the type of variable changed to the incompatible one.
This manifested in unnecessary error messages and in one case in crash.

Example GLSL:
  vec4 tmp = vec2(0.0);
  tmp.z -= 1.0;

Past error messages:
  initializer of type vec2 cannot be assigned to variable of type vec4
  invalid swizzle / mask `z'
  type mismatch
  operands to arithmetic operators must be numeric

After this patch:
  initializer of type vec2 cannot be assigned to variable of type vec4

In the other case when we initialize variable with incompatible struct,
accessing variable's field led to a crash. Example:
  uniform struct {float field;} data;
  ...
  vec4 tmp = data;
  tmp.x -= 1.0;

After the patch there is only error line without a crash:
  initializer of type #anon_struct cannot be assigned to variable of
   type vec4

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107547

Signed-off-by: Danylo Piliaiev 
---
  src/compiler/glsl/ast_to_hir.cpp | 62 +---
  1 file changed, 33 insertions(+), 29 deletions(-)

diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp
index 5d3f10b682..93e7c8ec33 100644
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -1012,6 +1012,8 @@ do_assignment(exec_list *instructions, struct 
_mesa_glsl_parse_state *state,
   mark_whole_array_access(rhs);
   mark_whole_array_access(lhs);
}
+   } else {
+ error_emitted = true;
 }
  
 /* Most callers of do_assignment (assign, add_assign, pre_inc/dec,

@@ -4562,41 +4564,43 @@ process_initializer(ir_variable *var, ast_declaration 
*decl,
/* Never emit code to initialize a uniform.
 */
const glsl_type *initializer_type;
+  bool error_emitted = false;
if (!type->qualifier.flags.q.uniform) {
- do_assignment(initializer_instructions, state,
-   NULL,
-   lhs, rhs,
-   , true,
-   &result, true,
-   type->get_location());
+ error_emitted =
+do_assignment(initializer_instructions, state,
+  NULL, lhs, rhs,
+  &result, true, true,
+  type->get_location());
   initializer_type = result->type;
} else
   initializer_type = rhs->type;
  
-  var->constant_initializer = rhs->constant_expression_value(mem_ctx);

-  var->data.has_initializer = true;
+  if (!error_emitted) {
+ var->constant_initializer = rhs->constant_expression_value(mem_ctx);
+ var->data.has_initializer = true;
  
-  /* If the declared variable is an unsized array, it must inherrit

-   * its full type from the initializer.  A declaration such as
-   *
-   * uniform float a[] = float[](1.0, 2.0, 3.0, 3.0);
-   *
-   * becomes
-   *
-   * uniform float a[4] = float[](1.0, 2.0, 3.0, 3.0);
-   *
-   * The assignment generated in the if-statement (below) will also
-   * automatically handle this case for non-uniforms.
-   *
-   * If the declared variable is not an array, the types must
-   * already match exactly.  As a result, the type assignment
-   * here can be done unconditionally.  For non-uniforms the call
-   * to do_assignment can change the type of the initializer (via
-   * the implicit conversion rules).  For uniforms the initializer
-   * must be a constant expression, and the type of that expression
-   * was validated above.
-   */
-  var->type = initializer_type;
+ /* If the declared variable is an unsized array, it must inherrit
+ * its full type from the initializer.  A declaration such as
+ *
+ * uniform float a[] = float[](1.0, 2.0, 3.0, 3.0);
+ *
+ * becomes
+ *
+ * uniform float a[4] = float[](1.0, 2.0, 3.0, 3.0);
+ *
+ * The assignment generated in the if-statement (below) will also
+ * automatically handle this case for non-uniforms.
+ *
+ * If the declared variable is not an array, the types must
+ * already match exactly.  As a result, the type assignment
+ * here can be done unconditionally.  For non-uniforms the call
+ * to do_assignment can change the type of the initializer (via
+ * the implicit conversion rules).  For uniforms the initializer
+ * must be a constant expression, and the type of that expression
+ * was 

[Mesa-dev] [Bug 107939] Commit 888b7fc causes performance regression

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107939

--- Comment #1 from Timothy Arceri  ---
Since the referenced sha is for the 18.1 rather than the master branch I'm
copying here for completeness.

commit 888b7fcaf4d4f25c90c318495c7c38066cff29fb
Author: Samuel Pitoiset 
Date:   Wed Jun 13 20:19:23 2018 +0200

radv: don't fast clear HTILE for 16-bit depth surfaces on GFX8

This causes rendering issues in Shadow Warrior 2 with DXVK.

Cc: mesa-sta...@lists.freedesktop.org
Fixes: ccc64f3133 ("radv: enable TC-compat HTILE for 16-bit depth surfaces
on GFX8")
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106912
Signed-off-by: Samuel Pitoiset 
Reviewed-by: Bas Nieuwenhuizen 
(cherry picked from commit 51e23d34190076159129dd7b449b95a1ac3d4949)

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 107923] build_id.c:126: multiple definition of `build_id_length'

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107923

Vinson Lee  changed:

   What|Removed |Added

   Keywords||bisected
 CC||fdo-b...@engestrom.ch,
   ||i...@freedesktop.org

--- Comment #5 from Vinson Lee  ---
8396043f304bb2a752130230055605c5c966e89f is the first bad commit
commit 8396043f304bb2a752130230055605c5c966e89f
Author: Dylan Baker 
Date:   Tue Aug 21 09:46:46 2018 -0700

Replace uses of _mesa_bitcount with util_bitcount

and _mesa_bitcount_64 with util_bitcount_64. This fixes a build problem
in nir for platforms that don't have popcount or popcountll, such as
32bit msvc.

v2: - Fix additional uses of _mesa_bitcount added after this was
  originally written

Acked-by: Eric Engestrom  (v1)
Acked-by: Eric Anholt 
Reviewed-by: Ian Romanick 

:04 04 9b4d3f30a8c2cb4d4a549b92c6db3cf499338579
dcad5de2d7a236b4fe35516fde4abf5d24516415 M  src
bisect run success

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] genxml: Add SO_PRIM_STORAGE_NEEDED and SO_NUM_PRIMS_WRITTEN

2018-09-14 Thread Jason Ekstrand
---
 src/intel/genxml/gen10.xml | 32 
 src/intel/genxml/gen11.xml | 32 
 src/intel/genxml/gen7.xml  | 32 
 src/intel/genxml/gen75.xml | 32 
 src/intel/genxml/gen8.xml  | 32 
 src/intel/genxml/gen9.xml  | 32 
 6 files changed, 192 insertions(+)

diff --git a/src/intel/genxml/gen10.xml b/src/intel/genxml/gen10.xml
index abd5da297d6..0bb38a76a78 100644
--- a/src/intel/genxml/gen10.xml
+++ b/src/intel/genxml/gen10.xml
@@ -3553,6 +3553,38 @@
 
   
 
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
   
 
   
diff --git a/src/intel/genxml/gen11.xml b/src/intel/genxml/gen11.xml
index 1b3befbbfc9..6eed5f99d92 100644
--- a/src/intel/genxml/gen11.xml
+++ b/src/intel/genxml/gen11.xml
@@ -3551,6 +3551,38 @@
 
   
 
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
   
 
   
diff --git a/src/intel/genxml/gen7.xml b/src/intel/genxml/gen7.xml
index 6dde7973e69..7600a27bcb0 100644
--- a/src/intel/genxml/gen7.xml
+++ b/src/intel/genxml/gen7.xml
@@ -2489,6 +2489,38 @@
 
   
 
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
   
 
   
diff --git a/src/intel/genxml/gen75.xml b/src/intel/genxml/gen75.xml
index dfc3d891498..103723168bd 100644
--- a/src/intel/genxml/gen75.xml
+++ b/src/intel/genxml/gen75.xml
@@ -2972,6 +2972,38 @@
 
   
 
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
   
 
   
diff --git a/src/intel/genxml/gen8.xml b/src/intel/genxml/gen8.xml
index d42c63aabd8..364cecf5d67 100644
--- a/src/intel/genxml/gen8.xml
+++ b/src/intel/genxml/gen8.xml
@@ -3206,6 +3206,38 @@
 
   
 
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
   
 
   
diff --git a/src/intel/genxml/gen9.xml b/src/intel/genxml/gen9.xml
index ca268254503..4a26ae98fb9 100644
--- a/src/intel/genxml/gen9.xml
+++ b/src/intel/genxml/gen9.xml
@@ -3491,6 +3491,38 @@
 
   
 
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
   
 
   
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 100960] Special block from Minecraft mod rendered out of place

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=100960

--- Comment #9 from Fabian Maurer  ---
Created attachment 141566
  --> https://bugs.freedesktop.org/attachment.cgi?id=141566&action=edit
Windows - Call 2245521 - Framebuffer

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 97516] GLX_OML_swap_method not fully supported

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=97516

--- Comment #4 from Sven Arvidsson  ---
I tried the suggested hack of adding _DRI_ATTRIB_SWAP_EXCHANGE to
back_buffer_modes[]. In the case of Brink, it allows me to launch the game, but
in fullscreen mode it stops updating the screen, so only a single frame is
shown. Running the game windowed seems to work fine.

It could be a problem on my end, though I did rebuild system Mesa (32- and
64bit) so the server side should be picking up the changes too.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 107923] build_id.c:126: multiple definition of `build_id_length'

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107923

--- Comment #4 from Dylan Baker  ---
This seems to be autotools-specific; I can't replicate with the closest meson
configuration I could come up with (-Dbuildtype=debug -Dglx=gallium-xlib
-Ddri-drivers= -Dvulkan-drivers= -Dgallium-drivers=swrast,svga -Dgbm=false
-Degl=false); meson doesn't have a toggle to turn off direct glx or tls, so it
could be related to that.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/4] anv/so_memcpy: Use the correct SO_BUFFER size on gen8+

2018-09-14 Thread Nanley Chery
On Wed, Sep 12, 2018 at 12:06:49AM -0500, Jason Ekstrand wrote:
> This shouldn't matter as we'll never write OOB anyway but we may as well
> get it right.  It's supposed to be in dwords - 1.
> ---
>  src/intel/vulkan/genX_gpu_memcpy.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 

This patch is
Reviewed-by: Nanley Chery 

> diff --git a/src/intel/vulkan/genX_gpu_memcpy.c 
> b/src/intel/vulkan/genX_gpu_memcpy.c
> index 57abd8cd5c1..cba820a1866 100644
> --- a/src/intel/vulkan/genX_gpu_memcpy.c
> +++ b/src/intel/vulkan/genX_gpu_memcpy.c
> @@ -222,7 +222,7 @@ genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer 
> *cmd_buffer,
>  
>  #if GEN_GEN >= 8
>sob.SOBufferEnable = true;
> -  sob.SurfaceSize = size - 1;
> +  sob.SurfaceSize = size / 4 - 1;
>  #else
>sob.SurfacePitch = bs;
>sob.SurfaceEndAddress = sob.SurfaceBaseAddress;
> -- 
> 2.17.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] mesa: Additional FlipY applications

2018-09-14 Thread Brian Paul

On 09/14/2018 01:09 PM, Fritz Koenig wrote:

Instances where direction was determined based on
winsys or user fbo and should be determined based on
FlipY.

Key STATE_FB_WPOS_Y_TRANSFORM off of FlipY instead of
_mesa_is_user_fbo.  This corrects gl_FragCoord usage
when applying GL_MESA_framebuffer_flip_y.

Fixes: ab05dd183cc ("i965: implement GL_MESA_framebuffer_flip_y [v3]")
---
  src/mesa/main/multisample.c   | 4 ++--
  src/mesa/program/prog_statevars.c | 2 +-
  2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/mesa/main/multisample.c b/src/mesa/main/multisample.c
index 8beb1d839e..d494a43ac7 100644
--- a/src/mesa/main/multisample.c
+++ b/src/mesa/main/multisample.c
@@ -94,8 +94,8 @@ _mesa_GetMultisamplefv(GLenum pname, GLuint index, GLfloat * 
val)
  
ctx->Driver.GetSamplePosition(ctx, ctx->DrawBuffer, index, val);
  
-  /* winsys FBOs are upside down */

-  if (_mesa_is_winsys_fbo(ctx->DrawBuffer))
+  /* FBOs can be upside down (winsys always are)*/
+  if (ctx->DrawBuffer->FlipY)
   val[1] = 1.0f - val[1];
  
return;

diff --git a/src/mesa/program/prog_statevars.c 
b/src/mesa/program/prog_statevars.c
index 4d7f388cfb..3bbe451399 100644
--- a/src/mesa/program/prog_statevars.c
+++ b/src/mesa/program/prog_statevars.c
@@ -571,7 +571,7 @@ _mesa_fetch_state(struct gl_context *ctx, const 
gl_state_index16 state[],
case STATE_FB_WPOS_Y_TRANSFORM:
   /* A driver may negate this conditional by using ZW swizzle
* instead of XY (based on e.g. some other state). */
- if (_mesa_is_user_fbo(ctx->DrawBuffer)) {
+ if (!ctx->DrawBuffer->FlipY) {
  /* Identity (XY) followed by flipping Y upside down (ZW). */
  value[0] = 1.0F;
  value[1] = 0.0F;



For both,
Reviewed-by: Brian Paul 

Tag for stable branch?

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 107923] build_id.c:126: multiple definition of `build_id_length'

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107923

--- Comment #3 from Brian Paul  ---
I'm seeing similar issues.

$ ../autogen.sh  CFLAGS="-g -O0" CXXFLAGS="-g -O0" --enable-debug
--enable-xlib-glx --disable-driglx-direct --disable-dri
--with-gallium-drivers=swrast,svga --disable-gbm --disable-egl

results in a lot of multiply-defined symbols.

This has probably been happening for the past week or so.  I haven't had any
time to investigate though.

Vinson, perhaps you could bisect it if you have time.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] mesa: Additional FlipY applications

2018-09-14 Thread Fritz Koenig
Instances where direction was determined based on
winsys or user fbo and should be determined based on
FlipY.

Key STATE_FB_WPOS_Y_TRANSFORM off of FlipY instead of
_mesa_is_user_fbo.  This corrects gl_FragCoord usage
when applying GL_MESA_framebuffer_flip_y.

Fixes: ab05dd183cc ("i965: implement GL_MESA_framebuffer_flip_y [v3]")
---
 src/mesa/main/multisample.c   | 4 ++--
 src/mesa/program/prog_statevars.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/mesa/main/multisample.c b/src/mesa/main/multisample.c
index 8beb1d839e..d494a43ac7 100644
--- a/src/mesa/main/multisample.c
+++ b/src/mesa/main/multisample.c
@@ -94,8 +94,8 @@ _mesa_GetMultisamplefv(GLenum pname, GLuint index, GLfloat * 
val)
 
   ctx->Driver.GetSamplePosition(ctx, ctx->DrawBuffer, index, val);
 
-  /* winsys FBOs are upside down */
-  if (_mesa_is_winsys_fbo(ctx->DrawBuffer))
+  /* FBOs can be upside down (winsys always are)*/
+  if (ctx->DrawBuffer->FlipY)
  val[1] = 1.0f - val[1];
 
   return;
diff --git a/src/mesa/program/prog_statevars.c 
b/src/mesa/program/prog_statevars.c
index 4d7f388cfb..3bbe451399 100644
--- a/src/mesa/program/prog_statevars.c
+++ b/src/mesa/program/prog_statevars.c
@@ -571,7 +571,7 @@ _mesa_fetch_state(struct gl_context *ctx, const 
gl_state_index16 state[],
   case STATE_FB_WPOS_Y_TRANSFORM:
  /* A driver may negate this conditional by using ZW swizzle
   * instead of XY (based on e.g. some other state). */
- if (_mesa_is_user_fbo(ctx->DrawBuffer)) {
+ if (!ctx->DrawBuffer->FlipY) {
 /* Identity (XY) followed by flipping Y upside down (ZW). */
 value[0] = 1.0F;
 value[1] = 0.0F;
-- 
2.19.0.397.gdd90340f6a-goog

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] mesa: FramebufferParameteri parameter checking

2018-09-14 Thread Fritz Koenig
Missing break; causes parameter checking to
never pass GL_FRAMEBUFFER_FLIP_Y_MESA parameters.

Fixes: 318c265160 ("mesa: GL_MESA_framebuffer_flip_y extension [v4]")
---
 src/mesa/main/fbobject.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index edb86438e3..3263fce845 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -1503,6 +1503,7 @@ framebuffer_parameteri(struct gl_context *ctx, struct 
gl_framebuffer *fb,
   if (!ctx->Extensions.MESA_framebuffer_flip_y)
  goto invalid_pname_enum;
   cannot_be_winsys_fbo = true;
+  break;
default:
   goto invalid_pname_enum;
}
-- 
2.19.0.397.gdd90340f6a-goog

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] move pthread_setaffinity_np check to the build system

2018-09-14 Thread Dylan Baker
Quoting Eric Engestrom (2018-09-14 09:39:27)
> On Thursday, 2018-09-13 11:41:38 -0700, Dylan Baker wrote:
> > Rather than trying to encode all of the rules in a header, let's just put
> > them in the build system where they belong. This fixes the build on
> > FreeBSD, which does have pthread_setaffinity_np, but it's in a
> > pthread_np.h, not behind _GNU_SOURCE. FreeBSD also implements cpu_set
> > slightly differently, so additional changes would be required to get it
> > working right there anyway.
> > 
> > Fixes: 9f1bbbdbbd77d346c74c7abbb31f399151a85713
> >("util: try to fix the Android and MacOS build")
> > Cc: Marek Olšák 
> > Cc: Emil Velikov 
> > ---
> >  configure.ac| 16 
> >  meson.build |  7 +++
> >  src/util/u_thread.h |  4 
> >  3 files changed, 23 insertions(+), 4 deletions(-)
> > 
> > diff --git a/configure.ac b/configure.ac
> > index f8bb131cb63..d10236dbead 100644
> > --- a/configure.ac
> > +++ b/configure.ac
> > @@ -968,6 +968,22 @@ if test "x$pthread_stubs_possible" = xyes; then
> >  PKG_CHECK_MODULES(PTHREADSTUBS, pthread-stubs >= 0.4)
> >  fi
> >  
> > +save_LIBS="$LIBS"
> > +LIBS="$PTHREAD_LIBS"
> > +AC_MSG_CHECKING(whether pthread_setaffinity_np is supported)
> > +AC_LINK_IFELSE([AC_LANG_SOURCE([[
> > +#define _GNU_SOURCE
> > +#include <pthread.h>
> > +int main() {
> > +   void *a = (void*) &pthread_setaffinity_np;
> > +   long b = (long) a;
> > +   return (int) b;
> > +}]])],
> > +  [DEFINES="$DEFINES -DPTHREAD_SETAFFINITY"];
> 
> -DHAVE_PTHREAD_SETAFFINITY
> 
> With that, and assuming the AC_LINK test code is correct:

I just copied the code that meson generates, lol. I'm not really sure that link
is required, it would probably work to just compile it.

> Reviewed-by: Eric Engestrom 
> 
> > +   AC_MSG_RESULT([yes]),
> > +   AC_MSG_RESULT([no]))
> > +LIBS="$save_LIBS"
> > +
> >  dnl Check for futex for fast inline simple_mtx_t.
> >  AC_CHECK_HEADER([linux/futex.h], [DEFINES="$DEFINES -DHAVE_LINUX_FUTEX_H"])
> >  
> > diff --git a/meson.build b/meson.build
> > index 0d534b9b4a9..0588ebf8e7a 100644
> > --- a/meson.build
> > +++ b/meson.build
> > @@ -1070,6 +1070,13 @@ pre_args += '-DHAVE_ZLIB'
> >  dep_thread = dependency('threads')
> >  if dep_thread.found() and host_machine.system() != 'windows'
> >pre_args += '-DHAVE_PTHREAD'
> > +  if cc.has_function(
> > +  'pthread_setaffinity_np',
> > +  dependencies : dep_thread,
> > +  prefix : '#include <pthread.h>',
> > +  args : '-D_GNU_SOURCE')
> > +pre_args += '-DHAVE_PTHREAD_SETAFFINITY'
> > +  endif
> >  endif
> >  if with_amd_vk or with_gallium_radeonsi or with_gallium_r600 or 
> > with_gallium_opencl
> >dep_elf = dependency('libelf', required : false)
> > diff --git a/src/util/u_thread.h b/src/util/u_thread.h
> > index eee6f3c712d..7538d7d634b 100644
> > --- a/src/util/u_thread.h
> > +++ b/src/util/u_thread.h
> > @@ -36,10 +36,6 @@
> >  #include 
> >  #endif
> >  
> > -#if defined(HAVE_PTHREAD) && !defined(ANDROID) && !defined(__APPLE__)
> > -#define HAVE_PTHREAD_SETAFFINITY
> > -#endif
> > -
> >  static inline thrd_t u_thread_create(int (*routine)(void *), void *param)
> >  {
> > thrd_t thread;
> > -- 
> > 2.19.0
> > 
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 107923] build_id.c:126: multiple definition of `build_id_length'

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107923

--- Comment #2 from Vinson Lee  ---
(In reply to Sergii Romantsov from comment #1)
> could you, please, specify your build-configuration?

./autogen.sh --disable-dri --disable-egl --disable-gbm --enable-debug
--with-dri-drivers=swrast --with-gallium-drivers=swrast --with-platforms=x11

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 107765] [regression] Batman Arkham City crashes with DXVK under wine

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107765

--- Comment #2 from farmboy0+freedesk...@googlemail.com ---
I use 
vanilla wine 3.12/3.14
DXVK 0.7.0 and 0.7.1
in a 32 or 64 bit prefix.

I have a R9 380X
Kernel 4.18.5 with DC enabled.

I tried Mesa git, no change.

Latest console output before crash:
info:  DXGI: Setting display mode: 1920x1080@60
warn:  DxgiSwapChain::QueryInterface: Unknown interface query
warn:  94d99bdb-f1f8-4ab0-b236-7da0170edab1
warn:  DXGI: MakeWindowAssociation: Ignoring flags
013b:fixme:wtsapi:WTSRegisterSessionNotification Stub 0x60226 0x
warning: The VAD has been replaced by a hack pending a complete rewrite
info:  DxgiVkPresenter: Recreating swap chain: 
  Format:   VK_FORMAT_B8G8R8A8_UNORM
  Present mode: VK_PRESENT_MODE_FIFO_KHR
  Buffer size:  1920x1080
013b:fixme:imm:ImmReleaseContext (0x60226, 0xae463b8): stub
err:   D3D11DeviceContext::SetPredication: Stub
mesa: for the -simplifycfg-sink-common option: may only occur zero or one
times!
../mesa-/src/amd/vulkan/radv_device.c:3936: FINISHME: Illegal color

amapps\common\Batman Arkham City GOTY\Binaries\Win32\BatmanAC.exe:
../mesa-/src/amd/vulkan/radv_pipeline.c:486: si_choose_spi_color_format:
Assertion `!"unhandled blend format"' failed.
016c:fixme:dbghelp:elf_search_auxv can't find symbol in module
016c:fixme:dbghelp:validate_addr64 Unsupported address f7d4

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nvir: Always split 64-bit IMAD/IMUL operations

2018-09-14 Thread Karol Herbst
Hi Dylan,

this patch only matters for debug builds (DEBUG set), but your merge
result looks correct nonetheless.

Thanks

On Fri, Sep 14, 2018 at 6:16 PM, Dylan Baker  wrote:
> Quoting Pierre Moreau (2017-12-04 15:51:04)
>> Those operations do not map to actual hardware instructions, therefore
>> those should always be lowered to 32-bit instructions.
>>
>> Fixes: 009c54aa7af "nv50/ir: Split 64-bit integer MAD/MUL operations"
>> Signed-off-by: Pierre Moreau 
>> ---
>>  src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
>> index 61d4e6a2d0..14bdcea2ca 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
>> @@ -3794,7 +3794,7 @@ Program::optimizeSSA(int level)
>> RUN_PASS(2, AlgebraicOpt, run);
>> RUN_PASS(2, ModifierFolding, run); // before load propagation -> less 
>> checks
>> RUN_PASS(1, ConstantFolding, foldAll);
>> -   RUN_PASS(1, Split64BitOpPreRA, run);
>> +   RUN_PASS(0, Split64BitOpPreRA, run);
>> RUN_PASS(1, LoadPropagation, run);
>> RUN_PASS(1, IndirectPropagation, run);
>> RUN_PASS(2, MemoryOpt, run);
>> --
>> 2.15.0
>>
>
> Hi Pierre,
>
> There was a small conflict when applying this to 18.1; I think my resolution 
> is
> correct, but you can see the version of the patch here:
> https://gitlab.freedesktop.org/mesa/mesa/commit/649aff1a8788684c3160ab6001016054de251f39
> please let me know if any changes are needed.
>
> Dylan
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium/util: don't let children of fork & exec inherit our thread affinity

2018-09-14 Thread Nicholas Miell
On 09/12/2018 05:40 PM, Marek Olšák wrote:
> +static void
> +util_set_full_cpu_affinity(void)
> +{
> +   cpu_set_t cpuset;
> +
> +   CPU_ZERO(&cpuset);
> +   for (unsigned i = 0; i < CPU_SETSIZE; i++)
> +  CPU_SET(i, &cpuset);
> +
> +   pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
> +}
>  
>  static void
> -util_init_cache_number(void)
> +util_init_thread_pinning(void)
>  {
> /* Get a semi-random number. */
> int64_t t = os_time_get_nano();
> L3_cache_number = (t ^ (t >> 8) ^ (t >> 16));
> +
> +   /* Reset thread affinity for all children of fork and exec to prevent
> +* spawned processes and threads from inheriting the current thread's
> +* affinity.
> +*
> +* What happens if a driver is unloaded and the app creates a thread?
> +*/
> +   pthread_atfork(NULL, NULL, util_set_full_cpu_affinity);
>  }
>  

You should probably save and restore the application's affinity mask
rather than assuming the mask is set to all CPUs.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] move pthread_setaffinity_np check to the build system

2018-09-14 Thread Dylan Baker
Quoting Dylan Baker (2018-09-13 11:41:38)
> Rather than trying to encode all of the rules in a header, let's just put
> them in the build system where they belong. This fixes the build on
> FreeBSD, which does have pthread_setaffinity_np, but it's in a
> pthread_np.h, not behind _GNU_SOURCE. FreeBSD also implements cpu_set
> slightly differently, so additional changes would be required to get it
> working right there anyway.
> 
> Fixes: 9f1bbbdbbd77d346c74c7abbb31f399151a85713
>("util: try to fix the Android and MacOS build")
> Cc: Marek Olšák 
> Cc: Emil Velikov 
> ---
>  configure.ac| 16 
>  meson.build |  7 +++
>  src/util/u_thread.h |  4 
>  3 files changed, 23 insertions(+), 4 deletions(-)
> 
> diff --git a/configure.ac b/configure.ac
> index f8bb131cb63..d10236dbead 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -968,6 +968,22 @@ if test "x$pthread_stubs_possible" = xyes; then
>  PKG_CHECK_MODULES(PTHREADSTUBS, pthread-stubs >= 0.4)
>  fi
>  
> +save_LIBS="$LIBS"
> +LIBS="$PTHREAD_LIBS"
> +AC_MSG_CHECKING(whether pthread_setaffinity_np is supported)
> +AC_LINK_IFELSE([AC_LANG_SOURCE([[
> +#define _GNU_SOURCE
> +#include <pthread.h>
> +int main() {
> +   void *a = (void*) &pthread_setaffinity_np;
> +   long b = (long) a;
> +   return (int) b;
> +}]])],
> +  [DEFINES="$DEFINES -DPTHREAD_SETAFFINITY"];

This should be -DHAVE_PTHREAD_SETAFFINITY, I've fixed this locally

> +   AC_MSG_RESULT([yes]),
> +   AC_MSG_RESULT([no]))
> +LIBS="$save_LIBS"
> +
>  dnl Check for futex for fast inline simple_mtx_t.
>  AC_CHECK_HEADER([linux/futex.h], [DEFINES="$DEFINES -DHAVE_LINUX_FUTEX_H"])
>  
> diff --git a/meson.build b/meson.build
> index 0d534b9b4a9..0588ebf8e7a 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -1070,6 +1070,13 @@ pre_args += '-DHAVE_ZLIB'
>  dep_thread = dependency('threads')
>  if dep_thread.found() and host_machine.system() != 'windows'
>pre_args += '-DHAVE_PTHREAD'
> +  if cc.has_function(
> +  'pthread_setaffinity_np',
> +  dependencies : dep_thread,
> +  prefix : '#include <pthread.h>',
> +  args : '-D_GNU_SOURCE')
> +pre_args += '-DHAVE_PTHREAD_SETAFFINITY'
> +  endif
>  endif
>  if with_amd_vk or with_gallium_radeonsi or with_gallium_r600 or 
> with_gallium_opencl
>dep_elf = dependency('libelf', required : false)
> diff --git a/src/util/u_thread.h b/src/util/u_thread.h
> index eee6f3c712d..7538d7d634b 100644
> --- a/src/util/u_thread.h
> +++ b/src/util/u_thread.h
> @@ -36,10 +36,6 @@
>  #include 
>  #endif
>  
> -#if defined(HAVE_PTHREAD) && !defined(ANDROID) && !defined(__APPLE__)
> -#define HAVE_PTHREAD_SETAFFINITY
> -#endif
> -
>  static inline thrd_t u_thread_create(int (*routine)(void *), void *param)
>  {
> thrd_t thread;
> -- 
> 2.19.0
> 


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600/sb: use safe math optimizatiosn when TGSI contains precise operations

2018-09-14 Thread Roland Scheidegger
I suppose ideally it would only affect instruction chains which have a
precise modifier somewhere. But it's better than just ignoring it
completely.

Reviewed-by: Roland Scheidegger 


Am 14.09.2018 um 16:56 schrieb Gert Wollny:
> Fixes:
>   dEQP-GLES3.functional.shaders.invariance.highp.common_subexpression_3
>   dEQP-GLES3.functional.shaders.invariance.mediump.common_subexpression_3
>   dEQP-GLES3.functional.shaders.invariance.lowp.common_subexpression_3
> 
> Signed-off-by: Gert Wollny 
> ---
>  src/gallium/drivers/r600/r600_asm.h  | 1 +
>  src/gallium/drivers/r600/r600_shader.c   | 3 +++
>  src/gallium/drivers/r600/sb/sb_bc_parser.cpp | 2 +-
>  3 files changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/src/gallium/drivers/r600/r600_asm.h 
> b/src/gallium/drivers/r600/r600_asm.h
> index 5841044bf8..ca9280a7a8 100644
> --- a/src/gallium/drivers/r600/r600_asm.h
> +++ b/src/gallium/drivers/r600/r600_asm.h
> @@ -277,6 +277,7 @@ struct r600_bytecode {
>   struct r600_bytecode_output pending_outputs[5];
>   int n_pending_outputs;
>   boolean need_wait_ack; /* emit a pending WAIT_ACK prior 
> to control flow */
> + boolean precise;
>  };
>  
>  /* eg_asm.c */
> diff --git a/src/gallium/drivers/r600/r600_shader.c 
> b/src/gallium/drivers/r600/r600_shader.c
> index 2229dc8fab..408939d110 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -3879,6 +3879,9 @@ static int r600_shader_from_tgsi(struct r600_context 
> *rctx,
>   ctx.inst_info = 
> _shader_tgsi_instruction[opcode];
>   else
>   ctx.inst_info = 
> _shader_tgsi_instruction[opcode];
> +
> + ctx.bc->precise |= 
> ctx.parse.FullToken.FullInstruction.Instruction.Precise;
> +
>   r = ctx.inst_info->process();
>   if (r)
>   goto out_err;
> diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp 
> b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
> index a7b828268b..eafc1cb8ec 100644
> --- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
> +++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
> @@ -75,7 +75,7 @@ int bc_parser::decode() {
>   }
>  
>   sh = new shader(ctx, t, bc->debug_id);
> - sh->safe_math = sb_context::safe_math || (t == TARGET_COMPUTE);
> + sh->safe_math = sb_context::safe_math || (t == TARGET_COMPUTE || 
> bc->precise);
>  
>   int r = decode_shader();
>  
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] move pthread_setaffinity_np check to the build system

2018-09-14 Thread Eric Engestrom
On Thursday, 2018-09-13 11:41:38 -0700, Dylan Baker wrote:
> Rather than trying to encode all of the rules in a header, let's just put
> them in the build system where they belong. This fixes the build on
> FreeBSD, which does have pthread_setaffinity_np, but it's in a
> pthread_np.h, not behind _GNU_SOURCE. FreeBSD also implements cpu_set
> slightly differently, so additional changes would be required to get it
> working right there anyway.
> 
> Fixes: 9f1bbbdbbd77d346c74c7abbb31f399151a85713
>("util: try to fix the Android and MacOS build")
> Cc: Marek Olšák 
> Cc: Emil Velikov 
> ---
>  configure.ac| 16 
>  meson.build |  7 +++
>  src/util/u_thread.h |  4 
>  3 files changed, 23 insertions(+), 4 deletions(-)
> 
> diff --git a/configure.ac b/configure.ac
> index f8bb131cb63..d10236dbead 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -968,6 +968,22 @@ if test "x$pthread_stubs_possible" = xyes; then
>  PKG_CHECK_MODULES(PTHREADSTUBS, pthread-stubs >= 0.4)
>  fi
>  
> +save_LIBS="$LIBS"
> +LIBS="$PTHREAD_LIBS"
> +AC_MSG_CHECKING(whether pthread_setaffinity_np is supported)
> +AC_LINK_IFELSE([AC_LANG_SOURCE([[
> +#define _GNU_SOURCE
> +#include <pthread.h>
> +int main() {
> +   void *a = (void*) &pthread_setaffinity_np;
> +   long b = (long) a;
> +   return (int) b;
> +}]])],
> +  [DEFINES="$DEFINES -DPTHREAD_SETAFFINITY"];

-DHAVE_PTHREAD_SETAFFINITY

With that, and assuming the AC_LINK test code is correct:
Reviewed-by: Eric Engestrom 

> +   AC_MSG_RESULT([yes]),
> +   AC_MSG_RESULT([no]))
> +LIBS="$save_LIBS"
> +
>  dnl Check for futex for fast inline simple_mtx_t.
>  AC_CHECK_HEADER([linux/futex.h], [DEFINES="$DEFINES -DHAVE_LINUX_FUTEX_H"])
>  
> diff --git a/meson.build b/meson.build
> index 0d534b9b4a9..0588ebf8e7a 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -1070,6 +1070,13 @@ pre_args += '-DHAVE_ZLIB'
>  dep_thread = dependency('threads')
>  if dep_thread.found() and host_machine.system() != 'windows'
>pre_args += '-DHAVE_PTHREAD'
> +  if cc.has_function(
> +  'pthread_setaffinity_np',
> +  dependencies : dep_thread,
> +  prefix : '#include <pthread.h>',
> +  args : '-D_GNU_SOURCE')
> +pre_args += '-DHAVE_PTHREAD_SETAFFINITY'
> +  endif
>  endif
>  if with_amd_vk or with_gallium_radeonsi or with_gallium_r600 or 
> with_gallium_opencl
>dep_elf = dependency('libelf', required : false)
> diff --git a/src/util/u_thread.h b/src/util/u_thread.h
> index eee6f3c712d..7538d7d634b 100644
> --- a/src/util/u_thread.h
> +++ b/src/util/u_thread.h
> @@ -36,10 +36,6 @@
>  #include 
>  #endif
>  
> -#if defined(HAVE_PTHREAD) && !defined(ANDROID) && !defined(__APPLE__)
> -#define HAVE_PTHREAD_SETAFFINITY
> -#endif
> -
>  static inline thrd_t u_thread_create(int (*routine)(void *), void *param)
>  {
> thrd_t thread;
> -- 
> 2.19.0
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nvir: Always split 64-bit IMAD/IMUL operations

2018-09-14 Thread Dylan Baker
Quoting Pierre Moreau (2017-12-04 15:51:04)
> Those operations do not map to actual hardware instructions, therefore
> those should always be lowered to 32-bit instructions.
> 
> Fixes: 009c54aa7af "nv50/ir: Split 64-bit integer MAD/MUL operations"
> Signed-off-by: Pierre Moreau 
> ---
>  src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> index 61d4e6a2d0..14bdcea2ca 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> @@ -3794,7 +3794,7 @@ Program::optimizeSSA(int level)
> RUN_PASS(2, AlgebraicOpt, run);
> RUN_PASS(2, ModifierFolding, run); // before load propagation -> less 
> checks
> RUN_PASS(1, ConstantFolding, foldAll);
> -   RUN_PASS(1, Split64BitOpPreRA, run);
> +   RUN_PASS(0, Split64BitOpPreRA, run);
> RUN_PASS(1, LoadPropagation, run);
> RUN_PASS(1, IndirectPropagation, run);
> RUN_PASS(2, MemoryOpt, run);
> -- 
> 2.15.0
> 

Hi Pierre,

There was a small conflict when applying this to 18.1; I think my resolution is
correct, but you can see the version of the patch here:
https://gitlab.freedesktop.org/mesa/mesa/commit/649aff1a8788684c3160ab6001016054de251f39
please let me know if any changes are needed.

Dylan


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 107939] Commit 888b7fc causes performance regression

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107939

Michel Dänzer  changed:

   What|Removed |Added

  Component|Drivers/Gallium/radeonsi|Drivers/Vulkan/radeon
   Assignee|dri-devel@lists.freedesktop |mesa-dev@lists.freedesktop.
   |.org|org
 QA Contact|dri-devel@lists.freedesktop |mesa-dev@lists.freedesktop.
   |.org|org

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/fs: Don't propagate conditional modifiers from integer compares to adds

2018-09-14 Thread Ian Romanick
On 09/14/2018 02:52 AM, Alejandro Piñeiro wrote:
> No shader-db changes, so perhaps adding a test on
> test_fs_cmod_propagation? In any case, the patch looks good to me:

I should have mentioned in the commit message, but I added a piglit test:

https://patchwork.freedesktop.org/patch/249182/

> Reviewed-by: Alejandro Piñeiro 
> 
> 
> On 14/09/18 00:06, Ian Romanick wrote:
>> From: Ian Romanick 
>>
>> No shader-db changes on any Intel platform... which probably explains
>> why no bugs have been bisected to this problem since it landed in Mesa
>> 18.1. :( The commit mentioned below is in 18.2, so 18.1 would need a
>> slightly different fix (due to code refactoring).
>>
>> Signed-off-by: Ian Romanick 
>> Fixes: 77f269bb560 "i965/fs: Refactor propagation of conditional modifiers 
>> from compares to adds"
>> Cc: Matt Turner  (reviewed the original patch)
>> Cc: Alejandro Piñeiro  (reviewed the original patch)
>> ---
>>  src/intel/compiler/brw_fs_cmod_propagation.cpp | 10 +-
>>  1 file changed, 9 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/intel/compiler/brw_fs_cmod_propagation.cpp 
>> b/src/intel/compiler/brw_fs_cmod_propagation.cpp
>> index 5b74f267359..5fb522f810f 100644
>> --- a/src/intel/compiler/brw_fs_cmod_propagation.cpp
>> +++ b/src/intel/compiler/brw_fs_cmod_propagation.cpp
>> @@ -211,9 +211,17 @@ opt_cmod_propagation_local(const gen_device_info 
>> *devinfo, bblock_t *block)
>>/* A CMP with a second source of zero can match with anything.  A CMP
>> * with a second source that is not zero can only match with an ADD
>> * instruction.
>> +   *
>> +   * Only apply this optimization to floating-point sources.  It can fail 
>> for
>> +   * integers.  For inputs a = 0x80000000, b = 4, int(0x80000000) < 4, 
>> but
>> +   * int(0x80000000) - 4 overflows and results in 0x7ffffffc.  That's 
>> not
>> +   * less than zero, so the flags get set differently than for (a < b).
>> */
>>if (inst->opcode == BRW_OPCODE_CMP && !inst->src[1].is_zero()) {
>> - progress = cmod_propagate_cmp_to_add(devinfo, block, inst) || 
>> progress;
>> + if (brw_reg_type_is_floating_point(inst->src[0].type) &&
>> + cmod_propagate_cmp_to_add(devinfo, block, inst))
>> +progress = true;
>> +
>>   continue;
>>}
>>  
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600/sb: use safe math optimizatiosn when TGSI contains precise operations

2018-09-14 Thread Gert Wollny
The typo in subject is fixed locally. 

Am Freitag, den 14.09.2018, 16:56 +0200 schrieb Gert Wollny:
> Fixes:
>   dEQP-
> GLES3.functional.shaders.invariance.highp.common_subexpression_3
>   dEQP-
> GLES3.functional.shaders.invariance.mediump.common_subexpression_3
>   dEQP-
> GLES3.functional.shaders.invariance.lowp.common_subexpression_3
> 
> Signed-off-by: Gert Wollny 
> ---
>  src/gallium/drivers/r600/r600_asm.h  | 1 +
>  src/gallium/drivers/r600/r600_shader.c   | 3 +++
>  src/gallium/drivers/r600/sb/sb_bc_parser.cpp | 2 +-
>  3 files changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/src/gallium/drivers/r600/r600_asm.h
> b/src/gallium/drivers/r600/r600_asm.h
> index 5841044bf8..ca9280a7a8 100644
> --- a/src/gallium/drivers/r600/r600_asm.h
> +++ b/src/gallium/drivers/r600/r600_asm.h
> @@ -277,6 +277,7 @@ struct r600_bytecode {
>   struct r600_bytecode_output pending_outputs[5];
>   int n_pending_outputs;
>   boolean need_wait_ack; /* emit a
> pending WAIT_ACK prior to control flow */
> + boolean precise;
>  };
>  
>  /* eg_asm.c */
> diff --git a/src/gallium/drivers/r600/r600_shader.c
> b/src/gallium/drivers/r600/r600_shader.c
> index 2229dc8fab..408939d110 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -3879,6 +3879,9 @@ static int r600_shader_from_tgsi(struct
> r600_context *rctx,
>   ctx.inst_info =
> _shader_tgsi_instruction[opcode];
>   else
>   ctx.inst_info =
> _shader_tgsi_instruction[opcode];
> +
> + ctx.bc->precise |=
> ctx.parse.FullToken.FullInstruction.Instruction.Precise;
> +
>   r = ctx.inst_info->process();
>   if (r)
>   goto out_err;
> diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
> b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
> index a7b828268b..eafc1cb8ec 100644
> --- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
> +++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
> @@ -75,7 +75,7 @@ int bc_parser::decode() {
>   }
>  
>   sh = new shader(ctx, t, bc->debug_id);
> - sh->safe_math = sb_context::safe_math || (t ==
> TARGET_COMPUTE);
> + sh->safe_math = sb_context::safe_math || (t ==
> TARGET_COMPUTE || bc->precise);
>  
>   int r = decode_shader();
>  
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2 (resend)] virgl: Pass resource size and transfer offsets

2018-09-14 Thread Gert Wollny
Am Freitag, den 14.09.2018, 15:26 +0300 schrieb andrey simiklit:
[...]
> > +   if (vcmd == VCMD_TRANSFER_PUT2)
> > +  vtest_hdr[VTEST_CMD_LEN] += data_size + 3 / 4;
> 
> Looks like a copy/paste mistake)
> I suppose that it should be like:
> ... = (data_size + 3) / 4;
> or maybe just:
> ... = data_size;
Good catch I'll have to check what is actually correct. Tomeu maybe you
could clarify? 

Best, 
Gert

>  
> > +
> > +   cmd[VCMD_TRANSFER2_RES_HANDLE] = handle;
> > +   cmd[VCMD_TRANSFER2_LEVEL] = level;
> > +   cmd[VCMD_TRANSFER2_X] = box->x;
> > +   cmd[VCMD_TRANSFER2_Y] = box->y;
> > +   cmd[VCMD_TRANSFER2_Z] = box->z;
> > +   cmd[VCMD_TRANSFER2_WIDTH] = box->width;
> > +   cmd[VCMD_TRANSFER2_HEIGHT] = box->height;
> > +   cmd[VCMD_TRANSFER2_DEPTH] = box->depth;
> > +   cmd[VCMD_TRANSFER2_DATA_SIZE] = data_size;
> > +   cmd[VCMD_TRANSFER2_OFFSET] = offset;
> > +   virgl_block_write(vws->sock_fd, &vtest_hdr, sizeof(vtest_hdr));
> > +   virgl_block_write(vws->sock_fd, &cmd, sizeof(cmd));
> > +
> > +   return 0;
> > +}
> > +
> > +int virgl_vtest_send_transfer_get(struct virgl_vtest_winsys *vws,
> > +  uint32_t handle,
> > +  uint32_t level, uint32_t stride,
> > +  uint32_t layer_stride,
> > +  const struct pipe_box *box,
> > +  uint32_t data_size,
> > +  uint32_t offset)
> > +{
> > +   if (vws->protocol_version < 1)
> > +  return virgl_vtest_send_transfer_cmd(vws, VCMD_TRANSFER_GET,
> > handle,
> > +   level, stride,
> > layer_stride, box,
> > +   data_size);
> > +
> > +   return virgl_vtest_send_transfer_cmd2(vws, VCMD_TRANSFER_GET2,
> > handle,
> > +level, box, data_size,
> > offset);
> > +}
> > +
> > +int virgl_vtest_send_transfer_put(struct virgl_vtest_winsys *vws,
> > +  uint32_t handle,
> > +  uint32_t level, uint32_t stride,
> > +  uint32_t layer_stride,
> > +  const struct pipe_box *box,
> > +  uint32_t data_size,
> > +  uint32_t offset)
> > +{
> > +   if (vws->protocol_version < 1)
> > +  return virgl_vtest_send_transfer_cmd(vws, VCMD_TRANSFER_PUT,
> > handle,
> > +   level, stride,
> > layer_stride, box,
> > +   data_size);
> > +
> > +   return virgl_vtest_send_transfer_cmd2(vws, VCMD_TRANSFER_PUT2,
> > handle,
> > +level, box, data_size,
> > offset);
> > +}
> > +
> >  int virgl_vtest_send_transfer_put_data(struct virgl_vtest_winsys
> > *vws,
> > void *data,
> > uint32_t data_size)
> > @@ -327,20 +437,27 @@ int virgl_vtest_recv_transfer_get_data(struct
> > virgl_vtest_winsys *vws,
> > uint32_t data_size,
> > uint32_t stride,
> > const struct pipe_box *box,
> > -   uint32_t format)
> > +   uint32_t format, uint32_t
> > res_stride)
> >  {
> > -   void *line;
> > -   void *ptr = data;
> > -   int hblocks = util_format_get_nblocksy(format, box->height);
> > -
> > -   line = malloc(stride);
> > -   while (hblocks) {
> > -  virgl_block_read(vws->sock_fd, line, stride);
> > -  memcpy(ptr, line, util_format_get_stride(format, box-
> > >width));
> > +   char *ptr = data;
> > +   uint32_t bytes_to_read = data_size;
> > +   char dump[1024];
> > +
> > +   /* Copy the data from the IOV to the target resource respecting
> > +* the different strides */
> > +   for (int y = 0 ; y < box->height && bytes_to_read > 0; ++y) {
> > +  uint32_t btr = MIN2(res_stride, bytes_to_read);
> > +  virgl_block_read(vws->sock_fd, ptr, btr);
> >ptr += stride;
> > -  hblocks--;
> > +  bytes_to_read -= btr;
> > +   }
> > +
> > +   /* It seems that there may be extra bytes that need to be read
> > */
> > +   while (bytes_to_read > 0 && bytes_to_read < data_size) {
> > +  uint32_t btr = MIN2(sizeof(dump), bytes_to_read);
> > +  virgl_block_read(vws->sock_fd, dump, btr);
> > +  bytes_to_read -= btr;
> > }
> > -   free(line);
> > return 0;
> >  }
> > 
> > diff --git a/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c
> > b/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c
> > index 6c03a6b359..52a5245b6a 100644
> > --- a/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c
> > +++ b/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c
> > @@ -79,9 +79,13 @@ virgl_vtest_transfer_put(struct 

[Mesa-dev] [PATCH] r600/sb: use safe math optimizatiosn when TGSI contains precise operations

2018-09-14 Thread Gert Wollny
Fixes:
  dEQP-GLES3.functional.shaders.invariance.highp.common_subexpression_3
  dEQP-GLES3.functional.shaders.invariance.mediump.common_subexpression_3
  dEQP-GLES3.functional.shaders.invariance.lowp.common_subexpression_3

Signed-off-by: Gert Wollny 
---
 src/gallium/drivers/r600/r600_asm.h  | 1 +
 src/gallium/drivers/r600/r600_shader.c   | 3 +++
 src/gallium/drivers/r600/sb/sb_bc_parser.cpp | 2 +-
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/r600_asm.h 
b/src/gallium/drivers/r600/r600_asm.h
index 5841044bf8..ca9280a7a8 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -277,6 +277,7 @@ struct r600_bytecode {
struct r600_bytecode_output pending_outputs[5];
int n_pending_outputs;
boolean need_wait_ack; /* emit a pending WAIT_ACK prior 
to control flow */
+   boolean precise;
 };
 
 /* eg_asm.c */
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 2229dc8fab..408939d110 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -3879,6 +3879,9 @@ static int r600_shader_from_tgsi(struct r600_context 
*rctx,
ctx.inst_info = 
_shader_tgsi_instruction[opcode];
else
ctx.inst_info = 
_shader_tgsi_instruction[opcode];
+
+   ctx.bc->precise |= 
ctx.parse.FullToken.FullInstruction.Instruction.Precise;
+
r = ctx.inst_info->process();
if (r)
goto out_err;
diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp 
b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
index a7b828268b..eafc1cb8ec 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
@@ -75,7 +75,7 @@ int bc_parser::decode() {
}
 
sh = new shader(ctx, t, bc->debug_id);
-   sh->safe_math = sb_context::safe_math || (t == TARGET_COMPUTE);
+   sh->safe_math = sb_context::safe_math || (t == TARGET_COMPUTE || 
bc->precise);
 
int r = decode_shader();
 
-- 
2.16.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 107547] shader crashing glsl_compiler (uniform block assigned to vec2, then component substraced by 1)

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107547

--- Comment #4 from Sergii Romantsov  ---
One more version:
https://patchwork.freedesktop.org/series/48256/

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v1] glsl: missed error_emitted for do_assignment

2018-09-14 Thread Sergii Romantsov
It seems that this patch is a simplified version of an already existing one:
https://patchwork.freedesktop.org/series/48256/

On Fri, Sep 14, 2018 at 4:39 PM, Sergii Romantsov <
sergii.romant...@gmail.com> wrote:

> During do_assignment a validation of rhs may fail.
> Because of lack error_emitted an error_value may not be generated.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107547
> Signed-off-by: Sergii Romantsov 
> ---
>  src/compiler/glsl/ast_to_hir.cpp | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_
> hir.cpp
> index 5d3f10b..da1654b 100644
> --- a/src/compiler/glsl/ast_to_hir.cpp
> +++ b/src/compiler/glsl/ast_to_hir.cpp
> @@ -1013,6 +1013,8 @@ do_assignment(exec_list *instructions, struct
> _mesa_glsl_parse_state *state,
>   mark_whole_array_access(lhs);
>}
> }
> +   else
> +  error_emitted = true;
>
> /* Most callers of do_assignment (assign, add_assign, pre_inc/dec,
>  * but not post_inc) need the converted assigned value as an rvalue
> --
> 2.7.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>



-- 
Sergii Romantsov
GlobalLogic Inc.
www.globallogic.com
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 97516] GLX_OML_swap_method not fully supported

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=97516

--- Comment #3 from Sven Arvidsson  ---
As a temporary workaround to get the game Brink running I disabled the
GLX_OML_swap_method. Wine has a check, and will ignore the attribute if the
extension is missing.

Brink starts and runs, though I'm not sure what the possible side effects are?

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 107547] shader crashing glsl_compiler (uniform block assigned to vec2, then component substraced by 1)

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107547

--- Comment #3 from Sergii Romantsov  ---
Proposed patch:
https://patchwork.freedesktop.org/patch/249568/

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v1] glsl: missed error_emitted for do_assignment

2018-09-14 Thread Sergii Romantsov
During do_assignment, validation of the rhs may fail.
Because error_emitted is not set in that case, an error_value may not be generated.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107547
Signed-off-by: Sergii Romantsov 
---
 src/compiler/glsl/ast_to_hir.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp
index 5d3f10b..da1654b 100644
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -1013,6 +1013,8 @@ do_assignment(exec_list *instructions, struct 
_mesa_glsl_parse_state *state,
  mark_whole_array_access(lhs);
   }
}
+   else
+  error_emitted = true;
 
/* Most callers of do_assignment (assign, add_assign, pre_inc/dec,
 * but not post_inc) need the converted assigned value as an rvalue
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 107391] feature request: enforceable vsync and anisotropic filtering via environment variables

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107391

--- Comment #2 from tempel.jul...@gmail.com ---
DXVK now also supports enforcing vsync on/off. :)
However, it would still be nice to also have these options for outside of DXVK
since native Linux apps might not offer full control to the user either
(admittedly less likely).

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 107391] feature request: enforceable vsync and anisotropic filtering via environment variables

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107391

--- Comment #1 from Samuel Pitoiset  ---
DXVK now allows forcing AF, so I guess the only feature you want is vsync?

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 106769] radv: add support for shaderStorageImageMultisample

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=106769

Samuel Pitoiset  changed:

   What|Removed |Added

Summary|radv: Support   |radv: add support for
   |VK_EXT_shader_viewport_inde |shaderStorageImageMultisamp
   |x_layer and |le
   |shaderStorageImageMultisamp |
   |le  |

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radv: fix descriptor pool allocation size

2018-09-14 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 
On Fri, Sep 14, 2018 at 2:55 PM Samuel Pitoiset
 wrote:
>
> The size has to be multiplied by the number of sets.
>
> This gets rid of the OUT_OF_POOL_KHR error and fixes
> the Tangrams demo.
>
> CC: 18.1 18.2 
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_descriptor_set.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/src/amd/vulkan/radv_descriptor_set.c 
> b/src/amd/vulkan/radv_descriptor_set.c
> index c4341f6ac5..49d0811bb0 100644
> --- a/src/amd/vulkan/radv_descriptor_set.c
> +++ b/src/amd/vulkan/radv_descriptor_set.c
> @@ -569,9 +569,10 @@ VkResult radv_CreateDescriptorPool(
> }
>
> if (!(pCreateInfo->flags & 
> VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT)) {
> -   uint64_t host_size = pCreateInfo->maxSets * sizeof(struct 
> radv_descriptor_set);
> +   uint64_t host_size = sizeof(struct radv_descriptor_set);
> host_size += sizeof(struct radeon_winsys_bo*) * bo_count;
> host_size += sizeof(struct radv_descriptor_range) * 
> range_count;
> +   host_size *= pCreateInfo->maxSets;
> size += host_size;
> } else {
> size += sizeof(struct radv_descriptor_pool_entry) * 
> pCreateInfo->maxSets;
> --
> 2.19.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 106922] Tangrams demo: LLVM ERROR: Cannot select: 0x7e8d8750: i16 = bitcast 0x7e8d8af8

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=106922

--- Comment #6 from Samuel Pitoiset  ---
The demo should now work if you apply:

https://patchwork.freedesktop.org/series/49701/
https://patchwork.freedesktop.org/series/49710/

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radv: fix descriptor pool allocation size

2018-09-14 Thread Samuel Pitoiset
The size has to be multiplied by the number of sets.

This gets rid of the OUT_OF_POOL_KHR error and fixes
the Tangrams demo.

CC: 18.1 18.2 
Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_descriptor_set.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/amd/vulkan/radv_descriptor_set.c 
b/src/amd/vulkan/radv_descriptor_set.c
index c4341f6ac5..49d0811bb0 100644
--- a/src/amd/vulkan/radv_descriptor_set.c
+++ b/src/amd/vulkan/radv_descriptor_set.c
@@ -569,9 +569,10 @@ VkResult radv_CreateDescriptorPool(
}
 
if (!(pCreateInfo->flags & 
VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT)) {
-   uint64_t host_size = pCreateInfo->maxSets * sizeof(struct 
radv_descriptor_set);
+   uint64_t host_size = sizeof(struct radv_descriptor_set);
host_size += sizeof(struct radeon_winsys_bo*) * bo_count;
host_size += sizeof(struct radv_descriptor_range) * range_count;
+   host_size *= pCreateInfo->maxSets;
size += host_size;
} else {
size += sizeof(struct radv_descriptor_pool_entry) * 
pCreateInfo->maxSets;
-- 
2.19.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH mesa v2] vulkan/wsi/display: check if wsi_swapchain_init() succeeded

2018-09-14 Thread Jason Ekstrand

Rb

On September 14, 2018 04:12:44 Eric Engestrom  wrote:

Fixes: da997ebec929421939553 "vulkan: Add KHR_display extension using DRM 
[v10]"

Cc: Keith Packard 
Cc: Jason Ekstrand 
Signed-off-by: Eric Engestrom 
---
v2: don't forget to free the swapchain too (Jason)
---
src/vulkan/wsi/wsi_common_display.c | 4 
1 file changed, 4 insertions(+)

diff --git a/src/vulkan/wsi/wsi_common_display.c 
b/src/vulkan/wsi/wsi_common_display.c

index 1e90bba460cba190c5ab..338fad6be792385db9f4 100644
--- a/src/vulkan/wsi/wsi_common_display.c
+++ b/src/vulkan/wsi/wsi_common_display.c
@@ -1712,6 +1712,10 @@ wsi_display_surface_create_swapchain(

   VkResult result = wsi_swapchain_init(wsi_device, >base, device,
create_info, allocator);
+   if (result != VK_SUCCESS) {
+  vk_free(allocator, chain);
+  return result;
+   }

   chain->base.destroy = wsi_display_swapchain_destroy;
   chain->base.get_wsi_image = wsi_display_get_wsi_image;
--
Cheers,
 Eric




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 107351] Android 8.1: radv segfault with 3Dmark vulkan benchmarks

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107351

Samuel Pitoiset  changed:

   What|Removed |Added

 Resolution|--- |FIXED
 Status|NEW |RESOLVED

--- Comment #16 from Samuel Pitoiset  ---
Thanks for letting us know. Closing!

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 107923] build_id.c:126: multiple definition of `build_id_length'

2018-09-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107923

--- Comment #1 from Sergii Romantsov  ---
Hello, Vinson,
could you, please, specify your build-configuration?
I don't see such output.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2 (resend)] virgl: Pass resource size and transfer offsets

2018-09-14 Thread andrey simiklit
Hello,

Please find my comment below:

Regards,
Andrii.
On Fri, Sep 14, 2018 at 12:23 PM Gert Wollny 
wrote:

> From: Tomeu Vizoso 
>
> Pass the size of a resource when creating it so a backing can be kept in
> the other side.
>
> Also pass the required offset to transfer commands.
>
> This moves vtest closer to how virtio-gpu works, making it more useful
> for testing.
>
> v2: - Use new messages for creation and transfers, as changing the
>   behavior of the existing messages would be messy given that we don't
>   want to break compatibility with older servers.
>
> v3: - Gert: Use correct strides: The resource corresponding to the output
>   display might have a different line stride than the IOVs, so when
>   reading back to this resource take the resource stride and the
>   IOV stride into account.
>
> Signed-off-by: Tomeu Vizoso  (v2)
> Signed-off-by: Gert Wollny 
> ---
>  .../winsys/virgl/vtest/virgl_vtest_socket.c| 143
> +++--
>  .../winsys/virgl/vtest/virgl_vtest_winsys.c|  38 --
>  .../winsys/virgl/vtest/virgl_vtest_winsys.h|  19 ++-
>  src/gallium/winsys/virgl/vtest/vtest_protocol.h|  29 +
>  4 files changed, 201 insertions(+), 28 deletions(-)
>
> diff --git a/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c
> b/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c
> index 4d20a63ad6..3aa01aabdf 100644
> --- a/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c
> +++ b/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c
> @@ -221,6 +221,42 @@ int virgl_vtest_send_get_caps(struct
> virgl_vtest_winsys *vws,
> return 0;
>  }
>
> +static int virgl_vtest_send_resource_create2(struct virgl_vtest_winsys
> *vws,
> + uint32_t handle,
> + enum pipe_texture_target
> target,
> + uint32_t format,
> + uint32_t bind,
> + uint32_t width,
> + uint32_t height,
> + uint32_t depth,
> + uint32_t array_size,
> + uint32_t last_level,
> + uint32_t nr_samples,
> + uint32_t size)
> +{
> +   uint32_t res_create_buf[VCMD_RES_CREATE2_SIZE],
> vtest_hdr[VTEST_HDR_SIZE];
> +
> +   vtest_hdr[VTEST_CMD_LEN] = VCMD_RES_CREATE2_SIZE;
> +   vtest_hdr[VTEST_CMD_ID] = VCMD_RESOURCE_CREATE2;
> +
> +   res_create_buf[VCMD_RES_CREATE2_RES_HANDLE] = handle;
> +   res_create_buf[VCMD_RES_CREATE2_TARGET] = target;
> +   res_create_buf[VCMD_RES_CREATE2_FORMAT] = format;
> +   res_create_buf[VCMD_RES_CREATE2_BIND] = bind;
> +   res_create_buf[VCMD_RES_CREATE2_WIDTH] = width;
> +   res_create_buf[VCMD_RES_CREATE2_HEIGHT] = height;
> +   res_create_buf[VCMD_RES_CREATE2_DEPTH] = depth;
> +   res_create_buf[VCMD_RES_CREATE2_ARRAY_SIZE] = array_size;
> +   res_create_buf[VCMD_RES_CREATE2_LAST_LEVEL] = last_level;
> +   res_create_buf[VCMD_RES_CREATE2_NR_SAMPLES] = nr_samples;
> +   res_create_buf[VCMD_RES_CREATE2_DATA_SIZE] = size;
> +
> +   virgl_block_write(vws->sock_fd, _hdr, sizeof(vtest_hdr));
> +   virgl_block_write(vws->sock_fd, _create_buf,
> sizeof(res_create_buf));
> +
> +   return 0;
> +}
> +
>  int virgl_vtest_send_resource_create(struct virgl_vtest_winsys *vws,
>   uint32_t handle,
>   enum pipe_texture_target target,
> @@ -231,10 +267,17 @@ int virgl_vtest_send_resource_create(struct
> virgl_vtest_winsys *vws,
>   uint32_t depth,
>   uint32_t array_size,
>   uint32_t last_level,
> - uint32_t nr_samples)
> + uint32_t nr_samples,
> + uint32_t size)
>  {
> uint32_t res_create_buf[VCMD_RES_CREATE_SIZE],
> vtest_hdr[VTEST_HDR_SIZE];
>
> +   if (vws->protocol_version >= 1)
> +  return virgl_vtest_send_resource_create2(vws, handle, target,
> format,
> +   bind, width, height, depth,
> +   array_size, last_level,
> +   nr_samples, size);
> +
> vtest_hdr[VTEST_CMD_LEN] = VCMD_RES_CREATE_SIZE;
> vtest_hdr[VTEST_CMD_ID] = VCMD_RESOURCE_CREATE;
>
> @@ -282,7 +325,7 @@ int virgl_vtest_send_resource_unref(struct
> virgl_vtest_winsys *vws,
> return 0;
>  }
>
> -int virgl_vtest_send_transfer_cmd(struct virgl_vtest_winsys *vws,
> +static int virgl_vtest_send_transfer_cmd(struct virgl_vtest_winsys *vws,
>uint32_t vcmd,
>   

Re: [Mesa-dev] [PATCH 9/9] radv: enable shaderInt16 capability

2018-09-14 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

for the series.
On Fri, Sep 14, 2018 at 12:51 PM Samuel Pitoiset
 wrote:
>
> Not sure if this is all wired up. CTS does pass and the Tangrams
> demo seems to work (though with a local hack that is unrelated
> to 16-bit integer).
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_device.c | 2 +-
>  src/amd/vulkan/radv_shader.c | 1 +
>  2 files changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index 8989ec3553..af7754bea3 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -734,7 +734,7 @@ void radv_GetPhysicalDeviceFeatures(
> .shaderCullDistance   = true,
> .shaderFloat64= true,
> .shaderInt64  = true,
> -   .shaderInt16  = false,
> +   .shaderInt16  = true,
> .sparseBinding= true,
> .variableMultisampleRate  = true,
> .inheritedQueries = true,
> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
> index e05961339c..fc2033d6a9 100644
> --- a/src/amd/vulkan/radv_shader.c
> +++ b/src/amd/vulkan/radv_shader.c
> @@ -210,6 +210,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
> .image_write_without_format = true,
> .tessellation = true,
> .int64 = true,
> +   .int16 = true,
> .multiview = true,
> .subgroup_ballot = true,
> .subgroup_basic = true,
> --
> 2.19.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/9] ac: add 16-bit support to ac_build_umsb()

2018-09-14 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_llvm_build.c | 18 --
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 71723d5e91..905146c9f2 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1393,17 +1393,31 @@ ac_build_umsb(struct ac_llvm_context *ctx,
LLVMTypeRef type;
LLVMValueRef highest_bit;
LLVMValueRef zero;
+   unsigned bitsize;
 
-   if (ac_get_elem_bits(ctx, LLVMTypeOf(arg)) == 64) {
+   bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(arg));
+   switch (bitsize) {
+   case 64:
intrin_name = "llvm.ctlz.i64";
type = ctx->i64;
highest_bit = LLVMConstInt(ctx->i64, 63, false);
zero = ctx->i64_0;
-   } else {
+   break;
+   case 32:
intrin_name = "llvm.ctlz.i32";
type = ctx->i32;
highest_bit = LLVMConstInt(ctx->i32, 31, false);
zero = ctx->i32_0;
+   break;
+   case 16:
+   intrin_name = "llvm.ctlz.i16";
+   type = ctx->i16;
+   highest_bit = LLVMConstInt(ctx->i16, 15, false);
+   zero = ctx->i16_0;
+   break;
+   default:
+   unreachable(!"invalid bitsize");
+   break;
}
 
LLVMValueRef params[2] = {
-- 
2.19.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/9] ac: add ac_build_bitfield_reverse() helper

2018-09-14 Thread Samuel Pitoiset
Are we missing 64-bit support?

Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_llvm_build.c  | 22 ++
 src/amd/common/ac_llvm_build.h  |  3 +++
 src/amd/common/ac_nir_to_llvm.c |  2 +-
 3 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 4fbe0ddb9c..a99314266c 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -2135,6 +2135,28 @@ LLVMValueRef ac_build_bit_count(struct ac_llvm_context 
*ctx, LLVMValueRef src0)
return result;
 }
 
+LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx,
+  LLVMValueRef src0)
+{
+   LLVMValueRef result;
+   unsigned bitsize;
+
+   bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
+
+   switch (bitsize) {
+   case 32:
+   result = ac_build_intrinsic(ctx, "llvm.bitreverse.i32", 
ctx->i32,
+   (LLVMValueRef []) { src0 }, 1,
+   AC_FUNC_ATTR_READNONE);
+   break;
+   default:
+   unreachable(!"invalid bitsize");
+   break;
+   }
+
+   return result;
+}
+
 #define AC_EXP_TARGET  0
 #define AC_EXP_ENABLED_CHANNELS 1
 #define AC_EXP_OUT02
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 98a61a2405..8524d1fa76 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -426,6 +426,9 @@ LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, 
LLVMValueRef src0,
 
 LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef 
src0);
 
+LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx,
+  LLVMValueRef src0);
+
 void ac_optimize_vs_outputs(struct ac_llvm_context *ac,
LLVMValueRef main_fn,
uint8_t *vs_output_param_offset,
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 39489a4b3b..3e88ae66ec 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -836,7 +836,7 @@ static void visit_alu(struct ac_nir_context *ctx, const 
nir_alu_instr *instr)
result = emit_bitfield_insert(>ac, src[0], src[1], src[2], 
src[3]);
break;
case nir_op_bitfield_reverse:
-   result = ac_build_intrinsic(>ac, "llvm.bitreverse.i32", 
ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE);
+   result = ac_build_bitfield_reverse(>ac, src[0]);
break;
case nir_op_bit_count:
result = ac_build_bit_count(>ac, src[0]);
-- 
2.19.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 8/9] ac: add 16-bit support to ac_build_bitfield_reverse()

2018-09-14 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_llvm_build.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 98635ec25a..ab0ba09c83 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -2181,6 +2181,11 @@ LLVMValueRef ac_build_bitfield_reverse(struct 
ac_llvm_context *ctx,
(LLVMValueRef []) { src0 }, 1,
AC_FUNC_ATTR_READNONE);
break;
+   case 16:
+   result = ac_build_intrinsic(ctx, "llvm.bitreverse.i16", 
ctx->i16,
+   (LLVMValueRef []) { src0 }, 1,
+   AC_FUNC_ATTR_READNONE);
+   break;
default:
unreachable(!"invalid bitsize");
break;
-- 
2.19.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 9/9] radv: enable shaderInt16 capability

2018-09-14 Thread Samuel Pitoiset
Not sure if this is all wired up. CTS does pass and the Tangrams
demo seems to work (though with a local hack that is unrelated
to 16-bit integer).

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_device.c | 2 +-
 src/amd/vulkan/radv_shader.c | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 8989ec3553..af7754bea3 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -734,7 +734,7 @@ void radv_GetPhysicalDeviceFeatures(
.shaderCullDistance   = true,
.shaderFloat64= true,
.shaderInt64  = true,
-   .shaderInt16  = false,
+   .shaderInt16  = true,
.sparseBinding= true,
.variableMultisampleRate  = true,
.inheritedQueries = true,
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index e05961339c..fc2033d6a9 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -210,6 +210,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
.image_write_without_format = true,
.tessellation = true,
.int64 = true,
+   .int16 = true,
.multiview = true,
.subgroup_ballot = true,
.subgroup_basic = true,
-- 
2.19.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 7/9] ac: add 16-bit support to ac_build_bit_count()

2018-09-14 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_llvm_build.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 6955df48e0..98635ec25a 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -2154,6 +2154,11 @@ LLVMValueRef ac_build_bit_count(struct ac_llvm_context 
*ctx, LLVMValueRef src0)
(LLVMValueRef []) { src0 }, 1,
AC_FUNC_ATTR_READNONE);
break;
+   case 16:
+   result = ac_build_intrinsic(ctx, "llvm.ctpop.i16", ctx->i16,
+   (LLVMValueRef []) { src0 }, 1,
+   AC_FUNC_ATTR_READNONE);
+   break;
default:
unreachable(!"invalid bitsize");
break;
-- 
2.19.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/9] ac: add 16-bit support to ac_find_lsb()

2018-09-14 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_llvm_build.c | 15 +--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 905146c9f2..6955df48e0 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -2499,14 +2499,25 @@ LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
const char *intrin_name;
LLVMTypeRef type;
LLVMValueRef zero;
-   if (src0_bitsize == 64) {
+
+   switch (src0_bitsize) {
+   case 64:
intrin_name = "llvm.cttz.i64";
type = ctx->i64;
zero = ctx->i64_0;
-   } else {
+   break;
+   case 32:
intrin_name = "llvm.cttz.i32";
type = ctx->i32;
zero = ctx->i32_0;
+   break;
+   case 16:
+   intrin_name = "llvm.cttz.i16";
+   type = ctx->i16;
+   zero = ctx->i16_0;
+   break;
+   default:
+   unreachable(!"invalid bitsize");
}
 
LLVMValueRef params[2] = {
-- 
2.19.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/9] ac: add 16-bit support to ac_build_isign()

2018-09-14 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_llvm_build.c | 21 -
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 011cea5cd0..71723d5e91 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -2069,14 +2069,25 @@ LLVMValueRef ac_build_isign(struct ac_llvm_context 
*ctx, LLVMValueRef src0,
LLVMValueRef cmp, val, zero, one;
LLVMTypeRef type;
 
-   if (bitsize == 32) {
-   type = ctx->i32;
-   zero = ctx->i32_0;
-   one = ctx->i32_1;
-   } else {
+   switch (bitsize) {
+   case 64:
type = ctx->i64;
zero = ctx->i64_0;
one = ctx->i64_1;
+   break;
+   case 32:
+   type = ctx->i32;
+   zero = ctx->i32_0;
+   one = ctx->i32_1;
+   break;
+   case 16:
+   type = ctx->i16;
+   zero = ctx->i16_0;
+   one = ctx->i16_1;
+   break;
+   default:
+   unreachable(!"invalid bitsize");
+   break;
}
 
cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, src0, zero, "");
-- 
2.19.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/9] ac: add 16-bit constant values for zero and one

2018-09-14 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_llvm_build.c | 2 ++
 src/amd/common/ac_llvm_build.h | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index a99314266c..011cea5cd0 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -87,6 +87,8 @@ ac_llvm_context_init(struct ac_llvm_context *ctx,
ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
 
+   ctx->i16_0 = LLVMConstInt(ctx->i16, 0, false);
+   ctx->i16_1 = LLVMConstInt(ctx->i16, 1, false);
ctx->i32_0 = LLVMConstInt(ctx->i32, 0, false);
ctx->i32_1 = LLVMConstInt(ctx->i32, 1, false);
ctx->i64_0 = LLVMConstInt(ctx->i64, 0, false);
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 8524d1fa76..0df9234c66 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -74,6 +74,8 @@ struct ac_llvm_context {
LLVMTypeRef v4f32;
LLVMTypeRef v8i32;
 
+   LLVMValueRef i16_0;
+   LLVMValueRef i16_1;
LLVMValueRef i32_0;
LLVMValueRef i32_1;
LLVMValueRef i64_0;
-- 
2.19.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/9] ac: add ac_build_bit_count() helper

2018-09-14 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_llvm_build.c  | 28 
 src/amd/common/ac_llvm_build.h  |  2 ++
 src/amd/common/ac_nir_to_llvm.c |  7 +--
 3 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 1f5112e992..4fbe0ddb9c 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -2107,6 +2107,34 @@ LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, 
LLVMValueRef src0,
return val;
 }
 
+LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0)
+{
+   LLVMValueRef result;
+   unsigned bitsize;
+
+   bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
+
+   switch (bitsize) {
+   case 64:
+   result = ac_build_intrinsic(ctx, "llvm.ctpop.i64", ctx->i64,
+   (LLVMValueRef []) { src0 }, 1,
+   AC_FUNC_ATTR_READNONE);
+
+   result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, "");
+   break;
+   case 32:
+   result = ac_build_intrinsic(ctx, "llvm.ctpop.i32", ctx->i32,
+   (LLVMValueRef []) { src0 }, 1,
+   AC_FUNC_ATTR_READNONE);
+   break;
+   default:
+   unreachable(!"invalid bitsize");
+   break;
+   }
+
+   return result;
+}
+
 #define AC_EXP_TARGET  0
 #define AC_EXP_ENABLED_CHANNELS 1
 #define AC_EXP_OUT02
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 0d261bae09..98a61a2405 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -424,6 +424,8 @@ LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, 
LLVMValueRef src0,
 LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src0,
unsigned bitsize);
 
+LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef 
src0);
+
 void ac_optimize_vs_outputs(struct ac_llvm_context *ac,
LLVMValueRef main_fn,
uint8_t *vs_output_param_offset,
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 18644107eb..39489a4b3b 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -839,12 +839,7 @@ static void visit_alu(struct ac_nir_context *ctx, const 
nir_alu_instr *instr)
result = ac_build_intrinsic(>ac, "llvm.bitreverse.i32", 
ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE);
break;
case nir_op_bit_count:
-   if (ac_get_elem_bits(>ac, LLVMTypeOf(src[0])) == 32)
-   result = ac_build_intrinsic(>ac, "llvm.ctpop.i32", 
ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE);
-   else {
-   result = ac_build_intrinsic(>ac, "llvm.ctpop.i64", 
ctx->ac.i64, src, 1, AC_FUNC_ATTR_READNONE);
-   result = LLVMBuildTrunc(ctx->ac.builder, result, 
ctx->ac.i32, "");
-   }
+   result = ac_build_bit_count(>ac, src[0]);
break;
case nir_op_vec2:
case nir_op_vec3:
-- 
2.19.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] anv: add support for VK_EXT_inline_uniform_block

2018-09-14 Thread Tapani Pälli
I can't say I know enough of all these parts but I went through the API 
functions and tried to check that you have proper checks in place. Will 
try to still review :)


I did not see any check against MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS
when creating the pipeline layout. I'm not sure if one is necessary (since
it's an implicit rule); do you think there should be a check/assert?


one minor possible addition below ..

On 11.09.2018 23:22, Lionel Landwerlin wrote:

This new extension adds an implicitly allocated block of uniforms into
the descriptor sets through a new descriptor type. We implement this by
having a single BO in the descriptor set pool
from which we source uniforms.

Signed-off-by: Lionel Landwerlin 
---
  src/intel/vulkan/anv_cmd_buffer.c |   3 +
  src/intel/vulkan/anv_descriptor_set.c | 238 +-
  src/intel/vulkan/anv_device.c |  22 ++
  src/intel/vulkan/anv_extensions.py|   1 +
  .../vulkan/anv_nir_apply_pipeline_layout.c|  52 
  src/intel/vulkan/anv_private.h|  33 +++
  src/intel/vulkan/genX_cmd_buffer.c|  32 ++-
  7 files changed, 367 insertions(+), 14 deletions(-)

diff --git a/src/intel/vulkan/anv_cmd_buffer.c 
b/src/intel/vulkan/anv_cmd_buffer.c
index 8ef71b0ed9c..b14be94f470 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -651,6 +651,7 @@ anv_isl_format_for_descriptor_type(VkDescriptorType type)
 switch (type) {
 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+   case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
return ISL_FORMAT_R32G32B32A32_FLOAT;
  
 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:

@@ -1039,6 +1040,8 @@ void anv_CmdPushDescriptorSetKHR(
   }
   break;
  
+  case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:

+ unreachable("Invalid descriptor type for push descriptors");
default:
   break;
}
diff --git a/src/intel/vulkan/anv_descriptor_set.c 
b/src/intel/vulkan/anv_descriptor_set.c
index 3439f828900..2e5f2a1f288 100644
--- a/src/intel/vulkan/anv_descriptor_set.c
+++ b/src/intel/vulkan/anv_descriptor_set.c
@@ -26,8 +26,10 @@
  #include 
  #include 
  #include 
+#include 
  
  #include "util/mesa-sha1.h"

+#include "vk_util.h"
  
  #include "anv_private.h"
  
@@ -40,7 +42,8 @@ void anv_GetDescriptorSetLayoutSupport(

  const VkDescriptorSetLayoutCreateInfo*  pCreateInfo,
  VkDescriptorSetLayoutSupport*   pSupport)
  {
-   uint32_t surface_count[MESA_SHADER_STAGES] = { 0, };
+   int16_t surface_count[MESA_SHADER_STAGES] = { 0, };
+   int16_t inline_surface_indexes[MESA_SHADER_STAGES] = { -1, };
  
 for (uint32_t b = 0; b < pCreateInfo->bindingCount; b++) {

const VkDescriptorSetLayoutBinding *binding = 
>pBindings[b];
@@ -50,6 +53,15 @@ void anv_GetDescriptorSetLayoutSupport(
   /* There is no real limit on samplers */
   break;
  
+  case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:

+ anv_foreach_stage(s, binding->stageFlags) {
+if (inline_surface_indexes[s] < 0) {
+   inline_surface_indexes[s] = surface_count[s];
+   surface_count[s] += 1;
+}
+ }
+ break;
+
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
   if (binding->pImmutableSamplers) {
  for (uint32_t i = 0; i < binding->descriptorCount; i++) {
@@ -118,6 +130,9 @@ VkResult anv_CreateDescriptorSetLayout(
 memset(set_layout, 0, sizeof(*set_layout));
 set_layout->ref_cnt = 1;
 set_layout->binding_count = max_binding + 1;
+   set_layout->inline_blocks_descriptor_index = -1;
+   memset(set_layout->inline_blocks_surface_indexes,
+  -1, sizeof(set_layout->inline_blocks_surface_indexes));
  
 for (uint32_t b = 0; b <= max_binding; b++) {

/* Initialize all binding_layout entries to -1 */
@@ -159,9 +174,24 @@ VkResult anv_CreateDescriptorSetLayout(
  #ifndef NDEBUG
set_layout->binding[b].type = binding->descriptorType;
  #endif
-  set_layout->binding[b].array_size = binding->descriptorCount;
-  set_layout->binding[b].descriptor_index = set_layout->size;
-  set_layout->size += binding->descriptorCount;
+
+  if (binding->descriptorType == 
VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {


Maybe add here

assert(binding->descriptorCount % 4 == 0 &&
   binding->descriptorCount <= MAX_INLINE_UNIFORM_BLOCK_SIZE);

?


+ /* We only need a single descriptor entry for all the inline uniforms. */
+ set_layout->binding[b].array_size = 1;
+ if (set_layout->inline_blocks_descriptor_index < 0) {
+set_layout->binding[b].descriptor_index =
+   set_layout->inline_blocks_descriptor_index =
+   set_layout->size;
+set_layout->size += 1;
+ } else {
+set_layout->binding[b].descriptor_index =

Re: [Mesa-dev] [PATCH] i965/fs: Don't propagate conditional modifiers from integer compares to adds

2018-09-14 Thread Alejandro Piñeiro
No shader-db changes, so perhaps adding a test on
test_fs_cmod_propagation? In any case, the patch looks good to me:

Reviewed-by: Alejandro Piñeiro 


On 14/09/18 00:06, Ian Romanick wrote:
> From: Ian Romanick 
>
> No shader-db changes on any Intel platform... which probably explains
> why no bugs have been bisected to this problem since it landed in Mesa
> 18.1. :( The commit mentioned below is in 18.2, so 18.1 would need a
> slightly different fix (due to code refactoring).
>
> Signed-off-by: Ian Romanick 
> Fixes: 77f269bb560 "i965/fs: Refactor propagation of conditional modifiers 
> from compares to adds"
> Cc: Matt Turner  (reviewed the original patch)
> Cc: Alejandro Piñeiro  (reviewed the original patch)
> ---
>  src/intel/compiler/brw_fs_cmod_propagation.cpp | 10 +-
>  1 file changed, 9 insertions(+), 1 deletion(-)
>
> diff --git a/src/intel/compiler/brw_fs_cmod_propagation.cpp 
> b/src/intel/compiler/brw_fs_cmod_propagation.cpp
> index 5b74f267359..5fb522f810f 100644
> --- a/src/intel/compiler/brw_fs_cmod_propagation.cpp
> +++ b/src/intel/compiler/brw_fs_cmod_propagation.cpp
> @@ -211,9 +211,17 @@ opt_cmod_propagation_local(const gen_device_info 
> *devinfo, bblock_t *block)
>/* A CMP with a second source of zero can match with anything.  A CMP
> * with a second source that is not zero can only match with an ADD
> * instruction.
> +   *
> +   * Only apply this optimization to floating-point sources.  It can fail 
> for
> +   * integers.  For inputs a = 0x80000000, b = 4, int(0x80000000) < 4, 
> but
> +   * int(0x80000000) - 4 overflows and results in 0x7ffffffc.  That's not
> +   * less than zero, so the flags get set differently than for (a < b).
> */
>if (inst->opcode == BRW_OPCODE_CMP && !inst->src[1].is_zero()) {
> - progress = cmod_propagate_cmp_to_add(devinfo, block, inst) || 
> progress;
> + if (brw_reg_type_is_floating_point(inst->src[0].type) &&
> + cmod_propagate_cmp_to_add(devinfo, block, inst))
> +progress = true;
> +
>   continue;
>}
>  

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium/util: don't let children of fork & exec inherit our thread affinity

2018-09-14 Thread Michel Dänzer
On 2018-09-14 10:53 a.m., Michel Dänzer wrote:
> On 2018-09-13 8:56 p.m., Marek Olšák wrote:
> 
>> +* What happens if a driver is unloaded and the app creates a thread?
> 
> I suppose the child process will likely crash, because the memory
> address where util_set_full_cpu_affinity was located will either be
> unmapped or have random other contents?
> 
> At least in theory, there could also be an issue where the application
> might have set its own thread affinity before calling fork, which would
> be clobbered by util_set_full_cpu_affinity in the child process.

Note that these two issues only apply to spawning a child process with
fork, not to spawning a thread in the same process.


BTW, Julien Cristau pointed out on IRC that setting the thread affinity
also seems to cause trouble with Firefox's sandbox. I'm seeing messages
like

 Sandbox: seccomp sandbox violation: pid 3039, tid 3039, syscall 203, args 3105 
128 140733772165072 1 16 140515081189120.

with Firefox 62, but apparently it can even crash with Firefox nightly
at least: https://bugzilla.mozilla.org/1490994


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] anv: descriptors: split allocation function

2018-09-14 Thread Tapani Pälli

did not spot any behavioral change, LGTM

Reviewed-by: Tapani Pälli 

On 11.09.2018 23:22, Lionel Landwerlin wrote:

The following commits will make the allocation more complicated so
split the free list allocation logic out.

Signed-off-by: Lionel Landwerlin 
---
  src/intel/vulkan/anv_descriptor_set.c | 68 +++
  src/intel/vulkan/anv_private.h|  5 +-
  2 files changed, 42 insertions(+), 31 deletions(-)

diff --git a/src/intel/vulkan/anv_descriptor_set.c 
b/src/intel/vulkan/anv_descriptor_set.c
index 2bd1d86f4d4..3439f828900 100644
--- a/src/intel/vulkan/anv_descriptor_set.c
+++ b/src/intel/vulkan/anv_descriptor_set.c
@@ -432,8 +432,8 @@ VkResult anv_CreateDescriptorPool(
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
  
 pool->size = pool_size;

-   pool->next = 0;
-   pool->free_list = EMPTY;
+   pool->next_set = 0;
+   pool->free_set_list = EMPTY;
  
 anv_state_stream_init(>surface_state_stream,

   >surface_state_pool, 4096);
@@ -467,8 +467,8 @@ VkResult anv_ResetDescriptorPool(
 ANV_FROM_HANDLE(anv_device, device, _device);
 ANV_FROM_HANDLE(anv_descriptor_pool, pool, descriptorPool);
  
-   pool->next = 0;

-   pool->free_list = EMPTY;
+   pool->next_set = 0;
+   pool->free_set_list = EMPTY;
 anv_state_stream_finish(>surface_state_stream);
 anv_state_stream_init(>surface_state_stream,
   >surface_state_pool, 4096);
@@ -496,35 +496,45 @@ struct surface_state_free_list_entry {
 struct anv_state state;
  };
  
+static struct anv_descriptor_set *

+anv_descriptor_alloc(struct anv_descriptor_pool *pool,
+ struct anv_descriptor_set_layout *layout,
+ size_t size)
+{
+   struct anv_descriptor_set *set = NULL;
+
+   if (size <= pool->size - pool->next_set) {
+  set = (struct anv_descriptor_set *) (pool->data + pool->next_set);
+  pool->next_set += size;
+  return set;
+   }
+
+   struct pool_free_list_entry *entry;
+   uint32_t *link = >free_set_list;
+   for (uint32_t f = pool->free_set_list; f != EMPTY; f = entry->next) {
+  entry = (struct pool_free_list_entry *) (pool->data + f);
+  if (size <= entry->size) {
+ uint32_t next = entry->next;
+ set = (struct anv_descriptor_set *) entry;
+ *link = next;
+ return set;
+  }
+  link = >next;
+   }
+
+   return NULL;
+}
+
  VkResult
  anv_descriptor_set_create(struct anv_device *device,
struct anv_descriptor_pool *pool,
struct anv_descriptor_set_layout *layout,
struct anv_descriptor_set **out_set)
  {
-   struct anv_descriptor_set *set;
 const size_t size = anv_descriptor_set_layout_size(layout);
-
-   set = NULL;
-   if (size <= pool->size - pool->next) {
-  set = (struct anv_descriptor_set *) (pool->data + pool->next);
-  pool->next += size;
-   } else {
-  struct pool_free_list_entry *entry;
-  uint32_t *link = >free_list;
-  for (uint32_t f = pool->free_list; f != EMPTY; f = entry->next) {
- entry = (struct pool_free_list_entry *) (pool->data + f);
- if (size <= entry->size) {
-*link = entry->next;
-set = (struct anv_descriptor_set *) entry;
-break;
- }
- link = >next;
-  }
-   }
-
+   struct anv_descriptor_set *set = anv_descriptor_alloc(pool, layout, size);
 if (set == NULL) {
-  if (pool->free_list != EMPTY) {
+  if (pool->free_set_list != EMPTY) {
   return vk_error(VK_ERROR_FRAGMENTED_POOL);
} else {
   return vk_error(VK_ERROR_OUT_OF_POOL_MEMORY);
@@ -603,13 +613,13 @@ anv_descriptor_set_destroy(struct anv_device *device,
  
 /* Put the descriptor set allocation back on the free list. */

 const uint32_t index = (char *) set - pool->data;
-   if (index + set->size == pool->next) {
-  pool->next = index;
+   if (index + set->size == pool->next_set) {
+  pool->next_set = index;
 } else {
struct pool_free_list_entry *entry = (struct pool_free_list_entry *) 
set;
-  entry->next = pool->free_list;
+  entry->next = pool->free_set_list;
entry->size = set->size;
-  pool->free_list = (char *) entry - pool->data;
+  pool->free_set_list = (char *) entry - pool->data;
 }
  }
  
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h

index d15a91dd014..372b7c69635 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1500,8 +1500,9 @@ struct anv_push_descriptor_set {
  
  struct anv_descriptor_pool {

 uint32_t size;
-   uint32_t next;
-   uint32_t free_list;
+
+   uint32_t free_set_list;
+   uint32_t next_set;
  
 struct anv_state_stream surface_state_stream;

 void *surface_state_free_list;


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org

Re: [Mesa-dev] [PATCH] radv: implement VK_EXT_conservative_rasterization

2018-09-14 Thread Samuel Pitoiset



On 9/14/18 11:10 AM, Bas Nieuwenhuizen wrote:

On Fri, Sep 14, 2018 at 10:31 AM Samuel Pitoiset
 wrote:




On 9/12/18 11:19 PM, Bas Nieuwenhuizen wrote:

On Wed, Sep 12, 2018 at 10:44 PM Samuel Pitoiset
 wrote:


Only supported by GFX9+.

The conservativeraster Sascha demo seems to work as expected.

Signed-off-by: Samuel Pitoiset 
---
   src/amd/vulkan/radv_device.c  | 14 +
   src/amd/vulkan/radv_extensions.py |  1 +
   src/amd/vulkan/radv_pipeline.c| 48 ++-
   3 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 7917ed7ffe..60776a863e 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -1152,6 +1152,20 @@ void radv_GetPhysicalDeviceProperties2(
  properties->protectedNoFault = false;
  break;
  }
+   case 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT: {
+   VkPhysicalDeviceConservativeRasterizationPropertiesEXT 
*properties =
+   
(VkPhysicalDeviceConservativeRasterizationPropertiesEXT *)ext;
+   properties->primitiveOverestimationSize = 0;
+   properties->maxExtraPrimitiveOverestimationSize = 0;
+   properties->extraPrimitiveOverestimationSizeGranularity 
= 0;
+   properties->primitiveUnderestimation = VK_FALSE;
+   properties->conservativePointAndLineRasterization = 
VK_FALSE;
+   properties->degenerateTrianglesRasterized = VK_FALSE;
+   properties->degenerateLinesRasterized = VK_FALSE;
+   properties->fullyCoveredFragmentShaderInputVariable = 
VK_FALSE;
+   properties->conservativeRasterizationPostDepthCoverage 
= VK_FALSE;
+   break;
+   }
  default:
  break;
  }
diff --git a/src/amd/vulkan/radv_extensions.py 
b/src/amd/vulkan/radv_extensions.py
index fa35aabd3b..584926df39 100644
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -93,6 +93,7 @@ EXTENSIONS = [
   Extension('VK_EXT_direct_mode_display',   1, 
'VK_USE_PLATFORM_DISPLAY_KHR'),
   Extension('VK_EXT_acquire_xlib_display',  1, 
'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),
   Extension('VK_EXT_conditional_rendering', 1, True),
+Extension('VK_EXT_conservative_rasterization',1, 
'device->rad_info.chip_class >= GFX9'),
   Extension('VK_EXT_display_surface_counter',   1, 
'VK_USE_PLATFORM_DISPLAY_KHR'),
   Extension('VK_EXT_display_control',   1, 
'VK_USE_PLATFORM_DISPLAY_KHR'),
   Extension('VK_EXT_debug_report',  9, True),
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index ae269c32c4..c54949ed42 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -2686,12 +2686,25 @@ radv_pipeline_generate_blend_state(struct radeon_cmdbuf 
*cs,
  pipeline->graphics.cb_target_mask = blend->cb_target_mask;
   }

+static const VkConservativeRasterizationModeEXT
+radv_get_conservative_raster_mode(const VkPipelineRasterizationStateCreateInfo 
*pCreateInfo)
+{
+   const VkPipelineRasterizationConservativeStateCreateInfoEXT 
*conservative_raster =
+   vk_find_struct_const(pCreateInfo->pNext, 
PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT);
+
+   if (!conservative_raster)
+   return VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT;
+   return conservative_raster->conservativeRasterizationMode;
+}

   static void
   radv_pipeline_generate_raster_state(struct radeon_cmdbuf *cs,
+   struct radv_pipeline *pipeline,
   const VkGraphicsPipelineCreateInfo 
*pCreateInfo)
   {
  const VkPipelineRasterizationStateCreateInfo *vkraster = 
pCreateInfo->pRasterizationState;
+   const VkConservativeRasterizationModeEXT mode =
+   radv_get_conservative_raster_mode(vkraster);

  radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
 S_028810_PS_UCP_MODE(3) |
@@ -2725,6 +2738,39 @@ radv_pipeline_generate_raster_state(struct radeon_cmdbuf 
*cs,
 
S_028814_POLY_OFFSET_FRONT_ENABLE(vkraster->depthBiasEnable ? 1 : 0) |
 
S_028814_POLY_OFFSET_BACK_ENABLE(vkraster->depthBiasEnable ? 1 : 0) |
 
S_028814_POLY_OFFSET_PARA_ENABLE(vkraster->depthBiasEnable ? 1 : 0));
+
+   /* Conservative rasterization. */
+   if (mode != VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT) {
+   struct radv_multisample_state *ms = >graphics.ms;
+   

[Mesa-dev] [PATCH 2/2 (resend)] virgl: Pass resource size and transfer offsets

2018-09-14 Thread Gert Wollny
From: Tomeu Vizoso 

Pass the size of a resource when creating it so a backing can be kept in
the other side.

Also pass the required offset to transfer commands.

This moves vtest closer to how virtio-gpu works, making it more useful
for testing.

v2: - Use new messages for creation and transfers, as changing the
  behavior of the existing messages would be messy given that we don't
  want to break compatibility with older servers.

v3: - Gert: Use correct strides: The resource corresponding to the output
  display might have a different line stride than the IOVs, so when 
  reading back to this resource take the resource stride and the 
  IOV stride into account.

Signed-off-by: Tomeu Vizoso  (v2)
Signed-off-by: Gert Wollny 
---
 .../winsys/virgl/vtest/virgl_vtest_socket.c| 143 +++--
 .../winsys/virgl/vtest/virgl_vtest_winsys.c|  38 --
 .../winsys/virgl/vtest/virgl_vtest_winsys.h|  19 ++-
 src/gallium/winsys/virgl/vtest/vtest_protocol.h|  29 +
 4 files changed, 201 insertions(+), 28 deletions(-)

diff --git a/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c 
b/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c
index 4d20a63ad6..3aa01aabdf 100644
--- a/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c
+++ b/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c
@@ -221,6 +221,42 @@ int virgl_vtest_send_get_caps(struct virgl_vtest_winsys 
*vws,
return 0;
 }
 
+static int virgl_vtest_send_resource_create2(struct virgl_vtest_winsys *vws,
+ uint32_t handle,
+ enum pipe_texture_target target,
+ uint32_t format,
+ uint32_t bind,
+ uint32_t width,
+ uint32_t height,
+ uint32_t depth,
+ uint32_t array_size,
+ uint32_t last_level,
+ uint32_t nr_samples,
+ uint32_t size)
+{
+   uint32_t res_create_buf[VCMD_RES_CREATE2_SIZE], vtest_hdr[VTEST_HDR_SIZE];
+
+   vtest_hdr[VTEST_CMD_LEN] = VCMD_RES_CREATE2_SIZE;
+   vtest_hdr[VTEST_CMD_ID] = VCMD_RESOURCE_CREATE2;
+
+   res_create_buf[VCMD_RES_CREATE2_RES_HANDLE] = handle;
+   res_create_buf[VCMD_RES_CREATE2_TARGET] = target;
+   res_create_buf[VCMD_RES_CREATE2_FORMAT] = format;
+   res_create_buf[VCMD_RES_CREATE2_BIND] = bind;
+   res_create_buf[VCMD_RES_CREATE2_WIDTH] = width;
+   res_create_buf[VCMD_RES_CREATE2_HEIGHT] = height;
+   res_create_buf[VCMD_RES_CREATE2_DEPTH] = depth;
+   res_create_buf[VCMD_RES_CREATE2_ARRAY_SIZE] = array_size;
+   res_create_buf[VCMD_RES_CREATE2_LAST_LEVEL] = last_level;
+   res_create_buf[VCMD_RES_CREATE2_NR_SAMPLES] = nr_samples;
+   res_create_buf[VCMD_RES_CREATE2_DATA_SIZE] = size;
+
+   virgl_block_write(vws->sock_fd, _hdr, sizeof(vtest_hdr));
+   virgl_block_write(vws->sock_fd, _create_buf, sizeof(res_create_buf));
+
+   return 0;
+}
+
 int virgl_vtest_send_resource_create(struct virgl_vtest_winsys *vws,
  uint32_t handle,
  enum pipe_texture_target target,
@@ -231,10 +267,17 @@ int virgl_vtest_send_resource_create(struct 
virgl_vtest_winsys *vws,
  uint32_t depth,
  uint32_t array_size,
  uint32_t last_level,
- uint32_t nr_samples)
+ uint32_t nr_samples,
+ uint32_t size)
 {
uint32_t res_create_buf[VCMD_RES_CREATE_SIZE], vtest_hdr[VTEST_HDR_SIZE];
 
+   if (vws->protocol_version >= 1)
+  return virgl_vtest_send_resource_create2(vws, handle, target, format,
+   bind, width, height, depth,
+   array_size, last_level,
+   nr_samples, size);
+
vtest_hdr[VTEST_CMD_LEN] = VCMD_RES_CREATE_SIZE;
vtest_hdr[VTEST_CMD_ID] = VCMD_RESOURCE_CREATE;
 
@@ -282,7 +325,7 @@ int virgl_vtest_send_resource_unref(struct 
virgl_vtest_winsys *vws,
return 0;
 }
 
-int virgl_vtest_send_transfer_cmd(struct virgl_vtest_winsys *vws,
+static int virgl_vtest_send_transfer_cmd(struct virgl_vtest_winsys *vws,
   uint32_t vcmd,
   uint32_t handle,
   uint32_t level, uint32_t stride,
@@ -315,6 +358,73 @@ int virgl_vtest_send_transfer_cmd(struct 
virgl_vtest_winsys *vws,
return 0;
 }
 
+static int virgl_vtest_send_transfer_cmd2(struct virgl_vtest_winsys *vws,
+  

[Mesa-dev] [PATCH 1/2 (resend)] virgl: Negotiate version with vtest server

2018-09-14 Thread Gert Wollny
From: Tomeu Vizoso 

Check if server supports version negotiation by sending a PING_PROTOCOL_VERSION
message right before a dummy RESOURCE_BUSY_WAIT. If we don't get a reply
for the first, we know the server doesn't support it.

If it does support it, we can query the max protocol version supported
by the server and fall back if needed.


v2: - Send a new message to negotiate the protocol version, checking if
  the server supports this message by immediately sending a busy wait
  message. (Dave Airlie)

v3: - Send a zero-arg command PING_PROTOCOL_VERSION so we actually keep
  compatibility with older servers. (Code by Dave Airlie)

Signed-off-by: Tomeu Vizoso 
Signed-off-by: Gert Wollny 

---
Resending the series because it didn't make it into patchwork 

 .../winsys/virgl/vtest/virgl_vtest_socket.c| 52 ++
 .../winsys/virgl/vtest/virgl_vtest_winsys.h|  2 +
 src/gallium/winsys/virgl/vtest/vtest_protocol.h| 10 +
 3 files changed, 64 insertions(+)

diff --git a/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c 
b/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c
index d25f9a3bd9..4d20a63ad6 100644
--- a/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c
+++ b/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c
@@ -101,6 +101,57 @@ static int virgl_vtest_send_init(struct virgl_vtest_winsys 
*vws)
return 0;
 }
 
+static int virgl_vtest_negotiate_version(struct virgl_vtest_winsys *vws)
+{
+   uint32_t vtest_hdr[VTEST_HDR_SIZE];
+   uint32_t version_buf[VCMD_PROTOCOL_VERSION_SIZE];
+   uint32_t busy_wait_buf[VCMD_BUSY_WAIT_SIZE];
+   uint32_t busy_wait_result[1];
+   int ret;
+
+   vtest_hdr[VTEST_CMD_LEN] = VCMD_PING_PROTOCOL_VERSION_SIZE;
+   vtest_hdr[VTEST_CMD_ID] = VCMD_PING_PROTOCOL_VERSION;
+   virgl_block_write(vws->sock_fd, _hdr, sizeof(vtest_hdr));
+
+   vtest_hdr[VTEST_CMD_LEN] = VCMD_BUSY_WAIT_SIZE;
+   vtest_hdr[VTEST_CMD_ID] = VCMD_RESOURCE_BUSY_WAIT;
+   busy_wait_buf[VCMD_BUSY_WAIT_HANDLE] = 0;
+   busy_wait_buf[VCMD_BUSY_WAIT_FLAGS] = 0;
+   virgl_block_write(vws->sock_fd, _hdr, sizeof(vtest_hdr));
+   virgl_block_write(vws->sock_fd, _wait_buf, sizeof(busy_wait_buf));
+
+   ret = virgl_block_read(vws->sock_fd, vtest_hdr, sizeof(vtest_hdr));
+   assert(ret);
+
+   if (vtest_hdr[VTEST_CMD_ID] == VCMD_PING_PROTOCOL_VERSION) {
+ /* Read dummy busy_wait response */
+ ret = virgl_block_read(vws->sock_fd, vtest_hdr, sizeof(vtest_hdr));
+ assert(ret);
+ ret = virgl_block_read(vws->sock_fd, busy_wait_result, 
sizeof(busy_wait_result));
+ assert(ret);
+
+ vtest_hdr[VTEST_CMD_LEN] = VCMD_PROTOCOL_VERSION_SIZE;
+ vtest_hdr[VTEST_CMD_ID] = VCMD_PROTOCOL_VERSION;
+ version_buf[VCMD_PROTOCOL_VERSION_VERSION] = VTEST_PROTOCOL_VERSION;
+ virgl_block_write(vws->sock_fd, _hdr, sizeof(vtest_hdr));
+ virgl_block_write(vws->sock_fd, _buf, sizeof(version_buf));
+
+ ret = virgl_block_read(vws->sock_fd, vtest_hdr, sizeof(vtest_hdr));
+ assert(ret);
+ ret = virgl_block_read(vws->sock_fd, version_buf, sizeof(version_buf));
+ assert(ret);
+ return version_buf[VCMD_PROTOCOL_VERSION_VERSION];
+   }
+
+   /* Read dummy busy_wait response */
+   assert(vtest_hdr[VTEST_CMD_ID] == VCMD_RESOURCE_BUSY_WAIT);
+   ret = virgl_block_read(vws->sock_fd, busy_wait_result, 
sizeof(busy_wait_result));
+   assert(ret);
+
+   /* Old server, return version 0 */
+   return 0;
+}
+
 int virgl_vtest_connect(struct virgl_vtest_winsys *vws)
 {
struct sockaddr_un un;
@@ -123,6 +174,7 @@ int virgl_vtest_connect(struct virgl_vtest_winsys *vws)
 
vws->sock_fd = sock;
virgl_vtest_send_init(vws);
+   vws->protocol_version = virgl_vtest_negotiate_version(vws);
return 0;
 }
 
diff --git a/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.h 
b/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.h
index 031037b6b5..3628c74644 100644
--- a/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.h
+++ b/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.h
@@ -49,6 +49,8 @@ struct virgl_vtest_winsys {
int num_delayed;
unsigned usecs;
mtx_t mutex;
+
+   unsigned protocol_version;
 };
 
 struct virgl_hw_res {
diff --git a/src/gallium/winsys/virgl/vtest/vtest_protocol.h 
b/src/gallium/winsys/virgl/vtest/vtest_protocol.h
index 95bd8c1d0b..8eb904e73f 100644
--- a/src/gallium/winsys/virgl/vtest/vtest_protocol.h
+++ b/src/gallium/winsys/virgl/vtest/vtest_protocol.h
@@ -24,6 +24,7 @@
 #define VTEST_PROTOCOL
 
 #define VTEST_DEFAULT_SOCKET_NAME "/tmp/.virgl_test"
+#define VTEST_PROTOCOL_VERSION 1
 
 /* 32-bit length field */
 /* 32-bit cmd field */
@@ -53,6 +54,10 @@
 /* 0 length cmd */
 /* resp VCMD_GET_CAPS + caps */
 
+#define VCMD_PING_PROTOCOL_VERSION 10
+
+#define VCMD_PROTOCOL_VERSION 11
+
 #define VCMD_RES_CREATE_SIZE 10
 #define VCMD_RES_CREATE_RES_HANDLE 0
 #define VCMD_RES_CREATE_TARGET 1
@@ -87,4 +92,9 @@
 #define VCMD_BUSY_WAIT_HANDLE 0
 #define VCMD_BUSY_WAIT_FLAGS 1
 
+#define VCMD_PING_PROTOCOL_VERSION_SIZE 1
+

Re: [Mesa-dev] [PATCH 2/2] virgl: Pass resource size and transfer offsets

2018-09-14 Thread Gert Wollny
Hello Dave, 

Am Freitag, den 14.09.2018, 13:25 +1000 schrieb Dave Airlie:
[...]

> This causes regressions in a bunch of piglits on skylake for me.
> 
> arb_copy_image-formats
> and some
> ./bin/fbo-generatemipmap-formats GL_EXT_texture_sRGB-s3tc
> 
> have some different results after this.

Comparing between mesa-master and v3 of the patch I get exactly the
same results on kabylake for "piglit run gpu -t formats" that include
these tests, or did you mean different results w.r.t. v2 of the patch?

In this case I simply get more failures. Funnily enough, v2 has the same
additional failures as when running through qemu, so there must be a
similar problem with strides on the virgl-drm mesa side.

I was testing with mesa master 1a263b377c plus these patches.  

HTH,
Gert 

PS: I'll be away for the next week


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   >