[Mesa-dev] [PATCH 07/11] freedreno/ir3: Use the separated dead write vars pass
No changes to shader-db expected. --- src/gallium/drivers/freedreno/ir3/ir3_nir.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_nir.c index db1d74fdee7..d5f42f2a231 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_nir.c @@ -94,6 +94,7 @@ ir3_optimize_loop(nir_shader *s) OPT_V(s, nir_lower_vars_to_ssa); progress |= OPT(s, nir_opt_copy_prop_vars); + progress |= OPT(s, nir_opt_dead_write_vars); progress |= OPT(s, nir_lower_alu_to_scalar); progress |= OPT(s, nir_lower_phis_to_scalar); -- 2.19.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 09/11] nir: Add tests for copy propagation of derefs
Also tests for removal of redundant loads, that we currently handle as part of the copy propagation. --- src/compiler/nir/tests/vars_tests.cpp | 300 ++ 1 file changed, 300 insertions(+) diff --git a/src/compiler/nir/tests/vars_tests.cpp b/src/compiler/nir/tests/vars_tests.cpp index cdd2a17fe92..b1fa04b5cb9 100644 --- a/src/compiler/nir/tests/vars_tests.cpp +++ b/src/compiler/nir/tests/vars_tests.cpp @@ -140,11 +140,131 @@ nir_imm_ivec2(nir_builder *build, int x, int y) } /* Allow grouping the tests while still sharing the helpers. */ +class nir_redundant_load_vars_test : public nir_vars_test {}; class nir_copy_prop_vars_test : public nir_vars_test {}; class nir_dead_write_vars_test : public nir_vars_test {}; } // namespace +TEST_F(nir_redundant_load_vars_test, duplicated_load) +{ + /* Load a variable twice in the same block. One should be removed. */ + + nir_variable *in = create_int(nir_var_shader_in, "in"); + nir_variable **out = create_many_int(nir_var_shader_out, "out", 2); + + nir_store_var(b, out[0], nir_load_var(b, in), 1); + nir_store_var(b, out[1], nir_load_var(b, in), 1); + + nir_validate_shader(b->shader); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2); + + bool progress = nir_opt_copy_prop_vars(b->shader); + EXPECT_TRUE(progress); + + nir_validate_shader(b->shader); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1); +} + +TEST_F(nir_redundant_load_vars_test, DISABLED_duplicated_load_in_two_blocks) +{ + /* Load a variable twice in different blocks. One should be removed. */ + + nir_variable *in = create_int(nir_var_shader_in, "in"); + nir_variable **out = create_many_int(nir_var_shader_out, "out", 2); + + nir_store_var(b, out[0], nir_load_var(b, in), 1); + + /* Forces the stores to be in different blocks. 
*/ + nir_pop_if(b, nir_push_if(b, nir_imm_int(b, 0))); + + nir_store_var(b, out[1], nir_load_var(b, in), 1); + + nir_validate_shader(b->shader); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2); + + bool progress = nir_opt_copy_prop_vars(b->shader); + EXPECT_TRUE(progress); + + nir_validate_shader(b->shader); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1); +} + +TEST_F(nir_redundant_load_vars_test, DISABLED_invalidate_inside_if_block) +{ + /* Load variables, then write to some of then in different branches of the +* if statement. They should be invalidated accordingly. +*/ + + nir_variable **g = create_many_int(nir_var_global, "g", 3); + nir_variable **out = create_many_int(nir_var_shader_out, "out", 3); + + nir_load_var(b, g[0]); + nir_load_var(b, g[1]); + nir_load_var(b, g[2]); + + nir_if *if_stmt = nir_push_if(b, nir_imm_int(b, 0)); + nir_store_var(b, g[0], nir_imm_int(b, 10), 1); + + nir_push_else(b, if_stmt); + nir_store_var(b, g[1], nir_imm_int(b, 20), 1); + + nir_pop_if(b, if_stmt); + + nir_store_var(b, out[0], nir_load_var(b, g[0]), 1); + nir_store_var(b, out[1], nir_load_var(b, g[1]), 1); + nir_store_var(b, out[2], nir_load_var(b, g[2]), 1); + + nir_validate_shader(b->shader); + + bool progress = nir_opt_copy_prop_vars(b->shader); + EXPECT_TRUE(progress); + + /* There are 3 initial loads, plus 2 loads for the values invalidated +* inside the if statement. +*/ + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 5); + + /* We only load g[2] once. */ + unsigned g2_load_count = 0; + nir_intrinsic_instr *load = NULL; + for (int i = 0; i < 5; i++) { + load = find_next_intrinsic(nir_intrinsic_load_deref, load); + if (nir_intrinsic_get_var(load, 0) == g[2]) + g2_load_count++; + } + EXPECT_EQ(g2_load_count, 1); +} + +TEST_F(nir_redundant_load_vars_test, invalidate_live_load_in_the_end_of_loop) +{ + /* Invalidating a load in the end of loop body will apply to the whole loop +* body. 
+*/ + + nir_variable *v = create_int(nir_var_shader_storage, "v"); + + nir_load_var(b, v); + + nir_loop *loop = nir_push_loop(b); + + nir_if *if_stmt = nir_push_if(b, nir_imm_int(b, 0)); + nir_jump(b, nir_jump_break); + nir_pop_if(b, if_stmt); + + nir_load_var(b, v); + nir_store_var(b, v, nir_imm_int(b, 10), 1); + + nir_pop_loop(b, loop); + + bool progress = nir_opt_copy_prop_vars(b->shader); + ASSERT_FALSE(progress); +} + TEST_F(nir_copy_prop_vars_test, simple_copies) { nir_variable *in = create_int(nir_var_shader_in, "in"); @@ -199,6 +319,186 @@ TEST_F(nir_copy_prop_vars_test, simple_store_load) } } +TEST_F(nir_copy_prop_vars_test, store_store_load) +{ + nir_variable **v = create_many_ivec2(nir_var_local, "v", 2); + unsigned mask = 1 | 2; + + nir_ssa_def *first_value = nir_imm_ivec2(b, 10, 20); + nir_store_var(b, v[0], first_value, mask); + + nir_ssa_def *second_value = nir_imm_ivec2(b, 30, 40); + nir_store_var(b, v[0], second_value, mask); + + nir_ssa_def *read_value = nir_load_var(b, v[0]); + nir_store_var(b, v[1],
[Mesa-dev] [PATCH 08/11] nir: Remove handling of dead writes from copy_prop_vars
These are covered by another pass now. --- src/compiler/nir/nir_opt_copy_prop_vars.c | 84 +++ 1 file changed, 8 insertions(+), 76 deletions(-) diff --git a/src/compiler/nir/nir_opt_copy_prop_vars.c b/src/compiler/nir/nir_opt_copy_prop_vars.c index 9fecaf0eeec..5276aa176d8 100644 --- a/src/compiler/nir/nir_opt_copy_prop_vars.c +++ b/src/compiler/nir/nir_opt_copy_prop_vars.c @@ -38,10 +38,7 @@ * 1) Copy-propagation on variables that have indirect access. This includes * propagating from indirect stores into indirect loads. * - * 2) Dead code elimination of store_var and copy_var intrinsics based on - * killed destination values. - * - * 3) Removal of redundant load_deref intrinsics. We can't trust regular CSE + * 2) Removal of redundant load_deref intrinsics. We can't trust regular CSE * to do this because it isn't aware of variable writes that may alias the * value and make the former load invalid. * @@ -51,6 +48,8 @@ * rapidly get out of hand. Fortunately, for anything that is only ever * accessed directly, we get SSA based copy-propagation which is extremely * powerful so this isn't that great a loss. + * + * Removal of dead writes to variables is handled by another pass. */ struct value { @@ -64,9 +63,6 @@ struct value { struct copy_entry { struct list_head link; - nir_instr *store_instr[4]; - - unsigned comps_may_be_read; struct value src; nir_deref_instr *dst; @@ -114,44 +110,6 @@ copy_entry_remove(struct copy_prop_var_state *state, struct copy_entry *entry) list_add(>link, >copy_free_list); } -static void -remove_dead_writes(struct copy_prop_var_state *state, - struct copy_entry *entry, unsigned write_mask) -{ - /* We're overwriting another entry. Some of it's components may not -* have been read yet and, if that's the case, we may be able to delete -* some instructions but we have to be careful. 
-*/ - unsigned dead_comps = write_mask & ~entry->comps_may_be_read; - - for (unsigned mask = dead_comps; mask;) { - unsigned i = u_bit_scan(); - - nir_instr *instr = entry->store_instr[i]; - - /* We may have already deleted it on a previous iteration */ - if (!instr) - continue; - - /* See if this instr is used anywhere that it's not dead */ - bool keep = false; - for (unsigned j = 0; j < 4; j++) { - if (entry->store_instr[j] == instr) { -if (dead_comps & (1 << j)) { - entry->store_instr[j] = NULL; -} else { - keep = true; -} - } - } - - if (!keep) { - nir_instr_remove(instr); - state->progress = true; - } - } -} - static struct copy_entry * lookup_entry_for_deref(struct copy_prop_var_state *state, nir_deref_instr *deref, @@ -165,16 +123,6 @@ lookup_entry_for_deref(struct copy_prop_var_state *state, return NULL; } -static void -mark_aliased_entries_as_read(struct copy_prop_var_state *state, - nir_deref_instr *deref, unsigned components) -{ - list_for_each_entry(struct copy_entry, iter, >copies, link) { - if (nir_compare_derefs(iter->dst, deref) & nir_derefs_may_alias_bit) - iter->comps_may_be_read |= components; - } -} - static struct copy_entry * get_entry_and_kill_aliases(struct copy_prop_var_state *state, nir_deref_instr *deref, @@ -191,11 +139,6 @@ get_entry_and_kill_aliases(struct copy_prop_var_state *state, } nir_deref_compare_result comp = nir_compare_derefs(iter->dst, deref); - /* This is a store operation. If we completely overwrite some value, we - * want to delete any dead writes that may be present. 
- */ - if (comp & nir_derefs_b_contains_a_bit) - remove_dead_writes(state, iter, write_mask); if (comp & nir_derefs_equal_bit) { assert(entry == NULL); @@ -228,25 +171,19 @@ apply_barrier_for_modes(struct copy_prop_var_state *state, static void store_to_entry(struct copy_prop_var_state *state, struct copy_entry *entry, - const struct value *value, unsigned write_mask, - nir_instr *store_instr) + const struct value *value, unsigned write_mask) { - entry->comps_may_be_read &= ~write_mask; if (value->is_ssa) { entry->src.is_ssa = true; /* Only overwrite the written components */ for (unsigned i = 0; i < 4; i++) { - if (write_mask & (1 << i)) { -entry->store_instr[i] = store_instr; + if (write_mask & (1 << i)) entry->src.ssa[i] = value->ssa[i]; - } } } else { /* Non-ssa stores always write everything */ entry->src.is_ssa = false; entry->src.deref = value->deref; - for (unsigned i = 0; i < 4; i++) - entry->store_instr[i] = store_instr; } } @@ -490,9 +427,6 @@
[Mesa-dev] [PATCH 11/11] nir: Copy propagation between blocks
Extend the pass to propagate the copies information along the control flow graph. It performs two walks: first it collects the vars that were written inside each node. Then it walks applying the copy propagation using a list of copies previously available. At each node the list is invalidated according to results from the first walk. This approach is simpler than a full data-flow analysis, but covers various cases. If derefs are used for operating on more memory resources (e.g. SSBOs), the difference from a regular pass is expected to be more visible -- as the SSA copy propagation pass won't apply to those. A full data-flow analysis would handle more scenarios: conditional breaks in the control flow and merging equivalent effects from multiple branches (e.g. using a phi node to merge the source for writes to the same deref). However, as previous commentary in the code stated, its complexity would 'rapidly get out of hand'. The current patch is a good intermediate step towards more complex analysis. The 'copies' linked list was modified to use util_dynarray to make it more convenient to clone it (to handle ifs/loops). Annotated shader-db results for Skylake: total instructions in shared programs: 15105796 -> 15105451 (<.01%) instructions in affected programs: 152293 -> 151948 (-0.23%) helped: 96 HURT: 17 All the HURTs and many HELPs are one instruction. Looking at pass by pass outputs, the copy prop kicks in removing a bunch of loads correctly, which ends up altering which other optimizations kick in. In those cases the copies would be propagated after lowering to SSA. In a few HELPs we are actually doing more than was possible previously, e.g. consolidating load_uniforms from different blocks. Most of those are from shaders/dolphin/ubershaders/. total cycles in shared programs: 566048861 -> 565954876 (-0.02%) cycles in affected programs: 151461830 -> 151367845 (-0.06%) helped: 2933 HURT: 2950 A lot of noise on both sides. 
total loops in shared programs: 4603 -> 4603 (0.00%) loops in affected programs: 0 -> 0 helped: 0 HURT: 0 total spills in shared programs: 11085 -> 11073 (-0.11%) spills in affected programs: 23 -> 11 (-52.17%) helped: 1 HURT: 0 The shaders/dolphin/ubershaders/12.shader_test was able to pull a couple of loads from inside if statements and reuse them. total fills in shared programs: 23143 -> 23089 (-0.23%) fills in affected programs: 2718 -> 2664 (-1.99%) helped: 27 HURT: 0 All from shaders/dolphin/ubershaders/. LOST: 0 GAINED: 0 The other generations follow the same overall shape. The spills and fills HURTs are all from the same game. shader-db results for Broadwell. total instructions in shared programs: 15402037 -> 15401841 (<.01%) instructions in affected programs: 144386 -> 144190 (-0.14%) helped: 86 HURT: 9 total cycles in shared programs: 600912755 -> 600902486 (<.01%) cycles in affected programs: 185662820 -> 185652551 (<.01%) helped: 2598 HURT: 3053 total loops in shared programs: 4579 -> 4579 (0.00%) loops in affected programs: 0 -> 0 helped: 0 HURT: 0 total spills in shared programs: 80929 -> 80924 (<.01%) spills in affected programs: 720 -> 715 (-0.69%) helped: 1 HURT: 5 total fills in shared programs: 93057 -> 93013 (-0.05%) fills in affected programs: 3398 -> 3354 (-1.29%) helped: 27 HURT: 5 LOST: 0 GAINED: 2 shader-db results for Haswell: total instructions in shared programs: 9231975 -> 9230357 (-0.02%) instructions in affected programs: 44992 -> 43374 (-3.60%) helped: 27 HURT: 69 total cycles in shared programs: 87760587 -> 87727502 (-0.04%) cycles in affected programs: 7720673 -> 7687588 (-0.43%) helped: 1609 HURT: 1416 total loops in shared programs: 1830 -> 1830 (0.00%) loops in affected programs: 0 -> 0 helped: 0 HURT: 0 total spills in shared programs: 1988 -> 1692 (-14.89%) spills in affected programs: 296 -> 0 helped: 1 HURT: 0 total fills in shared programs: 2103 -> 1668 (-20.68%) fills in affected programs: 438 -> 3 (-99.32%) helped: 4 
HURT: 0 LOST: 0 GAINED: 1 --- src/compiler/nir/nir_opt_copy_prop_vars.c | 394 +- 1 file changed, 317 insertions(+), 77 deletions(-) diff --git a/src/compiler/nir/nir_opt_copy_prop_vars.c b/src/compiler/nir/nir_opt_copy_prop_vars.c index f58abfbb69f..966ccbdec53 100644 --- a/src/compiler/nir/nir_opt_copy_prop_vars.c +++ b/src/compiler/nir/nir_opt_copy_prop_vars.c @@ -26,6 +26,7 @@ #include "nir_deref.h" #include "util/bitscan.h" +#include "util/u_dynarray.h" /** * Variable-based copy propagation @@ -42,16 +43,21 @@ * to do this because it isn't aware of variable writes that may
[Mesa-dev] [PATCH 06/11] intel/nir: Use the separated dead write vars pass
No changes to shader-db. --- src/intel/compiler/brw_nir.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index b38c3ba383d..77938efae31 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -553,6 +553,7 @@ brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler, OPT(nir_opt_find_array_copies); } OPT(nir_opt_copy_prop_vars); + OPT(nir_opt_dead_write_vars); if (is_scalar) { OPT(nir_lower_alu_to_scalar); -- 2.19.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 05/11] nir: Separate dead write removal into its own pass
Instead of doing this as part of the existing copy_prop_vars pass. Separation makes it easier to expand the scope of both passes to be more than per-block. For copy propagation, the information about valid copies comes from previous instructions, while the dead write removal depends on information from later instructions ("has any instruction used this deref before it is overwritten?"). Also change the tests to use this pass (instead of copy prop vars). Note that the disabled tests continue to fail, since the standalone pass is still per-block. v2: Remove entries from dynarray instead of marking items as deleted. Use foreach_reverse. (Caio) (all from Jason) Do not cache nir_deref_path. Not worth it for this patch. Clear unused writes when hitting a call instruction. Clean up enumeration of modes for barriers. Move metadata calls to the inner function. --- src/compiler/Makefile.sources | 1 + src/compiler/nir/meson.build | 1 + src/compiler/nir/nir.h | 2 + src/compiler/nir/nir_opt_dead_write_vars.c | 216 + src/compiler/nir/tests/vars_tests.cpp | 3 - 5 files changed, 220 insertions(+), 3 deletions(-) create mode 100644 src/compiler/nir/nir_opt_dead_write_vars.c diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources index d3b06564832..b65bb9b80b9 100644 --- a/src/compiler/Makefile.sources +++ b/src/compiler/Makefile.sources @@ -274,6 +274,7 @@ NIR_FILES = \ nir/nir_opt_cse.c \ nir/nir_opt_dce.c \ nir/nir_opt_dead_cf.c \ + nir/nir_opt_dead_write_vars.c \ nir/nir_opt_find_array_copies.c \ nir/nir_opt_gcm.c \ nir/nir_opt_global_to_local.c \ diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build index 1a7fa2d3327..d8f65640004 100644 --- a/src/compiler/nir/meson.build +++ b/src/compiler/nir/meson.build @@ -158,6 +158,7 @@ files_libnir = files( 'nir_opt_cse.c', 'nir_opt_dce.c', 'nir_opt_dead_cf.c', + 'nir_opt_dead_write_vars.c', 'nir_opt_find_array_copies.c', 'nir_opt_gcm.c', 'nir_opt_global_to_local.c', diff --git a/src/compiler/nir/nir.h 
b/src/compiler/nir/nir.h index 599f469a714..80d145cac1e 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -3030,6 +3030,8 @@ bool nir_opt_dce(nir_shader *shader); bool nir_opt_dead_cf(nir_shader *shader); +bool nir_opt_dead_write_vars(nir_shader *shader); + bool nir_opt_find_array_copies(nir_shader *shader); bool nir_opt_gcm(nir_shader *shader, bool value_number); diff --git a/src/compiler/nir/nir_opt_dead_write_vars.c b/src/compiler/nir/nir_opt_dead_write_vars.c new file mode 100644 index 000..5a3145875cb --- /dev/null +++ b/src/compiler/nir/nir_opt_dead_write_vars.c @@ -0,0 +1,216 @@ +/* + * Copyright © 2018 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir.h" +#include "nir_builder.h" +#include "nir_deref.h" + +#include "util/u_dynarray.h" + +/** + * Elimination of dead writes based on derefs. 
+ * + * Dead writes are stores and copies that write to a deref, which then gets + * another write before it was used (read or sourced for a copy). Those + * writes can be removed since they don't affect anything. + * + * For derefs that refer to a memory area that can be read after the program, + * the last write is considered used. The presence of certain instructions + * may also cause writes to be considered used, e.g. memory barrier (in this case + * the value must be written as other thread might use it). + * + * The write mask for store instructions is considered, so it is possible that + * a store is removed because of the combination of other stores overwritten + * its value. + */ + +/* Entry for unused_writes arrays. */ +struct write_entry { + /* If NULL indicates the entry is free to be reused. */ + nir_intrinsic_instr
[Mesa-dev] [PATCH 10/11] nir: Take call instruction into account in copy_prop_vars
Calls are not used yet (functions are inlined), but since new code is already taking them into account, do it here too. The convention here and in other places is that no writable memory, nor any global variable, is assumed to remain unchanged across a call. Also, explicitly state the modes affected (instead of using the reverse logic) in one of the apply_barrier_for_modes calls. Suggested (indirectly) by Jason. --- Jason suggested this for the other pass, so I am doing it here too. src/compiler/nir/nir_opt_copy_prop_vars.c | 17 +++-- 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/compiler/nir/nir_opt_copy_prop_vars.c b/src/compiler/nir/nir_opt_copy_prop_vars.c index 5276aa176d8..f58abfbb69f 100644 --- a/src/compiler/nir/nir_opt_copy_prop_vars.c +++ b/src/compiler/nir/nir_opt_copy_prop_vars.c @@ -404,6 +404,14 @@ copy_prop_vars_block(struct copy_prop_var_state *state, copy_entry_remove(state, iter); nir_foreach_instr_safe(instr, block) { + if (instr->type == nir_instr_type_call) { + apply_barrier_for_modes(copies, nir_var_shader_out | + nir_var_global | + nir_var_shader_storage | + nir_var_shared); + continue; + } + if (instr->type != nir_instr_type_intrinsic) continue; @@ -411,12 +419,9 @@ copy_prop_vars_block(struct copy_prop_var_state *state, switch (intrin->intrinsic) { case nir_intrinsic_barrier: case nir_intrinsic_memory_barrier: - /* If we hit a barrier, we need to trash everything that may possibly - * be accessible to another thread. Locals, globals, and things of - * the like are safe, however. - */ - apply_barrier_for_modes(state, ~(nir_var_local | nir_var_global | - nir_var_shader_in | nir_var_uniform)); + apply_barrier_for_modes(copies, nir_var_shader_out | + nir_var_shader_storage | + nir_var_shared); break; case nir_intrinsic_emit_vertex: -- 2.19.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 02/11] util: Add macro to get number of elements in dynarray
--- I've ended up not using this macro in this series, but it is useful for other cases, so kept it here. src/util/u_dynarray.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/util/u_dynarray.h b/src/util/u_dynarray.h index f74bfc7080b..53dde9241bb 100644 --- a/src/util/u_dynarray.h +++ b/src/util/u_dynarray.h @@ -149,6 +149,7 @@ util_dynarray_trim(struct util_dynarray *buf) #define util_dynarray_element(buf, type, idx) ((type*)(buf)->data + (idx)) #define util_dynarray_begin(buf) ((buf)->data) #define util_dynarray_end(buf) ((void*)util_dynarray_element((buf), char, (buf)->size)) +#define util_dynarray_num_elements(buf, type) ((buf)->size / sizeof(type)) #define util_dynarray_foreach(buf, type, elem) \ for (type *elem = (type *)(buf)->data; \ -- 2.19.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 04/11] nir: Add tests for dead write elimination
Note at the moment the pass called is nir_opt_copy_prop_vars, because dead write elimination is implemented there. Also added tests that involve identifying dead writes in multiple blocks (e.g. the overwrite happens in another block). Those currently fail as expected, so are marked to be skipped. --- src/compiler/nir/tests/vars_tests.cpp | 241 ++ 1 file changed, 241 insertions(+) diff --git a/src/compiler/nir/tests/vars_tests.cpp b/src/compiler/nir/tests/vars_tests.cpp index 7fbdb514349..dd913f04429 100644 --- a/src/compiler/nir/tests/vars_tests.cpp +++ b/src/compiler/nir/tests/vars_tests.cpp @@ -26,6 +26,9 @@ #include "nir.h" #include "nir_builder.h" +/* This optimization is done together with copy propagation. */ +#define nir_opt_dead_write_vars nir_opt_copy_prop_vars + namespace { class nir_vars_test : public ::testing::Test { @@ -141,6 +144,7 @@ nir_imm_ivec2(nir_builder *build, int x, int y) /* Allow grouping the tests while still sharing the helpers. */ class nir_copy_prop_vars_test : public nir_vars_test {}; +class nir_dead_write_vars_test : public nir_vars_test {}; } // namespace @@ -197,3 +201,240 @@ TEST_F(nir_copy_prop_vars_test, simple_store_load) EXPECT_EQ(store->src[1].ssa, stored_value); } } + +TEST_F(nir_dead_write_vars_test, no_dead_writes_in_block) +{ + nir_variable **v = create_many_int(nir_var_shader_storage, "v", 2); + + nir_store_var(b, v[0], nir_load_var(b, v[1]), 1); + + bool progress = nir_opt_dead_write_vars(b->shader); + ASSERT_FALSE(progress); +} + +TEST_F(nir_dead_write_vars_test, no_dead_writes_different_components_in_block) +{ + nir_variable **v = create_many_ivec2(nir_var_shader_storage, "v", 3); + + nir_store_var(b, v[0], nir_load_var(b, v[1]), 1 << 0); + nir_store_var(b, v[0], nir_load_var(b, v[2]), 1 << 1); + + bool progress = nir_opt_dead_write_vars(b->shader); + ASSERT_FALSE(progress); +} + +TEST_F(nir_dead_write_vars_test, no_dead_writes_in_if_statement) +{ + nir_variable **v = create_many_int(nir_var_shader_storage, "v", 6); + 
+ nir_store_var(b, v[2], nir_load_var(b, v[0]), 1); + nir_store_var(b, v[3], nir_load_var(b, v[1]), 1); + + /* Each arm of the if statement will overwrite one store. */ + nir_if *if_stmt = nir_push_if(b, nir_imm_int(b, 0)); + nir_store_var(b, v[2], nir_load_var(b, v[4]), 1); + + nir_push_else(b, if_stmt); + nir_store_var(b, v[3], nir_load_var(b, v[5]), 1); + + nir_pop_if(b, if_stmt); + + bool progress = nir_opt_dead_write_vars(b->shader); + ASSERT_FALSE(progress); +} + +TEST_F(nir_dead_write_vars_test, no_dead_writes_in_loop_statement) +{ + nir_variable **v = create_many_int(nir_var_shader_storage, "v", 3); + + nir_store_var(b, v[0], nir_load_var(b, v[1]), 1); + + /* Loop will write other value. Since it might not be executed, it doesn't +* kill the first write. +*/ + nir_loop *loop = nir_push_loop(b); + + nir_if *if_stmt = nir_push_if(b, nir_imm_int(b, 0)); + nir_jump(b, nir_jump_break); + nir_pop_if(b, if_stmt); + + nir_store_var(b, v[0], nir_load_var(b, v[2]), 1); + nir_pop_loop(b, loop); + + bool progress = nir_opt_dead_write_vars(b->shader); + ASSERT_FALSE(progress); +} + +TEST_F(nir_dead_write_vars_test, dead_write_in_block) +{ + nir_variable **v = create_many_int(nir_var_shader_storage, "v", 3); + + nir_store_var(b, v[0], nir_load_var(b, v[1]), 1); + nir_ssa_def *load_v2 = nir_load_var(b, v[2]); + nir_store_var(b, v[0], load_v2, 1); + + bool progress = nir_opt_dead_write_vars(b->shader); + ASSERT_TRUE(progress); + + EXPECT_EQ(1, count_intrinsics(nir_intrinsic_store_deref)); + + nir_intrinsic_instr *store = find_next_intrinsic(nir_intrinsic_store_deref, NULL); + ASSERT_TRUE(store->src[1].is_ssa); + EXPECT_EQ(store->src[1].ssa, load_v2); +} + +TEST_F(nir_dead_write_vars_test, dead_write_components_in_block) +{ + nir_variable **v = create_many_ivec2(nir_var_shader_storage, "v", 3); + + nir_store_var(b, v[0], nir_load_var(b, v[1]), 1 << 0); + nir_ssa_def *load_v2 = nir_load_var(b, v[2]); + nir_store_var(b, v[0], load_v2, 1 << 0); + + bool progress = 
nir_opt_dead_write_vars(b->shader); + ASSERT_TRUE(progress); + + EXPECT_EQ(1, count_intrinsics(nir_intrinsic_store_deref)); + + nir_intrinsic_instr *store = find_next_intrinsic(nir_intrinsic_store_deref, NULL); + ASSERT_TRUE(store->src[1].is_ssa); + EXPECT_EQ(store->src[1].ssa, load_v2); +} + + +/* TODO: The DISABLED tests below depend on the dead write removal be able to + * identify dead writes between multiple blocks. This is still not + * implemented. + */ + +TEST_F(nir_dead_write_vars_test, DISABLED_dead_write_in_two_blocks) +{ + nir_variable **v = create_many_int(nir_var_shader_storage, "v", 3); + + nir_store_var(b, v[0], nir_load_var(b, v[1]), 1); + nir_ssa_def *load_v2 = nir_load_var(b, v[2]); + + /* Causes the stores to be in different blocks. */ + nir_pop_if(b, nir_push_if(b, nir_imm_int(b, 0))); + +
[Mesa-dev] [PATCH 03/11] nir: Add test file for vars related passes
Add basic helpers for doing tests on the vars related optimization passes. The main goal is to lower the barrier to create tests during development and debugging of the passes. Full coverage is not a requirement. --- src/compiler/Makefile.nir.am | 34 +++-- src/compiler/nir/meson.build | 11 ++ src/compiler/nir/tests/vars_tests.cpp | 199 ++ 3 files changed, 233 insertions(+), 11 deletions(-) create mode 100644 src/compiler/nir/tests/vars_tests.cpp diff --git a/src/compiler/Makefile.nir.am b/src/compiler/Makefile.nir.am index 4ccd7f36be9..c646c6bdc1e 100644 --- a/src/compiler/Makefile.nir.am +++ b/src/compiler/Makefile.nir.am @@ -60,25 +60,37 @@ nir/nir_opt_algebraic.c: nir/nir_opt_algebraic.py nir/nir_algebraic.py $(MKDIR_GEN) $(PYTHON_GEN) $(srcdir)/nir/nir_opt_algebraic.py > $@ || ($(RM) $@; false) -check_PROGRAMS += nir/tests/control_flow_tests +check_PROGRAMS += \ + nir/tests/control_flow_tests \ + nir/tests/vars_tests -nir_tests_control_flow_tests_CPPFLAGS = \ +NIR_TESTS_CPPFLAGS = \ $(AM_CPPFLAGS) \ -I$(top_builddir)/src/compiler/nir \ -I$(top_srcdir)/src/compiler/nir - -nir_tests_control_flow_tests_SOURCES = \ - nir/tests/control_flow_tests.cpp -nir_tests_control_flow_tests_CFLAGS = \ +NIR_TESTS_CFLAGS = \ $(PTHREAD_CFLAGS) -nir_tests_control_flow_tests_LDADD = \ - $(top_builddir)/src/gtest/libgtest.la \ - nir/libnir.la \ - $(top_builddir)/src/util/libmesautil.la \ +NIR_TESTS_LDADD = \ + $(top_builddir)/src/gtest/libgtest.la \ + nir/libnir.la \ + $(top_builddir)/src/util/libmesautil.la \ $(PTHREAD_LIBS) -TESTS += nir/tests/control_flow_tests +nir_tests_control_flow_tests_CPPFLAGS = $(NIR_TESTS_CPPFLAGS) +nir_tests_control_flow_tests_SOURCES = nir/tests/control_flow_tests.cpp +nir_tests_control_flow_tests_CFLAGS = $(NIR_TESTS_CFLAGS) +nir_tests_control_flow_tests_LDADD = $(NIR_TESTS_LDADD) + +nir_tests_vars_tests_CPPFLAGS = $(NIR_TESTS_CPPFLAGS) +nir_tests_vars_tests_SOURCES = nir/tests/vars_tests.cpp +nir_tests_vars_tests_CFLAGS = $(NIR_TESTS_CFLAGS) 
+nir_tests_vars_tests_LDADD = $(NIR_TESTS_LDADD) + + +TESTS += \ +nir/tests/control_flow_tests \ +nir/tests/vars_tests BUILT_SOURCES += \ diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build index 090aa7a628f..1a7fa2d3327 100644 --- a/src/compiler/nir/meson.build +++ b/src/compiler/nir/meson.build @@ -245,4 +245,15 @@ if with_tests link_with : libmesa_util, ) ) + test( +'nir_vars', +executable( + 'nir_vars_test', + files('tests/vars_tests.cpp'), + cpp_args : [cpp_vis_args, cpp_msvc_compat_args], + include_directories : [inc_common], + dependencies : [dep_thread, idep_gtest, idep_nir], + link_with : libmesa_util, +) + ) endif diff --git a/src/compiler/nir/tests/vars_tests.cpp b/src/compiler/nir/tests/vars_tests.cpp new file mode 100644 index 000..7fbdb514349 --- /dev/null +++ b/src/compiler/nir/tests/vars_tests.cpp @@ -0,0 +1,199 @@ +/* + * Copyright © 2018 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include + +#include "nir.h" +#include "nir_builder.h" + +namespace { + +class nir_vars_test : public ::testing::Test { +protected: + nir_vars_test(); + ~nir_vars_test(); + + nir_variable *create_int(nir_variable_mode mode, const char *name) { + if (mode == nir_var_local) + return nir_local_variable_create(b->impl, glsl_int_type(), name); + return nir_variable_create(b->shader, mode, glsl_int_type(), name); + } + + nir_variable *create_ivec2(nir_variable_mode mode, const char *name) { + const glsl_type *var_type = glsl_vector_type(GLSL_TYPE_INT, 2); + if (mode == nir_var_local) +
[Mesa-dev] [PATCH 00/11] NIR Copy Propagation between blocks
This series supersedes the "Global dead write vars removal pass". The goal here is to perform copy propagation among values in different blocks. While this currently has small benefits (it effectively helped some cases with uniforms), as we move other resources to be addressed with derefs (e.g. SSBOs), we expect it to be more useful, in particular with compute shaders. To be able to do this I had to extract the dead write removal from the copy propagation pass. When performing more than per-block, the information flows in a different way for that optimization (backwards), so it helps to keep them separated. The pass uses an approach similar to what we do in GLSL copy prop. We propagate values forward following the control flow graph. It doesn't try to merge values from different branches or handle more detailed control flow. I think this approach is a good intermediate step. I've experimented with various approaches to implement a full data-flow analysis, but all of them ended up either too complex or too messy. Some factors contributing to that were: (a) we have load/stores and copies, so a value in ACP needs to be "broken up into pieces", (b) copies with wildcards force us to take into consideration whether derefs are contained or not, at many levels, (c) we have writemasks (for the vectors) associated. In particular (b) made the deref_map tree-based structure I've discussed elsewhere not as good as I'd expected. Because we want to keep track of "a[*].x", "a[1].x" and "a[indirect].x", the walk on the tree is not linear in the size of the deref. A future idea I'll explore is trying to split the problem into different pieces, directed by the inputs we see. E.g. maybe a data-flow analysis only of the copies, or only the fully qualified load/stores, or handle only scalars (after a vec to scalar pass). For now, I've shelved the global optimization for dead write removal. It wasn't helping any cases, so I will wait until we have more derefs around to see the difference. 
Caio Marcelo de Oliveira Filho (11): util: Add foreach_reverse for dynarray util: Add macro to get number of elements in dynarray nir: Add test file for vars related passes nir: Add tests for dead write elimination nir: Separate dead write removal into its own pass intel/nir: Use the separated dead write vars pass freedreno/ir3: Use the separated dead write vars pass nir: Remove handling of dead writes from copy_prop_vars nir: Add tests for copy propagation of derefs nir: Take call instruction into account in copy_prop_vars nir: Copy propagation between blocks src/compiler/Makefile.nir.am| 34 +- src/compiler/Makefile.sources | 1 + src/compiler/nir/meson.build| 12 + src/compiler/nir/nir.h | 2 + src/compiler/nir/nir_opt_copy_prop_vars.c | 481 + src/compiler/nir/nir_opt_dead_write_vars.c | 216 ++ src/compiler/nir/tests/vars_tests.cpp | 737 src/gallium/drivers/freedreno/ir3/ir3_nir.c | 1 + src/intel/compiler/brw_nir.c| 1 + src/util/u_dynarray.h | 7 + 10 files changed, 1329 insertions(+), 163 deletions(-) create mode 100644 src/compiler/nir/nir_opt_dead_write_vars.c create mode 100644 src/compiler/nir/tests/vars_tests.cpp -- 2.19.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 01/11] util: Add foreach_reverse for dynarray
Useful to walk the array removing elements by swapping them with the last element. --- src/util/u_dynarray.h | 6 ++ 1 file changed, 6 insertions(+) diff --git a/src/util/u_dynarray.h b/src/util/u_dynarray.h index 6bea481d44b..f74bfc7080b 100644 --- a/src/util/u_dynarray.h +++ b/src/util/u_dynarray.h @@ -154,6 +154,12 @@ util_dynarray_trim(struct util_dynarray *buf) for (type *elem = (type *)(buf)->data; \ elem < (type *)((char *)(buf)->data + (buf)->size); elem++) +#define util_dynarray_foreach_reverse(buf, type, elem) \ + if ((buf)->size > 0) \ + for (type *elem = util_dynarray_top_ptr(buf, type); \ + elem >= (type *)(buf)->data; \ + elem--) + #define util_dynarray_delete_unordered(buf, type, v)\ do { \ unsigned num_elements = (buf)->size / sizeof(type); \ -- 2.19.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] Adding support for EXT_sRGB for Opengl ES
Any progress on adding EXT_sRGB support to Mesa? Jacob Lifshay ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 78123] svga prints out command errors
https://bugs.freedesktop.org/show_bug.cgi?id=78123 --- Comment #7 from Brian Paul --- John, is this still an issue? -- You are receiving this mail because: You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/5] anv/query: Add an emit_srm helper
--- src/intel/vulkan/genX_query.c | 53 ++- 1 file changed, 21 insertions(+), 32 deletions(-) diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c index 4ccbe2975de..7533ec05095 100644 --- a/src/intel/vulkan/genX_query.c +++ b/src/intel/vulkan/genX_query.c @@ -310,6 +310,22 @@ VkResult genX(GetQueryPoolResults)( return status; } +static void +emit_srm32(struct anv_batch *batch, struct anv_address addr, uint32_t reg) +{ + anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), srm) { + srm.MemoryAddress= addr; + srm.RegisterAddress = reg; + } +} + +static void +emit_srm64(struct anv_batch *batch, struct anv_address addr, uint32_t reg) +{ + emit_srm32(batch, anv_address_add(addr, 0), reg + 0); + emit_srm32(batch, anv_address_add(addr, 4), reg + 4); +} + static void emit_ps_depth_count(struct anv_cmd_buffer *cmd_buffer, struct anv_address addr) @@ -394,16 +410,7 @@ emit_pipeline_stat(struct anv_cmd_buffer *cmd_buffer, uint32_t stat, (1 << ARRAY_SIZE(vk_pipeline_stat_to_reg)) - 1); assert(stat < ARRAY_SIZE(vk_pipeline_stat_to_reg)); - uint32_t reg = vk_pipeline_stat_to_reg[stat]; - - anv_batch_emit(_buffer->batch, GENX(MI_STORE_REGISTER_MEM), lrm) { - lrm.RegisterAddress = reg; - lrm.MemoryAddress= anv_address_add(addr, 0); - } - anv_batch_emit(_buffer->batch, GENX(MI_STORE_REGISTER_MEM), lrm) { - lrm.RegisterAddress = reg + 4; - lrm.MemoryAddress= anv_address_add(addr, 4); - } + emit_srm64(_buffer->batch, addr, vk_pipeline_stat_to_reg[stat]); } void genX(CmdBeginQuery)( @@ -515,14 +522,7 @@ void genX(CmdWriteTimestamp)( switch (pipelineStage) { case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT: - anv_batch_emit(_buffer->batch, GENX(MI_STORE_REGISTER_MEM), srm) { - srm.RegisterAddress = TIMESTAMP; - srm.MemoryAddress= anv_address_add(query_addr, 8); - } - anv_batch_emit(_buffer->batch, GENX(MI_STORE_REGISTER_MEM), srm) { - srm.RegisterAddress = TIMESTAMP + 4; - srm.MemoryAddress= anv_address_add(query_addr, 12); - } + emit_srm64(_buffer->batch, 
anv_address_add(query_addr, 8), TIMESTAMP); break; default: @@ -689,21 +689,10 @@ gpu_write_query_result(struct anv_batch *batch, VkQueryResultFlags flags, uint32_t value_index, uint32_t reg) { - if (flags & VK_QUERY_RESULT_64_BIT) - dst_addr = anv_address_add(dst_addr, value_index * 8); - else - dst_addr = anv_address_add(dst_addr, value_index * 4); - - anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), srm) { - srm.RegisterAddress = reg; - srm.MemoryAddress= anv_address_add(dst_addr, 0); - } - if (flags & VK_QUERY_RESULT_64_BIT) { - anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), srm) { - srm.RegisterAddress = reg + 4; - srm.MemoryAddress= anv_address_add(dst_addr, 4); - } + emit_srm64(batch, anv_address_add(dst_addr, value_index * 8), reg); + } else { + emit_srm32(batch, anv_address_add(dst_addr, value_index * 4), reg); } } -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/5] anv/query: Use anv_address everywhere
Instead of passing around BOs and offsets, use addresses which are anv's GPU equivalent of pointers. --- src/intel/vulkan/genX_query.c | 121 ++ 1 file changed, 64 insertions(+), 57 deletions(-) diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c index 817a3a3c4e2..56d18e021e4 100644 --- a/src/intel/vulkan/genX_query.c +++ b/src/intel/vulkan/genX_query.c @@ -139,6 +139,15 @@ void genX(DestroyQueryPool)( vk_free2(>alloc, pAllocator, pool); } +static struct anv_address +anv_query_address(struct anv_query_pool *pool, uint32_t query) +{ + return (struct anv_address) { + .bo = >bo, + .offset = query * pool->stride, + }; +} + static void cpu_write_query_result(void *dst_slot, VkQueryResultFlags flags, uint32_t value_index, uint64_t result) @@ -303,13 +312,13 @@ VkResult genX(GetQueryPoolResults)( static void emit_ps_depth_count(struct anv_cmd_buffer *cmd_buffer, -struct anv_bo *bo, uint32_t offset) +struct anv_address addr) { anv_batch_emit(_buffer->batch, GENX(PIPE_CONTROL), pc) { pc.DestinationAddressType = DAT_PPGTT; pc.PostSyncOperation = WritePSDepthCount; pc.DepthStallEnable= true; - pc.Address = (struct anv_address) { bo, offset }; + pc.Address = addr; if (GEN_GEN == 9 && cmd_buffer->device->info.gt == 4) pc.CommandStreamerStallEnable = true; @@ -318,12 +327,12 @@ emit_ps_depth_count(struct anv_cmd_buffer *cmd_buffer, static void emit_query_availability(struct anv_cmd_buffer *cmd_buffer, -struct anv_bo *bo, uint32_t offset) +struct anv_address addr) { anv_batch_emit(_buffer->batch, GENX(PIPE_CONTROL), pc) { pc.DestinationAddressType = DAT_PPGTT; pc.PostSyncOperation = WriteImmediateData; - pc.Address = (struct anv_address) { bo, offset }; + pc.Address = addr; pc.ImmediateData = 1; } } @@ -340,20 +349,19 @@ emit_zero_queries(struct anv_cmd_buffer *cmd_buffer, const uint32_t num_elements = pool->stride / sizeof(uint64_t); for (uint32_t i = 0; i < num_queries; i++) { - uint32_t slot_offset = (first_index + i) * pool->stride; + struct anv_address 
slot_addr = + anv_query_address(pool, first_index + i); for (uint32_t j = 1; j < num_elements; j++) { anv_batch_emit(_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) { -sdi.Address.bo = >bo; -sdi.Address.offset = slot_offset + j * sizeof(uint64_t); +sdi.Address = anv_address_add(slot_addr, j * sizeof(uint64_t)); sdi.ImmediateData = 0ull; } anv_batch_emit(_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) { -sdi.Address.bo = >bo; -sdi.Address.offset = slot_offset + j * sizeof(uint64_t) + 4; +sdi.Address = anv_address_add(slot_addr, j * sizeof(uint64_t) + 4); sdi.ImmediateData = 0ull; } } - emit_query_availability(cmd_buffer, >bo, slot_offset); + emit_query_availability(cmd_buffer, slot_addr); } } @@ -368,10 +376,7 @@ void genX(CmdResetQueryPool)( for (uint32_t i = 0; i < queryCount; i++) { anv_batch_emit(_buffer->batch, GENX(MI_STORE_DATA_IMM), sdm) { - sdm.Address = (struct anv_address) { -.bo = >bo, -.offset = (firstQuery + i) * pool->stride, - }; + sdm.Address = anv_query_address(pool, firstQuery + i); sdm.ImmediateData = 0; } } @@ -393,7 +398,7 @@ static const uint32_t vk_pipeline_stat_to_reg[] = { static void emit_pipeline_stat(struct anv_cmd_buffer *cmd_buffer, uint32_t stat, - struct anv_bo *bo, uint32_t offset) + struct anv_address addr) { STATIC_ASSERT(ANV_PIPELINE_STATISTICS_MASK == (1 << ARRAY_SIZE(vk_pipeline_stat_to_reg)) - 1); @@ -402,12 +407,12 @@ emit_pipeline_stat(struct anv_cmd_buffer *cmd_buffer, uint32_t stat, uint32_t reg = vk_pipeline_stat_to_reg[stat]; anv_batch_emit(_buffer->batch, GENX(MI_STORE_REGISTER_MEM), lrm) { - lrm.RegisterAddress = reg, - lrm.MemoryAddress= (struct anv_address) { bo, offset }; + lrm.RegisterAddress = reg; + lrm.MemoryAddress= anv_address_add(addr, 0); } anv_batch_emit(_buffer->batch, GENX(MI_STORE_REGISTER_MEM), lrm) { - lrm.RegisterAddress = reg + 4, - lrm.MemoryAddress= (struct anv_address) { bo, offset + 4 }; + lrm.RegisterAddress = reg + 4; + lrm.MemoryAddress= anv_address_add(addr, 4); } } @@ -419,10 +424,11 @@ void 
genX(CmdBeginQuery)( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + struct anv_address query_addr = anv_query_address(pool,
[Mesa-dev] [PATCH 2/5] anv/query: Write both dwords in emit_zero_queries
Each query slot is a uint64_t and we were only zeroing half of it. Fixes: 7ec6e4e68980 "anv/query: implement multiview interactions" --- src/intel/vulkan/genX_query.c | 5 + 1 file changed, 5 insertions(+) diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c index 1b26401c9ff..817a3a3c4e2 100644 --- a/src/intel/vulkan/genX_query.c +++ b/src/intel/vulkan/genX_query.c @@ -347,6 +347,11 @@ emit_zero_queries(struct anv_cmd_buffer *cmd_buffer, sdi.Address.offset = slot_offset + j * sizeof(uint64_t); sdi.ImmediateData = 0ull; } + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) { +sdi.Address.bo = &pool->bo; +sdi.Address.offset = slot_offset + j * sizeof(uint64_t) + 4; +sdi.ImmediateData = 0ull; + } } emit_query_availability(cmd_buffer, &pool->bo, slot_offset); } -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/5] anv: Add a mi_memset and use it for zeroing queries
--- src/intel/vulkan/anv_genX.h| 4 src/intel/vulkan/genX_gpu_memcpy.c | 17 + src/intel/vulkan/genX_query.c | 14 ++ 3 files changed, 23 insertions(+), 12 deletions(-) diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index bef9b5bde4e..7921e0674a0 100644 --- a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -80,5 +80,9 @@ void genX(cmd_buffer_mi_memcpy)(struct anv_cmd_buffer *cmd_buffer, struct anv_address dst, struct anv_address src, uint32_t size); +void genX(cmd_buffer_mi_memset)(struct anv_cmd_buffer *cmd_buffer, +struct anv_address dst, uint32_t value, +uint32_t size); + void genX(blorp_exec)(struct blorp_batch *batch, const struct blorp_params *params); diff --git a/src/intel/vulkan/genX_gpu_memcpy.c b/src/intel/vulkan/genX_gpu_memcpy.c index 2b39f2fc009..fd78f4d125b 100644 --- a/src/intel/vulkan/genX_gpu_memcpy.c +++ b/src/intel/vulkan/genX_gpu_memcpy.c @@ -108,6 +108,23 @@ genX(cmd_buffer_mi_memcpy)(struct anv_cmd_buffer *cmd_buffer, return; } +void +genX(cmd_buffer_mi_memset)(struct anv_cmd_buffer *cmd_buffer, + struct anv_address dst, uint32_t value, + uint32_t size) +{ + /* This memset operates in units of dwords. 
*/ + assert(size % 4 == 0); + assert(dst.offset % 4 == 0); + + for (uint32_t i = 0; i < size; i += 4) { + anv_batch_emit(_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) { + sdi.Address = anv_address_add(dst, i); + sdi.ImmediateData = value; + } + } +} + void genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer, struct anv_address dst, struct anv_address src, diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c index 56d18e021e4..4ccbe2975de 100644 --- a/src/intel/vulkan/genX_query.c +++ b/src/intel/vulkan/genX_query.c @@ -346,21 +346,11 @@ emit_zero_queries(struct anv_cmd_buffer *cmd_buffer, struct anv_query_pool *pool, uint32_t first_index, uint32_t num_queries) { - const uint32_t num_elements = pool->stride / sizeof(uint64_t); - for (uint32_t i = 0; i < num_queries; i++) { struct anv_address slot_addr = anv_query_address(pool, first_index + i); - for (uint32_t j = 1; j < num_elements; j++) { - anv_batch_emit(_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) { -sdi.Address = anv_address_add(slot_addr, j * sizeof(uint64_t)); -sdi.ImmediateData = 0ull; - } - anv_batch_emit(_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) { -sdi.Address = anv_address_add(slot_addr, j * sizeof(uint64_t) + 4); -sdi.ImmediateData = 0ull; - } - } + genX(cmd_buffer_mi_memset)(cmd_buffer, anv_address_add(slot_addr, 8), + 0, pool->stride - 8); emit_query_availability(cmd_buffer, slot_addr); } } -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/5] anv/query: Increment an index while writing results
Instead of computing an index at the end which we hope maps to the number of things written, just count the number of things as we go. --- src/intel/vulkan/genX_query.c | 67 --- 1 file changed, 31 insertions(+), 36 deletions(-) diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c index 011db549c08..1b26401c9ff 100644 --- a/src/intel/vulkan/genX_query.c +++ b/src/intel/vulkan/genX_query.c @@ -249,18 +249,19 @@ VkResult genX(GetQueryPoolResults)( */ bool write_results = available || (flags & VK_QUERY_RESULT_PARTIAL_BIT); - if (write_results) { - switch (pool->type) { - case VK_QUERY_TYPE_OCCLUSION: { -cpu_write_query_result(pData, flags, 0, slot[2] - slot[1]); -break; - } + uint32_t idx = 0; + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + if (write_results) +cpu_write_query_result(pData, flags, idx, slot[2] - slot[1]); + idx++; + break; - case VK_QUERY_TYPE_PIPELINE_STATISTICS: { -uint32_t statistics = pool->pipeline_statistics; -uint32_t idx = 0; -while (statistics) { - uint32_t stat = u_bit_scan(); + case VK_QUERY_TYPE_PIPELINE_STATISTICS: { + uint32_t statistics = pool->pipeline_statistics; + while (statistics) { +uint32_t stat = u_bit_scan(); +if (write_results) { uint64_t result = slot[idx * 2 + 2] - slot[idx * 2 + 1]; /* WaDividePSInvocationCountBy4:HSW,BDW */ @@ -269,29 +270,28 @@ VkResult genX(GetQueryPoolResults)( result >>= 2; cpu_write_query_result(pData, flags, idx, result); - - idx++; } -assert(idx == util_bitcount(pool->pipeline_statistics)); -break; +idx++; } + assert(idx == util_bitcount(pool->pipeline_statistics)); + break; + } - case VK_QUERY_TYPE_TIMESTAMP: { -cpu_write_query_result(pData, flags, 0, slot[1]); -break; - } - default: -unreachable("invalid pool type"); - } - } else { - status = VK_NOT_READY; + case VK_QUERY_TYPE_TIMESTAMP: + if (write_results) +cpu_write_query_result(pData, flags, idx, slot[1]); + idx++; + break; + + default: + unreachable("invalid pool type"); } - if (flags & 
VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { - uint32_t idx = (pool->type == VK_QUERY_TYPE_PIPELINE_STATISTICS) ? -util_bitcount(pool->pipeline_statistics) : 1; + if (!write_results) + status = VK_NOT_READY; + + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) cpu_write_query_result(pData, flags, idx, available); - } pData += stride; if (pData >= data_end) @@ -749,17 +749,17 @@ void genX(CmdCopyQueryPoolResults)( for (uint32_t i = 0; i < queryCount; i++) { slot_offset = (firstQuery + i) * pool->stride; + uint32_t idx = 0; switch (pool->type) { case VK_QUERY_TYPE_OCCLUSION: compute_query_result(_buffer->batch, MI_ALU_REG2, >bo, slot_offset + 8); gpu_write_query_result(_buffer->batch, buffer, destOffset, -flags, 0, CS_GPR(2)); +flags, idx++, CS_GPR(2)); break; case VK_QUERY_TYPE_PIPELINE_STATISTICS: { uint32_t statistics = pool->pipeline_statistics; - uint32_t idx = 0; while (statistics) { uint32_t stat = u_bit_scan(); @@ -774,9 +774,7 @@ void genX(CmdCopyQueryPoolResults)( } gpu_write_query_result(_buffer->batch, buffer, destOffset, - flags, idx, CS_GPR(0)); - -idx++; + flags, idx++, CS_GPR(0)); } assert(idx == util_bitcount(pool->pipeline_statistics)); break; @@ -794,9 +792,6 @@ void genX(CmdCopyQueryPoolResults)( } if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { - uint32_t idx = (pool->type == VK_QUERY_TYPE_PIPELINE_STATISTICS) ? -util_bitcount(pool->pipeline_statistics) : 1; - emit_load_alu_reg_u64(_buffer->batch, CS_GPR(0), >bo, slot_offset); gpu_write_query_result(_buffer->batch, buffer, destOffset, -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 91098] vmwgfx null ptr dereference at vmw_screen_ioctl.c:76 due to ioctl failure
https://bugs.freedesktop.org/show_bug.cgi?id=91098 Brian Paul changed: What|Removed |Added Resolution|--- |WONTFIX Status|NEW |RESOLVED --- Comment #2 from Brian Paul --- There's been no follow-up from the original poster. Closing. Re-open if it's still an issue for you. -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 48143] [vmwgfx] src/gallium/drivers/svga/svga_tgsi_insn.c:273:get_temp: Assertion `i < 32' failed.
https://bugs.freedesktop.org/show_bug.cgi?id=48143 Brian Paul changed: What|Removed |Added Resolution|--- |WONTFIX Status|NEW |RESOLVED --- Comment #1 from Brian Paul --- This should not be an issue with the current driver. We support many more than 32 temp regs now. -- You are receiving this mail because: You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 92983] [vmwgfx] SIGABRT vmw_screen_ioctl.c:461
https://bugs.freedesktop.org/show_bug.cgi?id=92983 Brian Paul changed: What|Removed |Added CC||v...@freedesktop.org --- Comment #1 from Brian Paul --- Vinson, this is a pretty old bug. Do you want to re-test it? -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 107878] Artifacting Hair on Overwatch vega56
https://bugs.freedesktop.org/show_bug.cgi?id=107878 --- Comment #6 from Timothy Arceri --- (In reply to gloriouseggroll from comment #5) > can confirm fixed on llvm8 for me as well. (still broken with current mesa > git and llvm 7) If you want llvm7 to work you might need to do a git bisect to see what fixed it and see if the fix can be backported to llvm 7 (assuming whoever fixed it isn't trying to get it included already). -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 93970] Second Life - Advanced Lighting Model shader fails to compile on Radeon SI driver
https://bugs.freedesktop.org/show_bug.cgi?id=93970 Timothy Arceri changed: What|Removed |Added Resolution|--- |NOTOURBUG Status|NEEDINFO|RESOLVED --- Comment #5 from Timothy Arceri --- I downloaded the latest Second Life viewer (which is apparently no longer supported for Linux) and Advanced Lighting Model seems to work without any workarounds, so it seems this was fixed in the game at some point. Closing. -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 96542] Tonga Unreal elemental black lava since radeonsi: enable OpenGL 4.3
https://bugs.freedesktop.org/show_bug.cgi?id=96542 Timothy Arceri changed: What|Removed |Added Resolution|--- |NOTOURBUG Status|NEW |RESOLVED --- Comment #3 from Timothy Arceri --- As per comment #1, when running on the Nvidia blob it always takes a GL 3.2 path. When I replay an apitrace taken on radeonsi the Nvidia blob also renders the lava black, so this does indeed seem to be a bug in the demo. Closing. -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 107878] Artifacting Hair on Overwatch vega56
https://bugs.freedesktop.org/show_bug.cgi?id=107878 --- Comment #5 from gloriouseggr...@gmail.com --- can confirm fixed on llvm8 for me as well. (still broken with current mesa git and llvm 7) -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] gallium/util: don't let children of fork & exec inherit our thread affinity
Since only Blender and Firefox experience problems, they can be blacklisted for this optimization, and then we can expand the blacklist as we go. Marek On Fri, Sep 14, 2018 at 9:04 PM, Marek Olšák wrote: > On Fri, Sep 14, 2018 at 4:53 AM, Michel Dänzer wrote: >> On 2018-09-13 8:56 p.m., Marek Olšák wrote: >>> On Thu, Sep 13, 2018 at 11:48 AM, Michel Dänzer wrote: On 2018-09-13 2:40 a.m., Marek Olšák wrote: > From: Marek Olšák > > [...] > > static void > -util_init_cache_number(void) > +util_init_thread_pinning(void) > { > /* Get a semi-random number. */ > int64_t t = os_time_get_nano(); > L3_cache_number = (t ^ (t >> 8) ^ (t >> 16)); > + > + /* Reset thread affinity for all children of fork and exec to prevent I don't think exec (which doesn't spawn a child, it replaces the current process "image" with a new one) has anything to do with this. > +* spawned processes and threads from inheriting the current thread's > +* affinity. As the name implies, pthread_atfork only affects child processes spawned with fork(), not new threads. As such, I'm afraid this won't help at least for blender, which AFAICT doesn't call fork, it only spawns threads. >>> >>> All created threads and processes are just some variants of fork. >> >> fork(2) spawns a new process, not a new thread in the same process. Its >> current implementation in glibc uses clone(2), which is also used for >> spawning threads, but that's an implementation detail. The kernel still >> has the dedicated fork system calls. >> >> pthread_atfork only affects new processes created with fork(2), not new >> threads created in the same process. >> >> >>> This patch is enough to stop inheriting thread affinity from X and >>> gnome-shell to GL apps, to gcc run within an X terminal, etc. >>> Everything within X inherits the thread affinity of X or gnome-shell, >>> including gcc. >> >> FWIW, X clients are not descendants of the X server, so they must have >> inherited it from something else. 
>> >> Anyway, now I understand the scope of this patch, thanks. But the commit >> log and comment need to be fixed not to be misleading by talking about >> exec and spawned threads. E.g.: >> >> gallium/util: don't let child processes inherit our thread affinity >> >>/* Prevent child processes from inheriting the current thread's >> * affinity. >> >> >> That leaves: >> >>> +* What happens if a driver is unloaded and the app creates a thread? >> >> I suppose the child process will likely crash, because the memory >> address where util_set_full_cpu_affinity was located will either be >> unmapped or have random other contents? >> >> At least in theory, there could also be an issue where the application >> might have set its own thread affinity before calling fork, which would >> be clobbered by util_set_full_cpu_affinity in the child process. >> >> >> Last but not least, this doesn't solve the issue of apps such as >> blender, which spawn their own worker threads after initializing OpenGL >> (possibly not themselves directly, but via the toolkit or another >> library; e.g. GTK+4 uses OpenGL by default), inheriting the thread affinity. >> >> >> Due to these issues, setting the thread affinity needs to be disabled by >> default, and only white-listed for applications where it's known safe >> and beneficial. This sucks, but I'm afraid that's the reality until >> there's better API available which allows solving these issues. > > We don't have the bandwidth to maintain whitelists. This will either > have to be always on or always off. > > On the positive side, only Ryzens with multiple CCXs get all the > benefits and disadvantages. > > Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 106922] Tangrams demo: LLVM ERROR: Cannot select: 0x7e8d8750: i16 = bitcast 0x7e8d8af8
https://bugs.freedesktop.org/show_bug.cgi?id=106922 --- Comment #7 from Christoph Haag --- Created attachment 141569 --> https://bugs.freedesktop.org/attachment.cgi?id=141569&action=edit corruption at small resolutions Nice, with the patches it runs without crashing. At 1920x1080 it looks good, but when choosing smaller resolutions, corruption appears. In the default 720x400 it's pretty bad. I have no idea if the corruption is in any way related to the 16 bit support though. RX 480, llvm 8.0.0svn_r169421, latest radv git + patches -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] gallium/util: don't let children of fork & exec inherit our thread affinity
On Fri, Sep 14, 2018 at 4:53 AM, Michel Dänzer wrote: > On 2018-09-13 8:56 p.m., Marek Olšák wrote: >> On Thu, Sep 13, 2018 at 11:48 AM, Michel Dänzer wrote: >>> On 2018-09-13 2:40 a.m., Marek Olšák wrote: From: Marek Olšák [...] static void -util_init_cache_number(void) +util_init_thread_pinning(void) { /* Get a semi-random number. */ int64_t t = os_time_get_nano(); L3_cache_number = (t ^ (t >> 8) ^ (t >> 16)); + + /* Reset thread affinity for all children of fork and exec to prevent >>> >>> I don't think exec (which doesn't spawn a child, it replaces the current >>> process "image" with a new one) has anything to do with this. >>> >>> +* spawned processes and threads from inheriting the current thread's +* affinity. >>> >>> As the name implies, pthread_atfork only affects child processes spawned >>> with fork(), not new threads. As such, I'm afraid this won't help at >>> least for blender, which AFAICT doesn't call fork, it only spawns threads. >> >> All created threads and processes are just some variants of fork. > > fork(2) spawns a new process, not a new thread in the same process. Its > current implementation in glibc uses clone(2), which is also used for > spawning threads, but that's an implementation detail. The kernel still > has the dedicated fork system calls. > > pthread_atfork only affects new processes created with fork(2), not new > threads created in the same process. > > >> This patch is enough to stop inheriting thread affinity from X and >> gnome-shell to GL apps, to gcc run within an X terminal, etc. >> Everything within X inherits the thread affinity of X or gnome-shell, >> including gcc. > > FWIW, X clients are not descendants of the X server, so they must have > inherited it from something else. > > Anyway, now I understand the scope of this patch, thanks. But the commit > log and comment need to be fixed not to be misleading by talking about > exec and spawned threads. 
E.g.: > > gallium/util: don't let child processes inherit our thread affinity > >/* Prevent child processes from inheriting the current thread's > * affinity. > > > That leaves: > >> +* What happens if a driver is unloaded and the app creates a thread? > > I suppose the child process will likely crash, because the memory > address where util_set_full_cpu_affinity was located will either be > unmapped or have random other contents? > > At least in theory, there could also be an issue where the application > might have set its own thread affinity before calling fork, which would > be clobbered by util_set_full_cpu_affinity in the child process. > > > Last but not least, this doesn't solve the issue of apps such as > blender, which spawn their own worker threads after initializing OpenGL > (possibly not themselves directly, but via the toolkit or another > library; e.g. GTK+4 uses OpenGL by default), inheriting the thread affinity. > > > Due to these issues, setting the thread affinity needs to be disabled by > default, and only white-listed for applications where it's known safe > and beneficial. This sucks, but I'm afraid that's the reality until > there's better API available which allows solving these issues. We don't have the bandwidth to maintain whitelists. This will either have to be always on or always off. On the positive side, only Ryzens with multiple CCXs get all the benefits and disadvantages. Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] gallium/util: don't let children of fork & exec inherit our thread affinity
On Fri, Sep 14, 2018 at 12:54 PM, Nicholas Miell wrote: > On 09/12/2018 05:40 PM, Marek Olšák wrote: >> +static void >> +util_set_full_cpu_affinity(void) >> +{ >> + cpu_set_t cpuset; >> + >> + CPU_ZERO(&cpuset); >> + for (unsigned i = 0; i < CPU_SETSIZE; i++) >> + CPU_SET(i, &cpuset); >> + >> + pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset); >> +} >> >> static void >> -util_init_cache_number(void) >> +util_init_thread_pinning(void) >> { >> /* Get a semi-random number. */ >> int64_t t = os_time_get_nano(); >> L3_cache_number = (t ^ (t >> 8) ^ (t >> 16)); >> + >> + /* Reset thread affinity for all children of fork and exec to prevent >> +* spawned processes and threads from inheriting the current thread's >> +* affinity. >> +* >> +* What happens if a driver is unloaded and the app creates a thread? >> +*/ >> + pthread_atfork(NULL, NULL, util_set_full_cpu_affinity); >> } >> > > You should probably save and restore the application's affinity mask > rather than assuming the mask is set to all CPUs. The affinity mask references a random CCX for each OpenGL context, and we don't know which thread called fork in the child. Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 104681] Einstein@Home BOINC FGRPB1G GPU app crash
https://bugs.freedesktop.org/show_bug.cgi?id=104681 Timothy Arceri changed: What|Removed |Added Component|Mesa core |Gallium/StateTracker/Clover -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 106833] glLinkProgram is expected to fail when vertex attribute aliasing happens on ES3.0 context or later
https://bugs.freedesktop.org/show_bug.cgi?id=106833 Timothy Arceri changed: What|Removed |Added QA Contact|mesa-dev@lists.freedesktop. |intel-3d-bugs@lists.freedes |org |ktop.org Component|Mesa core |glsl-compiler -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 106996] Compute shader compiling fails for invalid input layout qualifier used
https://bugs.freedesktop.org/show_bug.cgi?id=106996 Timothy Arceri changed: What|Removed |Added QA Contact|mesa-dev@lists.freedesktop. |intel-3d-bugs@lists.freedes |org |ktop.org Component|Mesa core |glsl-compiler -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 78123] svga prints out command errors
https://bugs.freedesktop.org/show_bug.cgi?id=78123 Timothy Arceri changed: What|Removed |Added Component|Other |Drivers/Gallium/vmwgfx -- You are receiving this mail because: You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 106677] vmwgfx: atom (electron-based app) causes corruption, hangs
https://bugs.freedesktop.org/show_bug.cgi?id=106677 Timothy Arceri changed: What|Removed |Added Component|Other |Drivers/Gallium/vmwgfx -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 100037] [vmwgfx] Invalid SVGA3D command: 1202
https://bugs.freedesktop.org/show_bug.cgi?id=100037 Timothy Arceri changed: What|Removed |Added Component|Other |Drivers/Gallium/vmwgfx -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 91098] vmwgfx null ptr dereference at vmw_screen_ioctl.c:76 due to ioctl failure
https://bugs.freedesktop.org/show_bug.cgi?id=91098 Timothy Arceri changed: What|Removed |Added Component|Other |Drivers/Gallium/vmwgfx -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 48143] [vmwgfx] src/gallium/drivers/svga/svga_tgsi_insn.c:273:get_temp: Assertion `i < 32' failed.
https://bugs.freedesktop.org/show_bug.cgi?id=48143 Timothy Arceri changed: What|Removed |Added Component|Other |Drivers/Gallium/vmwgfx -- You are receiving this mail because: You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 92983] [vmwgfx] SIGABRT vmw_screen_ioctl.c:461
https://bugs.freedesktop.org/show_bug.cgi?id=92983 Timothy Arceri changed: What|Removed |Added Component|Mesa core |Drivers/Gallium/vmwgfx -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 101405] x.org/wiki/GalliumStatus/: Add description for DEPRECATED
https://bugs.freedesktop.org/show_bug.cgi?id=101405 --- Comment #2 from David Hedlund --- (In reply to Timothy Arceri from comment #1) > I don't think there is much point fixing this that page is out of date > (hasn't been updated in over 5 years) and its usefulness is questionable. Thank you. Can you please add a disclaimer on https://www.x.org/wiki/GalliumStatus/ to make this clear? -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 101405] x.org/wiki/GalliumStatus/: Add description for DEPRECATED
https://bugs.freedesktop.org/show_bug.cgi?id=101405 Timothy Arceri changed: What|Removed |Added Resolution|--- |WONTFIX Status|REOPENED|RESOLVED --- Comment #1 from Timothy Arceri --- I don't think there is much point in fixing this: that page is out of date (it hasn't been updated in over 5 years) and its usefulness is questionable. -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] glsl: Avoid propagating incompatible type of initializer
Series: Reviewed-by: Timothy Arceri Are there piglit tests to go with this? On 15/8/18 10:46 pm, Danylo Piliaiev wrote: do_assignment validated assigment but when rhs type was not compatible it proceeded without issues and returned error_emitted = false. On the other hand process_initializer expected do_assignment to always return compatible type and never fail. As a result when variable was initialized with incompatible type the type of variable changed to the incompatible one. This manifested in unnecessary error messages and in one case in crash. Example GLSL: vec4 tmp = vec2(0.0); tmp.z -= 1.0; Past error messages: initializer of type vec2 cannot be assigned to variable of type vec4 invalid swizzle / mask `z' type mismatch operands to arithmetic operators must be numeric After this patch: initializer of type vec2 cannot be assigned to variable of type vec4 In the other case when we initialize variable with incompatible struct, accessing variable's field leaded to a crash. Example: uniform struct {float field;} data; ... vec4 tmp = data; tmp.x -= 1.0; After the patch there is only error line without a crash: initializer of type #anon_struct cannot be assigned to variable of type vec4 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107547 Signed-off-by: Danylo Piliaiev --- src/compiler/glsl/ast_to_hir.cpp | 62 +--- 1 file changed, 33 insertions(+), 29 deletions(-) diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp index 5d3f10b682..93e7c8ec33 100644 --- a/src/compiler/glsl/ast_to_hir.cpp +++ b/src/compiler/glsl/ast_to_hir.cpp @@ -1012,6 +1012,8 @@ do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state, mark_whole_array_access(rhs); mark_whole_array_access(lhs); } + } else { + error_emitted = true; } /* Most callers of do_assignment (assign, add_assign, pre_inc/dec, @@ -4562,41 +4564,43 @@ process_initializer(ir_variable *var, ast_declaration *decl, /* Never emit code to initialize a uniform. 
*/ const glsl_type *initializer_type; + bool error_emitted = false; if (!type->qualifier.flags.q.uniform) { - do_assignment(initializer_instructions, state, - NULL, - lhs, rhs, - , true, - true, - type->get_location()); + error_emitted = +do_assignment(initializer_instructions, state, + NULL, lhs, rhs, + , true, true, + type->get_location()); initializer_type = result->type; } else initializer_type = rhs->type; - var->constant_initializer = rhs->constant_expression_value(mem_ctx); - var->data.has_initializer = true; + if (!error_emitted) { + var->constant_initializer = rhs->constant_expression_value(mem_ctx); + var->data.has_initializer = true; - /* If the declared variable is an unsized array, it must inherrit - * its full type from the initializer. A declaration such as - * - * uniform float a[] = float[](1.0, 2.0, 3.0, 3.0); - * - * becomes - * - * uniform float a[4] = float[](1.0, 2.0, 3.0, 3.0); - * - * The assignment generated in the if-statement (below) will also - * automatically handle this case for non-uniforms. - * - * If the declared variable is not an array, the types must - * already match exactly. As a result, the type assignment - * here can be done unconditionally. For non-uniforms the call - * to do_assignment can change the type of the initializer (via - * the implicit conversion rules). For uniforms the initializer - * must be a constant expression, and the type of that expression - * was validated above. - */ - var->type = initializer_type; + /* If the declared variable is an unsized array, it must inherrit + * its full type from the initializer. A declaration such as + * + * uniform float a[] = float[](1.0, 2.0, 3.0, 3.0); + * + * becomes + * + * uniform float a[4] = float[](1.0, 2.0, 3.0, 3.0); + * + * The assignment generated in the if-statement (below) will also + * automatically handle this case for non-uniforms. + * + * If the declared variable is not an array, the types must + * already match exactly. 
As a result, the type assignment + * here can be done unconditionally. For non-uniforms the call + * to do_assignment can change the type of the initializer (via + * the implicit conversion rules). For uniforms the initializer + * must be a constant expression, and the type of that expression + * was
[Mesa-dev] [Bug 107939] Commit 888b7fc causes performance regression
https://bugs.freedesktop.org/show_bug.cgi?id=107939 --- Comment #1 from Timothy Arceri --- Since the referenced sha is for the 18.1 rather than the master branch I'm copying here for completeness. commit 888b7fcaf4d4f25c90c318495c7c38066cff29fb Author: Samuel Pitoiset Date: Wed Jun 13 20:19:23 2018 +0200 radv: don't fast clear HTILE for 16-bit depth surfaces on GFX8 This causes rendering issues in Shadow Warrior 2 with DXVK. Cc: mesa-sta...@lists.freedesktop.org Fixes: ccc64f3133 ("radv: enable TC-compat HTILE for 16-bit depth surfaces on GFX8") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106912 Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen (cherry picked from commit 51e23d34190076159129dd7b449b95a1ac3d4949) -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 107923] build_id.c:126: multiple definition of `build_id_length'
https://bugs.freedesktop.org/show_bug.cgi?id=107923 Vinson Lee changed: What|Removed |Added Keywords||bisected CC||fdo-b...@engestrom.ch, ||i...@freedesktop.org --- Comment #5 from Vinson Lee --- 8396043f304bb2a752130230055605c5c966e89f is the first bad commit commit 8396043f304bb2a752130230055605c5c966e89f Author: Dylan Baker Date: Tue Aug 21 09:46:46 2018 -0700 Replace uses of _mesa_bitcount with util_bitcount and _mesa_bitcount_64 with util_bitcount_64. This fixes a build problem in nir for platforms that don't have popcount or popcountll, such as 32bit msvc. v2: - Fix additional uses of _mesa_bitcount added after this was originally written Acked-by: Eric Engestrom (v1) Acked-by: Eric Anholt Reviewed-by: Ian Romanick :04 04 9b4d3f30a8c2cb4d4a549b92c6db3cf499338579 dcad5de2d7a236b4fe35516fde4abf5d24516415 M src bisect run success -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] genxml: Add SO_PRIM_STORAGE_NEEDED and SO_NUM_PRIMS_WRITTEN
--- src/intel/genxml/gen10.xml | 32 src/intel/genxml/gen11.xml | 32 src/intel/genxml/gen7.xml | 32 src/intel/genxml/gen75.xml | 32 src/intel/genxml/gen8.xml | 32 src/intel/genxml/gen9.xml | 32 6 files changed, 192 insertions(+) diff --git a/src/intel/genxml/gen10.xml b/src/intel/genxml/gen10.xml index abd5da297d6..0bb38a76a78 100644 --- a/src/intel/genxml/gen10.xml +++ b/src/intel/genxml/gen10.xml @@ -3553,6 +3553,38 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/intel/genxml/gen11.xml b/src/intel/genxml/gen11.xml index 1b3befbbfc9..6eed5f99d92 100644 --- a/src/intel/genxml/gen11.xml +++ b/src/intel/genxml/gen11.xml @@ -3551,6 +3551,38 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/intel/genxml/gen7.xml b/src/intel/genxml/gen7.xml index 6dde7973e69..7600a27bcb0 100644 --- a/src/intel/genxml/gen7.xml +++ b/src/intel/genxml/gen7.xml @@ -2489,6 +2489,38 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/intel/genxml/gen75.xml b/src/intel/genxml/gen75.xml index dfc3d891498..103723168bd 100644 --- a/src/intel/genxml/gen75.xml +++ b/src/intel/genxml/gen75.xml @@ -2972,6 +2972,38 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/intel/genxml/gen8.xml b/src/intel/genxml/gen8.xml index d42c63aabd8..364cecf5d67 100644 --- a/src/intel/genxml/gen8.xml +++ b/src/intel/genxml/gen8.xml @@ -3206,6 +3206,38 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/intel/genxml/gen9.xml b/src/intel/genxml/gen9.xml index ca268254503..4a26ae98fb9 100644 --- a/src/intel/genxml/gen9.xml +++ b/src/intel/genxml/gen9.xml @@ -3491,6 +3491,38 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 100960] Special block from Minecraft mod rendered out of place
https://bugs.freedesktop.org/show_bug.cgi?id=100960 --- Comment #9 from Fabian Maurer --- Created attachment 141566 --> https://bugs.freedesktop.org/attachment.cgi?id=141566=edit Windows - Call 2245521 - Framebuffer -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 97516] GLX_OML_swap_method not fully supported
https://bugs.freedesktop.org/show_bug.cgi?id=97516 --- Comment #4 from Sven Arvidsson --- I tried the suggested hack of adding _DRI_ATTRIB_SWAP_EXCHANGE to back_buffer_modes[]. In the case of Brink, it allows me to launch the game, but in fullscreen mode it stops updating the screen, so only a single frame is shown. Running the game windowed seems to work fine. It could be a problem on my end, though I did rebuild system Mesa (32- and 64bit) so the server side should be picking up the changes too. -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 107923] build_id.c:126: multiple definition of `build_id_length'
https://bugs.freedesktop.org/show_bug.cgi?id=107923 --- Comment #4 from Dylan Baker --- This seems to be autotools-specific; I can't replicate it with the closest meson configuration I could come up with (-Dbuildtype=debug -Dglx=gallium-xlib -Ddri-drivers= -Dvulkan-drivers= -Dgallium-drivers=swrast,svga -Dgbm=false -Degl=false); meson doesn't have a toggle to turn off direct glx or tls, so it could be related to that. -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/4] anv/so_memcpy: Use the correct SO_BUFFER size on gen8+
On Wed, Sep 12, 2018 at 12:06:49AM -0500, Jason Ekstrand wrote: > This shouldn't matter as we'll never write OOB anyway but we may as well > get it right. It's supposed to be in dwords - 1. > --- > src/intel/vulkan/genX_gpu_memcpy.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > This patch is Reviewed-by: Nanley Chery > diff --git a/src/intel/vulkan/genX_gpu_memcpy.c > b/src/intel/vulkan/genX_gpu_memcpy.c > index 57abd8cd5c1..cba820a1866 100644 > --- a/src/intel/vulkan/genX_gpu_memcpy.c > +++ b/src/intel/vulkan/genX_gpu_memcpy.c > @@ -222,7 +222,7 @@ genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer > *cmd_buffer, > > #if GEN_GEN >= 8 >sob.SOBufferEnable = true; > - sob.SurfaceSize = size - 1; > + sob.SurfaceSize = size / 4 - 1; > #else >sob.SurfacePitch = bs; >sob.SurfaceEndAddress = sob.SurfaceBaseAddress; > -- > 2.17.1 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] mesa: Additional FlipY applications
On 09/14/2018 01:09 PM, Fritz Koenig wrote: Instances where direction was determined based on winsys or user fbo and should be determined based on FlipY. Key STATE_FB_WPOS_Y_TRANSFORM for of FlipY instead of _mesa_is_user_fbo. This corrects gl_FragCoord usage when applying GL_MESA_framebuffer_flip_y. Fixes: ab05dd183cc ("i965: implement GL_MESA_framebuffer_flip_y [v3]") --- src/mesa/main/multisample.c | 4 ++-- src/mesa/program/prog_statevars.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mesa/main/multisample.c b/src/mesa/main/multisample.c index 8beb1d839e..d494a43ac7 100644 --- a/src/mesa/main/multisample.c +++ b/src/mesa/main/multisample.c @@ -94,8 +94,8 @@ _mesa_GetMultisamplefv(GLenum pname, GLuint index, GLfloat * val) ctx->Driver.GetSamplePosition(ctx, ctx->DrawBuffer, index, val); - /* winsys FBOs are upside down */ - if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) + /* FBOs can be upside down (winsys always are)*/ + if (ctx->DrawBuffer->FlipY) val[1] = 1.0f - val[1]; return; diff --git a/src/mesa/program/prog_statevars.c b/src/mesa/program/prog_statevars.c index 4d7f388cfb..3bbe451399 100644 --- a/src/mesa/program/prog_statevars.c +++ b/src/mesa/program/prog_statevars.c @@ -571,7 +571,7 @@ _mesa_fetch_state(struct gl_context *ctx, const gl_state_index16 state[], case STATE_FB_WPOS_Y_TRANSFORM: /* A driver may negate this conditional by using ZW swizzle * instead of XY (based on e.g. some other state). */ - if (_mesa_is_user_fbo(ctx->DrawBuffer)) { + if (!ctx->DrawBuffer->FlipY) { /* Identity (XY) followed by flipping Y upside down (ZW). */ value[0] = 1.0F; value[1] = 0.0F; For both, Reviewed-by: Brian Paul Tag for stable branch? ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 107923] build_id.c:126: multiple definition of `build_id_length'
https://bugs.freedesktop.org/show_bug.cgi?id=107923 --- Comment #3 from Brian Paul --- I'm seeing similar issues. $ ../autogen.sh CFLAGS="-g -O0" CXXFLAGS="-g -O0" --enable-debug --enable-xlib-glx --disable-driglx-direct --disable-dri --with-gallium-drivers=swrast,svga --disable-gbm --disable-egl results in a lot of multiply-defined symbols. This has probably been happening for the past week or so. I haven't had any time to investigate though. Vinson, perhaps you could bisect it if you have time. -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] mesa: Additional FlipY applications
Instances where direction was determined based on winsys or user fbo and should be determined based on FlipY. Key STATE_FB_WPOS_Y_TRANSFORM for of FlipY instead of _mesa_is_user_fbo. This corrects gl_FragCoord usage when applying GL_MESA_framebuffer_flip_y. Fixes: ab05dd183cc ("i965: implement GL_MESA_framebuffer_flip_y [v3]") --- src/mesa/main/multisample.c | 4 ++-- src/mesa/program/prog_statevars.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mesa/main/multisample.c b/src/mesa/main/multisample.c index 8beb1d839e..d494a43ac7 100644 --- a/src/mesa/main/multisample.c +++ b/src/mesa/main/multisample.c @@ -94,8 +94,8 @@ _mesa_GetMultisamplefv(GLenum pname, GLuint index, GLfloat * val) ctx->Driver.GetSamplePosition(ctx, ctx->DrawBuffer, index, val); - /* winsys FBOs are upside down */ - if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) + /* FBOs can be upside down (winsys always are)*/ + if (ctx->DrawBuffer->FlipY) val[1] = 1.0f - val[1]; return; diff --git a/src/mesa/program/prog_statevars.c b/src/mesa/program/prog_statevars.c index 4d7f388cfb..3bbe451399 100644 --- a/src/mesa/program/prog_statevars.c +++ b/src/mesa/program/prog_statevars.c @@ -571,7 +571,7 @@ _mesa_fetch_state(struct gl_context *ctx, const gl_state_index16 state[], case STATE_FB_WPOS_Y_TRANSFORM: /* A driver may negate this conditional by using ZW swizzle * instead of XY (based on e.g. some other state). */ - if (_mesa_is_user_fbo(ctx->DrawBuffer)) { + if (!ctx->DrawBuffer->FlipY) { /* Identity (XY) followed by flipping Y upside down (ZW). */ value[0] = 1.0F; value[1] = 0.0F; -- 2.19.0.397.gdd90340f6a-goog ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/2] mesa: FramebufferParameteri parameter checking
Missing break; causes parameter checking to never pass GL_FRAMEBUFFER_FLIP_Y_MESA parameters. Fixes: 318c265160 ("mesa: GL_MESA_framebuffer_flip_y extension [v4]") --- src/mesa/main/fbobject.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index edb86438e3..3263fce845 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -1503,6 +1503,7 @@ framebuffer_parameteri(struct gl_context *ctx, struct gl_framebuffer *fb, if (!ctx->Extensions.MESA_framebuffer_flip_y) goto invalid_pname_enum; cannot_be_winsys_fbo = true; + break; default: goto invalid_pname_enum; } -- 2.19.0.397.gdd90340f6a-goog ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] move pthread_setaffinity_np check to the build system
Quoting Eric Engestrom (2018-09-14 09:39:27) > On Thursday, 2018-09-13 11:41:38 -0700, Dylan Baker wrote: > > Rather than trying to encode all of the rules in a header, lets just put > > them in the build system where they belong. This fixes the build on > > FreeBSD, which does have pthraed_setaffinity_np, but it's in a > > pthread_np.h, not behind _GNU_SOURCE. FreeBSD also implements cpu_set > > slightly differently, so additional changes would be required to get it > > working right there anyway. > > > > Fixes: 9f1bbbdbbd77d346c74c7abbb31f399151a85713 > >("util: try to fix the Android and MacOS build") > > Cc: Marek Ol\u0161ák > > Cc: Emil Velikov > > --- > > configure.ac| 16 > > meson.build | 7 +++ > > src/util/u_thread.h | 4 > > 3 files changed, 23 insertions(+), 4 deletions(-) > > > > diff --git a/configure.ac b/configure.ac > > index f8bb131cb63..d10236dbead 100644 > > --- a/configure.ac > > +++ b/configure.ac > > @@ -968,6 +968,22 @@ if test "x$pthread_stubs_possible" = xyes; then > > PKG_CHECK_MODULES(PTHREADSTUBS, pthread-stubs >= 0.4) > > fi > > > > +save_LIBS="$LIBS" > > +LIBS="$PTHREAD_LIBS" > > +AC_MSG_CHECKING(whether pthread_setaffinity_np is supported) > > +AC_LINK_IFELSE([AC_LANG_SOURCE([[ > > +#define _GNU_SOURCE > > +#include > > +int main() { > > + void *a = (void*) _setaffinity_np; > > + long b = (long) a; > > + return (int) b; > > +}]])], > > + [DEFINES="$DEFINES -DPTHREAD_SETAFFINITY"]; > > -DHAVE_PTHREAD_SETAFFINITY > > With that, and assuming the AC_LINK test code is correct: I just copied the code that meson generates, lol. I'm not really sure that link is required, it would probably work to just compile it. > Reviewed-by: Eric Engestrom > > > + AC_MSG_RESULT([yes]), > > + AC_MSG_RESULT([no])) > > +LIBS="$save_LIBS" > > + > > dnl Check for futex for fast inline simple_mtx_t. 
> > AC_CHECK_HEADER([linux/futex.h], [DEFINES="$DEFINES -DHAVE_LINUX_FUTEX_H"]) > > > > diff --git a/meson.build b/meson.build > > index 0d534b9b4a9..0588ebf8e7a 100644 > > --- a/meson.build > > +++ b/meson.build > > @@ -1070,6 +1070,13 @@ pre_args += '-DHAVE_ZLIB' > > dep_thread = dependency('threads') > > if dep_thread.found() and host_machine.system() != 'windows' > >pre_args += '-DHAVE_PTHREAD' > > + if cc.has_function( > > + 'pthread_setaffinity_np', > > + dependencies : dep_thread, > > + prefix : '#include ', > > + args : '-D_GNU_SOURCE') > > +pre_args += '-DHAVE_PTHREAD_SETAFFINITY' > > + endif > > endif > > if with_amd_vk or with_gallium_radeonsi or with_gallium_r600 or > > with_gallium_opencl > >dep_elf = dependency('libelf', required : false) > > diff --git a/src/util/u_thread.h b/src/util/u_thread.h > > index eee6f3c712d..7538d7d634b 100644 > > --- a/src/util/u_thread.h > > +++ b/src/util/u_thread.h > > @@ -36,10 +36,6 @@ > > #include > > #endif > > > > -#if defined(HAVE_PTHREAD) && !defined(ANDROID) && !defined(__APPLE__) > > -#define HAVE_PTHREAD_SETAFFINITY > > -#endif > > - > > static inline thrd_t u_thread_create(int (*routine)(void *), void *param) > > { > > thrd_t thread; > > -- > > 2.19.0 > > > > ___ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev signature.asc Description: signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 107923] build_id.c:126: multiple definition of `build_id_length'
https://bugs.freedesktop.org/show_bug.cgi?id=107923 --- Comment #2 from Vinson Lee --- (In reply to Sergii Romantsov from comment #1) > could you, please, specify your build-configuration? ./autogen.sh --disable-dri --disable-egl --disable-gbm --enable-debug --with-dri-drivers=swrast --with-gallium-drivers=swrast --with-platforms=x11 -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 107765] [regression] Batman Arkham City crashes with DXVK under wine
https://bugs.freedesktop.org/show_bug.cgi?id=107765 --- Comment #2 from farmboy0+freedesk...@googlemail.com --- I use vanilla wine 3.12/3.14 DXVK 0.7.0 and 0.7.1 in a 32 or 64 bit prefix. I have a R9 380X Kernel 4.18.5 with DC enabled. I tried Mesa git, no change. Latest console output before crash: info: DXGI: Setting display mode: 1920x1080@60 warn: DxgiSwapChain::QueryInterface: Unknown interface query warn: 94d99bdb-f1f8-4ab0-b236-7da0170edab1 warn: DXGI: MakeWindowAssociation: Ignoring flags 013b:fixme:wtsapi:WTSRegisterSessionNotification Stub 0x60226 0x warning: The VAD has been replaced by a hack pending a complete rewrite info: DxgiVkPresenter: Recreating swap chain: Format: VK_FORMAT_B8G8R8A8_UNORM Present mode: VK_PRESENT_MODE_FIFO_KHR Buffer size: 1920x1080 013b:fixme:imm:ImmReleaseContext (0x60226, 0xae463b8): stub err: D3D11DeviceContext::SetPredication: Stub mesa: for the -simplifycfg-sink-common option: may only occur zero or one times! ../mesa-/src/amd/vulkan/radv_device.c:3936: FINISHME: Illegal color amapps\common\Batman Arkham City GOTY\Binaries\Win32\BatmanAC.exe: ../mesa-/src/amd/vulkan/radv_pipeline.c:486: si_choose_spi_color_format: Assertion `!"unhandled blend format"' failed. 016c:fixme:dbghelp:elf_search_auxv can't find symbol in module 016c:fixme:dbghelp:validate_addr64 Unsupported address f7d4 -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] nvir: Always split 64-bit IMAD/IMUL operations
Hi Dylan, this patch only matters for debug builds (DEBUG set), but your merge result looks correct nonetheless. Thanks On Fri, Sep 14, 2018 at 6:16 PM, Dylan Baker wrote: > Quoting Pierre Moreau (2017-12-04 15:51:04) >> Those operations do not map to actual hardware instructions, therefore >> those should always be lowered to 32-bit instructions. >> >> Fixes: 009c54aa7af "nv50/ir: Split 64-bit integer MAD/MUL operations" >> Signed-off-by: Pierre Moreau >> --- >> src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 2 +- >> 1 file changed, 1 insertion(+), 1 deletion(-) >> >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp >> b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp >> index 61d4e6a2d0..14bdcea2ca 100644 >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp >> @@ -3794,7 +3794,7 @@ Program::optimizeSSA(int level) >> RUN_PASS(2, AlgebraicOpt, run); >> RUN_PASS(2, ModifierFolding, run); // before load propagation -> less >> checks >> RUN_PASS(1, ConstantFolding, foldAll); >> - RUN_PASS(1, Split64BitOpPreRA, run); >> + RUN_PASS(0, Split64BitOpPreRA, run); >> RUN_PASS(1, LoadPropagation, run); >> RUN_PASS(1, IndirectPropagation, run); >> RUN_PASS(2, MemoryOpt, run); >> -- >> 2.15.0 >> > > Hi Pierre, > > There was a small conflict when applying this to 18.1; I think my resolution > is > correct, but you can see the version of the patch here: > https://gitlab.freedesktop.org/mesa/mesa/commit/649aff1a8788684c3160ab6001016054de251f39 > please let me know if any changes are needed. > > Dylan > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] gallium/util: don't let children of fork & exec inherit our thread affinity
On 09/12/2018 05:40 PM, Marek Olšák wrote: > +static void > +util_set_full_cpu_affinity(void) > +{ > + cpu_set_t cpuset; > + > + CPU_ZERO(&cpuset); > + for (unsigned i = 0; i < CPU_SETSIZE; i++) > + CPU_SET(i, &cpuset); > + > + pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset); > +} > > static void > -util_init_cache_number(void) > +util_init_thread_pinning(void) > { > /* Get a semi-random number. */ > int64_t t = os_time_get_nano(); > L3_cache_number = (t ^ (t >> 8) ^ (t >> 16)); > + > + /* Reset thread affinity for all children of fork and exec to prevent > +* spawned processes and threads from inheriting the current thread's > +* affinity. > +* > +* What happens if a driver is unloaded and the app creates a thread? > +*/ > + pthread_atfork(NULL, NULL, util_set_full_cpu_affinity); > } > You should probably save and restore the application's affinity mask rather than assuming the mask is set to all CPUs. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] move pthread_setaffinity_np check to the build system
Quoting Dylan Baker (2018-09-13 11:41:38) > Rather than trying to encode all of the rules in a header, lets just put > them in the build system where they belong. This fixes the build on > FreeBSD, which does have pthraed_setaffinity_np, but it's in a > pthread_np.h, not behind _GNU_SOURCE. FreeBSD also implements cpu_set > slightly differently, so additional changes would be required to get it > working right there anyway. > > Fixes: 9f1bbbdbbd77d346c74c7abbb31f399151a85713 >("util: try to fix the Android and MacOS build") > Cc: Marek Ol\u0161ák > Cc: Emil Velikov > --- > configure.ac| 16 > meson.build | 7 +++ > src/util/u_thread.h | 4 > 3 files changed, 23 insertions(+), 4 deletions(-) > > diff --git a/configure.ac b/configure.ac > index f8bb131cb63..d10236dbead 100644 > --- a/configure.ac > +++ b/configure.ac > @@ -968,6 +968,22 @@ if test "x$pthread_stubs_possible" = xyes; then > PKG_CHECK_MODULES(PTHREADSTUBS, pthread-stubs >= 0.4) > fi > > +save_LIBS="$LIBS" > +LIBS="$PTHREAD_LIBS" > +AC_MSG_CHECKING(whether pthread_setaffinity_np is supported) > +AC_LINK_IFELSE([AC_LANG_SOURCE([[ > +#define _GNU_SOURCE > +#include > +int main() { > + void *a = (void*) _setaffinity_np; > + long b = (long) a; > + return (int) b; > +}]])], > + [DEFINES="$DEFINES -DPTHREAD_SETAFFINITY"]; This should be -DHAVE_PTHREAD_SETAFFINITY, I've fixed this locally > + AC_MSG_RESULT([yes]), > + AC_MSG_RESULT([no])) > +LIBS="$save_LIBS" > + > dnl Check for futex for fast inline simple_mtx_t. 
> AC_CHECK_HEADER([linux/futex.h], [DEFINES="$DEFINES -DHAVE_LINUX_FUTEX_H"]) > > diff --git a/meson.build b/meson.build > index 0d534b9b4a9..0588ebf8e7a 100644 > --- a/meson.build > +++ b/meson.build > @@ -1070,6 +1070,13 @@ pre_args += '-DHAVE_ZLIB' > dep_thread = dependency('threads') > if dep_thread.found() and host_machine.system() != 'windows' >pre_args += '-DHAVE_PTHREAD' > + if cc.has_function( > + 'pthread_setaffinity_np', > + dependencies : dep_thread, > + prefix : '#include ', > + args : '-D_GNU_SOURCE') > +pre_args += '-DHAVE_PTHREAD_SETAFFINITY' > + endif > endif > if with_amd_vk or with_gallium_radeonsi or with_gallium_r600 or > with_gallium_opencl >dep_elf = dependency('libelf', required : false) > diff --git a/src/util/u_thread.h b/src/util/u_thread.h > index eee6f3c712d..7538d7d634b 100644 > --- a/src/util/u_thread.h > +++ b/src/util/u_thread.h > @@ -36,10 +36,6 @@ > #include > #endif > > -#if defined(HAVE_PTHREAD) && !defined(ANDROID) && !defined(__APPLE__) > -#define HAVE_PTHREAD_SETAFFINITY > -#endif > - > static inline thrd_t u_thread_create(int (*routine)(void *), void *param) > { > thrd_t thread; > -- > 2.19.0 > signature.asc Description: signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] r600/sb: use safe math optimizatiosn when TGSI contains precise operations
I suppose ideally it would only affect instruction chains which have a precise modifier somewhere. But it's better than just ignoring it completely. Reviewed-by: Roland Scheidegger Am 14.09.2018 um 16:56 schrieb Gert Wollny: > Fixes: > dEQP-GLES3.functional.shaders.invariance.highp.common_subexpression_3 > dEQP-GLES3.functional.shaders.invariance.mediump.common_subexpression_3 > dEQP-GLES3.functional.shaders.invariance.lowp.common_subexpression_3 > > Signed-off-by: Gert Wollny > --- > src/gallium/drivers/r600/r600_asm.h | 1 + > src/gallium/drivers/r600/r600_shader.c | 3 +++ > src/gallium/drivers/r600/sb/sb_bc_parser.cpp | 2 +- > 3 files changed, 5 insertions(+), 1 deletion(-) > > diff --git a/src/gallium/drivers/r600/r600_asm.h > b/src/gallium/drivers/r600/r600_asm.h > index 5841044bf8..ca9280a7a8 100644 > --- a/src/gallium/drivers/r600/r600_asm.h > +++ b/src/gallium/drivers/r600/r600_asm.h > @@ -277,6 +277,7 @@ struct r600_bytecode { > struct r600_bytecode_output pending_outputs[5]; > int n_pending_outputs; > boolean need_wait_ack; /* emit a pending WAIT_ACK prior > to control flow */ > + boolean precise; > }; > > /* eg_asm.c */ > diff --git a/src/gallium/drivers/r600/r600_shader.c > b/src/gallium/drivers/r600/r600_shader.c > index 2229dc8fab..408939d110 100644 > --- a/src/gallium/drivers/r600/r600_shader.c > +++ b/src/gallium/drivers/r600/r600_shader.c > @@ -3879,6 +3879,9 @@ static int r600_shader_from_tgsi(struct r600_context > *rctx, > ctx.inst_info = > _shader_tgsi_instruction[opcode]; > else > ctx.inst_info = > _shader_tgsi_instruction[opcode]; > + > + ctx.bc->precise |= > ctx.parse.FullToken.FullInstruction.Instruction.Precise; > + > r = ctx.inst_info->process(); > if (r) > goto out_err; > diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp > b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp > index a7b828268b..eafc1cb8ec 100644 > --- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp > +++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp > @@ -75,7 +75,7 @@ 
int bc_parser::decode() { > } > > sh = new shader(ctx, t, bc->debug_id); > - sh->safe_math = sb_context::safe_math || (t == TARGET_COMPUTE); > + sh->safe_math = sb_context::safe_math || (t == TARGET_COMPUTE || > bc->precise); > > int r = decode_shader(); > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] move pthread_setaffinity_np check to the build system
On Thursday, 2018-09-13 11:41:38 -0700, Dylan Baker wrote: > Rather than trying to encode all of the rules in a header, lets just put > them in the build system where they belong. This fixes the build on > FreeBSD, which does have pthraed_setaffinity_np, but it's in a > pthread_np.h, not behind _GNU_SOURCE. FreeBSD also implements cpu_set > slightly differently, so additional changes would be required to get it > working right there anyway. > > Fixes: 9f1bbbdbbd77d346c74c7abbb31f399151a85713 >("util: try to fix the Android and MacOS build") > Cc: Marek Olšák > Cc: Emil Velikov > --- > configure.ac| 16 > meson.build | 7 +++ > src/util/u_thread.h | 4 > 3 files changed, 23 insertions(+), 4 deletions(-) > > diff --git a/configure.ac b/configure.ac > index f8bb131cb63..d10236dbead 100644 > --- a/configure.ac > +++ b/configure.ac > @@ -968,6 +968,22 @@ if test "x$pthread_stubs_possible" = xyes; then > PKG_CHECK_MODULES(PTHREADSTUBS, pthread-stubs >= 0.4) > fi > > +save_LIBS="$LIBS" > +LIBS="$PTHREAD_LIBS" > +AC_MSG_CHECKING(whether pthread_setaffinity_np is supported) > +AC_LINK_IFELSE([AC_LANG_SOURCE([[ > +#define _GNU_SOURCE > +#include > +int main() { > + void *a = (void*) _setaffinity_np; > + long b = (long) a; > + return (int) b; > +}]])], > + [DEFINES="$DEFINES -DPTHREAD_SETAFFINITY"]; -DHAVE_PTHREAD_SETAFFINITY With that, and assuming the AC_LINK test code is correct: Reviewed-by: Eric Engestrom > + AC_MSG_RESULT([yes]), > + AC_MSG_RESULT([no])) > +LIBS="$save_LIBS" > + > dnl Check for futex for fast inline simple_mtx_t. 
> AC_CHECK_HEADER([linux/futex.h], [DEFINES="$DEFINES -DHAVE_LINUX_FUTEX_H"]) > > diff --git a/meson.build b/meson.build > index 0d534b9b4a9..0588ebf8e7a 100644 > --- a/meson.build > +++ b/meson.build > @@ -1070,6 +1070,13 @@ pre_args += '-DHAVE_ZLIB' > dep_thread = dependency('threads') > if dep_thread.found() and host_machine.system() != 'windows' >pre_args += '-DHAVE_PTHREAD' > + if cc.has_function( > + 'pthread_setaffinity_np', > + dependencies : dep_thread, > + prefix : '#include ', > + args : '-D_GNU_SOURCE') > +pre_args += '-DHAVE_PTHREAD_SETAFFINITY' > + endif > endif > if with_amd_vk or with_gallium_radeonsi or with_gallium_r600 or > with_gallium_opencl >dep_elf = dependency('libelf', required : false) > diff --git a/src/util/u_thread.h b/src/util/u_thread.h > index eee6f3c712d..7538d7d634b 100644 > --- a/src/util/u_thread.h > +++ b/src/util/u_thread.h > @@ -36,10 +36,6 @@ > #include > #endif > > -#if defined(HAVE_PTHREAD) && !defined(ANDROID) && !defined(__APPLE__) > -#define HAVE_PTHREAD_SETAFFINITY > -#endif > - > static inline thrd_t u_thread_create(int (*routine)(void *), void *param) > { > thrd_t thread; > -- > 2.19.0 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] nvir: Always split 64-bit IMAD/IMUL operations
Quoting Pierre Moreau (2017-12-04 15:51:04) > Those operations do not map to actual hardware instructions, therefore > those should always be lowered to 32-bit instructions. > > Fixes: 009c54aa7af "nv50/ir: Split 64-bit integer MAD/MUL operations" > Signed-off-by: Pierre Moreau > --- > src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > index 61d4e6a2d0..14bdcea2ca 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > @@ -3794,7 +3794,7 @@ Program::optimizeSSA(int level) > RUN_PASS(2, AlgebraicOpt, run); > RUN_PASS(2, ModifierFolding, run); // before load propagation -> less > checks > RUN_PASS(1, ConstantFolding, foldAll); > - RUN_PASS(1, Split64BitOpPreRA, run); > + RUN_PASS(0, Split64BitOpPreRA, run); > RUN_PASS(1, LoadPropagation, run); > RUN_PASS(1, IndirectPropagation, run); > RUN_PASS(2, MemoryOpt, run); > -- > 2.15.0 > Hi Pierre, There was a small conflict when applying this to 18.1; I think my resolution is correct, but you can see the version of the patch here: https://gitlab.freedesktop.org/mesa/mesa/commit/649aff1a8788684c3160ab6001016054de251f39 please let me know if any changes are needed. Dylan signature.asc Description: signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 107939] Commit 888b7fc causes performance regression
https://bugs.freedesktop.org/show_bug.cgi?id=107939 Michel Dänzer changed: What|Removed |Added Component|Drivers/Gallium/radeonsi|Drivers/Vulkan/radeon Assignee|dri-devel@lists.freedesktop |mesa-dev@lists.freedesktop. |.org|org QA Contact|dri-devel@lists.freedesktop |mesa-dev@lists.freedesktop. |.org|org -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965/fs: Don't propagate conditional modifiers from integer compares to adds
On 09/14/2018 02:52 AM, Alejandro Piñeiro wrote: > No shader-db changes, so perhaps adding a test on > test_fs_cmod_propagation? In any case, the patch looks good to me: I should have mentioned in the commit message, but I added a piglit test: https://patchwork.freedesktop.org/patch/249182/ > Reviewed-by: Alejandro Piñeiro > > > On 14/09/18 00:06, Ian Romanick wrote: >> From: Ian Romanick >> >> No shader-db changes on any Intel platform... which probably explains >> why no bugs have been bisected to this problem since it landed in Mesa >> 18.1. :( The commit mentioned below is in 18.2, so 18.1 would need a >> slightly different fix (due to code refactoring). >> >> Signed-off-by: Ian Romanick >> Fixes: 77f269bb560 "i965/fs: Refactor propagation of conditional modifiers >> from compares to adds" >> Cc: Matt Turner (reviewed the original patch) >> Cc: Alejandro Piñeiro (reviewed the original patch) >> --- >> src/intel/compiler/brw_fs_cmod_propagation.cpp | 10 +++++++++- >> 1 file changed, 9 insertions(+), 1 deletion(-) >> >> diff --git a/src/intel/compiler/brw_fs_cmod_propagation.cpp >> b/src/intel/compiler/brw_fs_cmod_propagation.cpp >> index 5b74f267359..5fb522f810f 100644 >> --- a/src/intel/compiler/brw_fs_cmod_propagation.cpp >> +++ b/src/intel/compiler/brw_fs_cmod_propagation.cpp >> @@ -211,9 +211,17 @@ opt_cmod_propagation_local(const gen_device_info >> *devinfo, bblock_t *block) >>/* A CMP with a second source of zero can match with anything. A CMP >> * with a second source that is not zero can only match with an ADD >> * instruction. >> + * >> + * Only apply this optimization to float-point sources. It can fail >> for >> + * integers. For inputs a = 0x80000000, b = 4, int(0x80000000) < 4, >> but >> + * int(0x80000000) - 4 overflows and results in 0x7ffffffc. that's >> not >> + * less than zero, so the flags get set differently than for (a < b). 
>> */ >>if (inst->opcode == BRW_OPCODE_CMP && !inst->src[1].is_zero()) { >> - progress = cmod_propagate_cmp_to_add(devinfo, block, inst) || >> progress; >> + if (brw_reg_type_is_floating_point(inst->src[0].type) && >> + cmod_propagate_cmp_to_add(devinfo, block, inst)) >> +progress = true; >> + >> continue; >>} >> ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] r600/sb: use safe math optimizatiosn when TGSI contains precise operations
The typo in subject is fixed locally. Am Freitag, den 14.09.2018, 16:56 +0200 schrieb Gert Wollny: > Fixes: > dEQP- > GLES3.functional.shaders.invariance.highp.common_subexpression_3 > dEQP- > GLES3.functional.shaders.invariance.mediump.common_subexpression_3 > dEQP- > GLES3.functional.shaders.invariance.lowp.common_subexpression_3 > > Signed-off-by: Gert Wollny > --- > src/gallium/drivers/r600/r600_asm.h | 1 + > src/gallium/drivers/r600/r600_shader.c | 3 +++ > src/gallium/drivers/r600/sb/sb_bc_parser.cpp | 2 +- > 3 files changed, 5 insertions(+), 1 deletion(-) > > diff --git a/src/gallium/drivers/r600/r600_asm.h > b/src/gallium/drivers/r600/r600_asm.h > index 5841044bf8..ca9280a7a8 100644 > --- a/src/gallium/drivers/r600/r600_asm.h > +++ b/src/gallium/drivers/r600/r600_asm.h > @@ -277,6 +277,7 @@ struct r600_bytecode { > struct r600_bytecode_output pending_outputs[5]; > int n_pending_outputs; > boolean need_wait_ack; /* emit a > pending WAIT_ACK prior to control flow */ > + boolean precise; > }; > > /* eg_asm.c */ > diff --git a/src/gallium/drivers/r600/r600_shader.c > b/src/gallium/drivers/r600/r600_shader.c > index 2229dc8fab..408939d110 100644 > --- a/src/gallium/drivers/r600/r600_shader.c > +++ b/src/gallium/drivers/r600/r600_shader.c > @@ -3879,6 +3879,9 @@ static int r600_shader_from_tgsi(struct > r600_context *rctx, > ctx.inst_info = > _shader_tgsi_instruction[opcode]; > else > ctx.inst_info = > _shader_tgsi_instruction[opcode]; > + > + ctx.bc->precise |= > ctx.parse.FullToken.FullInstruction.Instruction.Precise; > + > r = ctx.inst_info->process(); > if (r) > goto out_err; > diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp > b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp > index a7b828268b..eafc1cb8ec 100644 > --- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp > +++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp > @@ -75,7 +75,7 @@ int bc_parser::decode() { > } > > sh = new shader(ctx, t, bc->debug_id); > - sh->safe_math = sb_context::safe_math || 
(t == > TARGET_COMPUTE); > + sh->safe_math = sb_context::safe_math || (t == > TARGET_COMPUTE || bc->precise); > > int r = decode_shader(); > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2 (resend)] virgl: Pass resource size and transfer offsets
Am Freitag, den 14.09.2018, 15:26 +0300 schrieb andrey simiklit: [...] > > + if (vcmd == VCMD_TRANSFER_PUT2) > > + vtest_hdr[VTEST_CMD_LEN] += data_size + 3 / 4; > > Looks like a copy/paste mistake) > I suppose that it is should be like: > ... = (data_size + 3) / 4; > or may be just: > ... = data_size; Good catch I'll have to check what is actually correct. Tomeu maybe you could clarify? Best, Gert > > > + > > + cmd[VCMD_TRANSFER2_RES_HANDLE] = handle; > > + cmd[VCMD_TRANSFER2_LEVEL] = level; > > + cmd[VCMD_TRANSFER2_X] = box->x; > > + cmd[VCMD_TRANSFER2_Y] = box->y; > > + cmd[VCMD_TRANSFER2_Z] = box->z; > > + cmd[VCMD_TRANSFER2_WIDTH] = box->width; > > + cmd[VCMD_TRANSFER2_HEIGHT] = box->height; > > + cmd[VCMD_TRANSFER2_DEPTH] = box->depth; > > + cmd[VCMD_TRANSFER2_DATA_SIZE] = data_size; > > + cmd[VCMD_TRANSFER2_OFFSET] = offset; > > + virgl_block_write(vws->sock_fd, _hdr, sizeof(vtest_hdr)); > > + virgl_block_write(vws->sock_fd, , sizeof(cmd)); > > + > > + return 0; > > +} > > + > > +int virgl_vtest_send_transfer_get(struct virgl_vtest_winsys *vws, > > + uint32_t handle, > > + uint32_t level, uint32_t stride, > > + uint32_t layer_stride, > > + const struct pipe_box *box, > > + uint32_t data_size, > > + uint32_t offset) > > +{ > > + if (vws->protocol_version < 1) > > + return virgl_vtest_send_transfer_cmd(vws, VCMD_TRANSFER_GET, > > handle, > > + level, stride, > > layer_stride, box, > > + data_size); > > + > > + return virgl_vtest_send_transfer_cmd2(vws, VCMD_TRANSFER_GET2, > > handle, > > +level, box, data_size, > > offset); > > +} > > + > > +int virgl_vtest_send_transfer_put(struct virgl_vtest_winsys *vws, > > + uint32_t handle, > > + uint32_t level, uint32_t stride, > > + uint32_t layer_stride, > > + const struct pipe_box *box, > > + uint32_t data_size, > > + uint32_t offset) > > +{ > > + if (vws->protocol_version < 1) > > + return virgl_vtest_send_transfer_cmd(vws, VCMD_TRANSFER_PUT, > > handle, > > + level, stride, > > layer_stride, box, > > + data_size); > 
> + > > + return virgl_vtest_send_transfer_cmd2(vws, VCMD_TRANSFER_PUT2, > > handle, > > +level, box, data_size, > > offset); > > +} > > + > > int virgl_vtest_send_transfer_put_data(struct virgl_vtest_winsys > > *vws, > > void *data, > > uint32_t data_size) > > @@ -327,20 +437,27 @@ int virgl_vtest_recv_transfer_get_data(struct > > virgl_vtest_winsys *vws, > > uint32_t data_size, > > uint32_t stride, > > const struct pipe_box *box, > > - uint32_t format) > > + uint32_t format, uint32_t > > res_stride) > > { > > - void *line; > > - void *ptr = data; > > - int hblocks = util_format_get_nblocksy(format, box->height); > > - > > - line = malloc(stride); > > - while (hblocks) { > > - virgl_block_read(vws->sock_fd, line, stride); > > - memcpy(ptr, line, util_format_get_stride(format, box- > > >width)); > > + char *ptr = data; > > + uint32_t bytes_to_read = data_size; > > + char dump[1024]; > > + > > + /* Copy the date from the IOV to the target resource respecting > > +* the different strides */ > > + for (int y = 0 ; y < box->height && bytes_to_read > 0; ++y) { > > + uint32_t btr = MIN2(res_stride, bytes_to_read); > > + virgl_block_read(vws->sock_fd, ptr, btr); > >ptr += stride; > > - hblocks--; > > + bytes_to_read -= btr; > > + } > > + > > + /* It seems that there may be extra bytes that need to be read > > */ > > + while (bytes_to_read > 0 && bytes_to_read < data_size) { > > + uint32_t btr = MIN2(sizeof(dump), bytes_to_read); > > + virgl_block_read(vws->sock_fd, dump, btr); > > + bytes_to_read -= btr; > > } > > - free(line); > > return 0; > > } > > > > diff --git a/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c > > b/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c > > index 6c03a6b359..52a5245b6a 100644 > > --- a/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c > > +++ b/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c > > @@ -79,9 +79,13 @@ virgl_vtest_transfer_put(struct
[Mesa-dev] [PATCH] r600/sb: use safe math optimizatiosn when TGSI contains precise operations
Fixes: dEQP-GLES3.functional.shaders.invariance.highp.common_subexpression_3 dEQP-GLES3.functional.shaders.invariance.mediump.common_subexpression_3 dEQP-GLES3.functional.shaders.invariance.lowp.common_subexpression_3 Signed-off-by: Gert Wollny --- src/gallium/drivers/r600/r600_asm.h | 1 + src/gallium/drivers/r600/r600_shader.c | 3 +++ src/gallium/drivers/r600/sb/sb_bc_parser.cpp | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 5841044bf8..ca9280a7a8 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -277,6 +277,7 @@ struct r600_bytecode { struct r600_bytecode_output pending_outputs[5]; int n_pending_outputs; boolean need_wait_ack; /* emit a pending WAIT_ACK prior to control flow */ + boolean precise; }; /* eg_asm.c */ diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 2229dc8fab..408939d110 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -3879,6 +3879,9 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; else ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; + + ctx.bc->precise |= ctx.parse.FullToken.FullInstruction.Instruction.Precise; + r = ctx.inst_info->process(&ctx); if (r) goto out_err; diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp index a7b828268b..eafc1cb8ec 100644 --- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp +++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp @@ -75,7 +75,7 @@ int bc_parser::decode() { } sh = new shader(ctx, t, bc->debug_id); - sh->safe_math = sb_context::safe_math || (t == TARGET_COMPUTE); + sh->safe_math = sb_context::safe_math || (t == TARGET_COMPUTE || bc->precise); int r = decode_shader(); -- 2.16.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org 
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 107547] shader crashing glsl_compiler (uniform block assigned to vec2, then component substraced by 1)
https://bugs.freedesktop.org/show_bug.cgi?id=107547 --- Comment #4 from Sergii Romantsov --- One more version: https://patchwork.freedesktop.org/series/48256/ -- You are receiving this mail because: You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v1] glsl: missed error_emitted for do_assignment
Seems that patch is simplified version of already exist one: https://patchwork.freedesktop.org/series/48256/ On Fri, Sep 14, 2018 at 4:39 PM, Sergii Romantsov < sergii.romant...@gmail.com> wrote: > During do_assignment a validation of rhs may fail. > Because of lack error_emitted an error_value may not be generated. > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107547 > Signed-off-by: Sergii Romantsov > --- > src/compiler/glsl/ast_to_hir.cpp | 2 ++ > 1 file changed, 2 insertions(+) > > diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_ > hir.cpp > index 5d3f10b..da1654b 100644 > --- a/src/compiler/glsl/ast_to_hir.cpp > +++ b/src/compiler/glsl/ast_to_hir.cpp > @@ -1013,6 +1013,8 @@ do_assignment(exec_list *instructions, struct > _mesa_glsl_parse_state *state, > mark_whole_array_access(lhs); >} > } > + else > + error_emitted = true; > > /* Most callers of do_assignment (assign, add_assign, pre_inc/dec, > * but not post_inc) need the converted assigned value as an rvalue > -- > 2.7.4 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > -- Sergii Romantsov GlobalLogic Inc. www.globallogic.com ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 97516] GLX_OML_swap_method not fully supported
https://bugs.freedesktop.org/show_bug.cgi?id=97516 --- Comment #3 from Sven Arvidsson --- As a temporary workaround to get the game Brink running I disabled the GLX_OML_swap_method. Wine has a check, and will ignore the attribute if the extension is missing. Brink starts and runs, though I'm not sure what the possible side effects are? -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 107547] shader crashing glsl_compiler (uniform block assigned to vec2, then component substraced by 1)
https://bugs.freedesktop.org/show_bug.cgi?id=107547 --- Comment #3 from Sergii Romantsov --- Proposed patch: https://patchwork.freedesktop.org/patch/249568/ -- You are receiving this mail because: You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v1] glsl: missed error_emitted for do_assignment
During do_assignment a validation of rhs may fail. Because of lack error_emitted an error_value may not be generated. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107547 Signed-off-by: Sergii Romantsov --- src/compiler/glsl/ast_to_hir.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp index 5d3f10b..da1654b 100644 --- a/src/compiler/glsl/ast_to_hir.cpp +++ b/src/compiler/glsl/ast_to_hir.cpp @@ -1013,6 +1013,8 @@ do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state, mark_whole_array_access(lhs); } } + else + error_emitted = true; /* Most callers of do_assignment (assign, add_assign, pre_inc/dec, * but not post_inc) need the converted assigned value as an rvalue -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 107391] feature request: enforceable vsync and anisotropic filtering via environment variables
https://bugs.freedesktop.org/show_bug.cgi?id=107391 --- Comment #2 from tempel.jul...@gmail.com --- DXVK now also supports enforcing vsync on/off. :) However, it would still be nice to also have these options for outside of DXVK since native Linux apps might not offer full control to the user either (admittedly less likely). -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 107391] feature request: enforceable vsync and anisotropic filtering via environment variables
https://bugs.freedesktop.org/show_bug.cgi?id=107391 --- Comment #1 from Samuel Pitoiset --- DXVK now allows to force AF, so I guess the only feature you want is vsync? -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 106769] radv: add support for shaderStorageImageMultisample
https://bugs.freedesktop.org/show_bug.cgi?id=106769 Samuel Pitoiset changed: What|Removed |Added Summary|radv: Support |radv: add support for |VK_EXT_shader_viewport_inde |shaderStorageImageMultisamp |x_layer and |le |shaderStorageImageMultisamp | |le | -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] radv: fix descriptor pool allocation size
Reviewed-by: Bas Nieuwenhuizen On Fri, Sep 14, 2018 at 2:55 PM Samuel Pitoiset wrote: > > The size has to be multiplied by the number of sets. > > This gets rid of the OUT_OF_POOL_KHR error and fixes > the Tangrams demo. > > CC: 18.1 18.2 > Signed-off-by: Samuel Pitoiset > --- > src/amd/vulkan/radv_descriptor_set.c | 3 ++- > 1 file changed, 2 insertions(+), 1 deletion(-) > > diff --git a/src/amd/vulkan/radv_descriptor_set.c > b/src/amd/vulkan/radv_descriptor_set.c > index c4341f6ac5..49d0811bb0 100644 > --- a/src/amd/vulkan/radv_descriptor_set.c > +++ b/src/amd/vulkan/radv_descriptor_set.c > @@ -569,9 +569,10 @@ VkResult radv_CreateDescriptorPool( > } > > if (!(pCreateInfo->flags & > VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT)) { > - uint64_t host_size = pCreateInfo->maxSets * sizeof(struct > radv_descriptor_set); > + uint64_t host_size = sizeof(struct radv_descriptor_set); > host_size += sizeof(struct radeon_winsys_bo*) * bo_count; > host_size += sizeof(struct radv_descriptor_range) * > range_count; > + host_size *= pCreateInfo->maxSets; > size += host_size; > } else { > size += sizeof(struct radv_descriptor_pool_entry) * > pCreateInfo->maxSets; > -- > 2.19.0 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 106922] Tangrams demo: LLVM ERROR: Cannot select: 0x7e8d8750: i16 = bitcast 0x7e8d8af8
https://bugs.freedesktop.org/show_bug.cgi?id=106922 --- Comment #6 from Samuel Pitoiset --- The demo should now work if you apply: https://patchwork.freedesktop.org/series/49701/ https://patchwork.freedesktop.org/series/49710/ -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] radv: fix descriptor pool allocation size
The size has to be multiplied by the number of sets. This gets rid of the OUT_OF_POOL_KHR error and fixes the Tangrams demo. CC: 18.1 18.2 Signed-off-by: Samuel Pitoiset --- src/amd/vulkan/radv_descriptor_set.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/amd/vulkan/radv_descriptor_set.c b/src/amd/vulkan/radv_descriptor_set.c index c4341f6ac5..49d0811bb0 100644 --- a/src/amd/vulkan/radv_descriptor_set.c +++ b/src/amd/vulkan/radv_descriptor_set.c @@ -569,9 +569,10 @@ VkResult radv_CreateDescriptorPool( } if (!(pCreateInfo->flags & VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT)) { - uint64_t host_size = pCreateInfo->maxSets * sizeof(struct radv_descriptor_set); + uint64_t host_size = sizeof(struct radv_descriptor_set); host_size += sizeof(struct radeon_winsys_bo*) * bo_count; host_size += sizeof(struct radv_descriptor_range) * range_count; + host_size *= pCreateInfo->maxSets; size += host_size; } else { size += sizeof(struct radv_descriptor_pool_entry) * pCreateInfo->maxSets; -- 2.19.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH mesa v2] vulkan/wsi/display: check if wsi_swapchain_init() succeeded
Rb On September 14, 2018 04:12:44 Eric Engestrom wrote: Fixes: da997ebec929421939553 "vulkan: Add KHR_display extension using DRM [v10]" Cc: Keith Packard Cc: Jason Ekstrand Signed-off-by: Eric Engestrom --- v2: don't forget to free the swapchain too (Jason) --- src/vulkan/wsi/wsi_common_display.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/vulkan/wsi/wsi_common_display.c b/src/vulkan/wsi/wsi_common_display.c index 1e90bba460cba190c5ab..338fad6be792385db9f4 100644 --- a/src/vulkan/wsi/wsi_common_display.c +++ b/src/vulkan/wsi/wsi_common_display.c @@ -1712,6 +1712,10 @@ wsi_display_surface_create_swapchain( VkResult result = wsi_swapchain_init(wsi_device, &chain->base, device, create_info, allocator); + if (result != VK_SUCCESS) { + vk_free(allocator, chain); + return result; + } chain->base.destroy = wsi_display_swapchain_destroy; chain->base.get_wsi_image = wsi_display_get_wsi_image; -- Cheers, Eric ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 107351] Android 8.1: radv segfault with 3Dmark vulkan benchmarks
https://bugs.freedesktop.org/show_bug.cgi?id=107351 Samuel Pitoiset changed: What|Removed |Added Resolution|--- |FIXED Status|NEW |RESOLVED --- Comment #16 from Samuel Pitoiset --- Thanks for letting us know. Closing! -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 107923] build_id.c:126: multiple definition of `build_id_length'
https://bugs.freedesktop.org/show_bug.cgi?id=107923 --- Comment #1 from Sergii Romantsov --- Hello, Vinson, could you, please, specify your build-configuration? I don't see such output. -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2 (resend)] virgl: Pass resource size and transfer offsets
Hello, Please find my comment below: Regards, Andrii. On Fri, Sep 14, 2018 at 12:23 PM Gert Wollny wrote: > From: Tomeu Vizoso > > Pass the size of a resource when creating it so a backing can be kept in > the other side. > > Also pass the required offset to transfer commands. > > This moves vtest closer to how virtio-gpu works, making it more useful > for testing. > > v2: - Use new messages for creation and transfers, as changing the > behavior of the existing messages would be messy given that we don't > want to break compatibility with older servers. > > v3: - Gert: Use correct strides: The resource corresponding to the output > display might have a different line stride than the IOVs, so when > reading back to this resource take the resource stride and the > IOV stride into account. > > Signed-off-by: Tomeu Vizoso (v2) > Signed-off-by: Gert Wollny > --- > .../winsys/virgl/vtest/virgl_vtest_socket.c| 143 > +++-- > .../winsys/virgl/vtest/virgl_vtest_winsys.c| 38 -- > .../winsys/virgl/vtest/virgl_vtest_winsys.h| 19 ++- > src/gallium/winsys/virgl/vtest/vtest_protocol.h| 29 + > 4 files changed, 201 insertions(+), 28 deletions(-) > > diff --git a/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c > b/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c > index 4d20a63ad6..3aa01aabdf 100644 > --- a/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c > +++ b/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c > @@ -221,6 +221,42 @@ int virgl_vtest_send_get_caps(struct > virgl_vtest_winsys *vws, > return 0; > } > > +static int virgl_vtest_send_resource_create2(struct virgl_vtest_winsys > *vws, > + uint32_t handle, > + enum pipe_texture_target > target, > + uint32_t format, > + uint32_t bind, > + uint32_t width, > + uint32_t height, > + uint32_t depth, > + uint32_t array_size, > + uint32_t last_level, > + uint32_t nr_samples, > + uint32_t size) > +{ > + uint32_t res_create_buf[VCMD_RES_CREATE2_SIZE], > vtest_hdr[VTEST_HDR_SIZE]; > + > + vtest_hdr[VTEST_CMD_LEN] = 
VCMD_RES_CREATE2_SIZE; > + vtest_hdr[VTEST_CMD_ID] = VCMD_RESOURCE_CREATE2; > + > + res_create_buf[VCMD_RES_CREATE2_RES_HANDLE] = handle; > + res_create_buf[VCMD_RES_CREATE2_TARGET] = target; > + res_create_buf[VCMD_RES_CREATE2_FORMAT] = format; > + res_create_buf[VCMD_RES_CREATE2_BIND] = bind; > + res_create_buf[VCMD_RES_CREATE2_WIDTH] = width; > + res_create_buf[VCMD_RES_CREATE2_HEIGHT] = height; > + res_create_buf[VCMD_RES_CREATE2_DEPTH] = depth; > + res_create_buf[VCMD_RES_CREATE2_ARRAY_SIZE] = array_size; > + res_create_buf[VCMD_RES_CREATE2_LAST_LEVEL] = last_level; > + res_create_buf[VCMD_RES_CREATE2_NR_SAMPLES] = nr_samples; > + res_create_buf[VCMD_RES_CREATE2_DATA_SIZE] = size; > + > + virgl_block_write(vws->sock_fd, _hdr, sizeof(vtest_hdr)); > + virgl_block_write(vws->sock_fd, _create_buf, > sizeof(res_create_buf)); > + > + return 0; > +} > + > int virgl_vtest_send_resource_create(struct virgl_vtest_winsys *vws, > uint32_t handle, > enum pipe_texture_target target, > @@ -231,10 +267,17 @@ int virgl_vtest_send_resource_create(struct > virgl_vtest_winsys *vws, > uint32_t depth, > uint32_t array_size, > uint32_t last_level, > - uint32_t nr_samples) > + uint32_t nr_samples, > + uint32_t size) > { > uint32_t res_create_buf[VCMD_RES_CREATE_SIZE], > vtest_hdr[VTEST_HDR_SIZE]; > > + if (vws->protocol_version >= 1) > + return virgl_vtest_send_resource_create2(vws, handle, target, > format, > + bind, width, height, depth, > + array_size, last_level, > + nr_samples, size); > + > vtest_hdr[VTEST_CMD_LEN] = VCMD_RES_CREATE_SIZE; > vtest_hdr[VTEST_CMD_ID] = VCMD_RESOURCE_CREATE; > > @@ -282,7 +325,7 @@ int virgl_vtest_send_resource_unref(struct > virgl_vtest_winsys *vws, > return 0; > } > > -int virgl_vtest_send_transfer_cmd(struct virgl_vtest_winsys *vws, > +static int virgl_vtest_send_transfer_cmd(struct virgl_vtest_winsys *vws, >uint32_t vcmd, >
Re: [Mesa-dev] [PATCH 9/9] radv: enable shaderInt16 capability
Reviewed-by: Bas Nieuwenhuizen for the series. On Fri, Sep 14, 2018 at 12:51 PM Samuel Pitoiset wrote: > > Not sure if this is all wired up. CTS does pass and the Tangrams > demo seems to work (though with a local hack that is unrelated > to 16-bit integer). > > Signed-off-by: Samuel Pitoiset > --- > src/amd/vulkan/radv_device.c | 2 +- > src/amd/vulkan/radv_shader.c | 1 + > 2 files changed, 2 insertions(+), 1 deletion(-) > > diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c > index 8989ec3553..af7754bea3 100644 > --- a/src/amd/vulkan/radv_device.c > +++ b/src/amd/vulkan/radv_device.c > @@ -734,7 +734,7 @@ void radv_GetPhysicalDeviceFeatures( > .shaderCullDistance = true, > .shaderFloat64= true, > .shaderInt64 = true, > - .shaderInt16 = false, > + .shaderInt16 = true, > .sparseBinding= true, > .variableMultisampleRate = true, > .inheritedQueries = true, > diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c > index e05961339c..fc2033d6a9 100644 > --- a/src/amd/vulkan/radv_shader.c > +++ b/src/amd/vulkan/radv_shader.c > @@ -210,6 +210,7 @@ radv_shader_compile_to_nir(struct radv_device *device, > .image_write_without_format = true, > .tessellation = true, > .int64 = true, > + .int16 = true, > .multiview = true, > .subgroup_ballot = true, > .subgroup_basic = true, > -- > 2.19.0 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/9] ac: add 16-bit support to ac_build_umsb()
Signed-off-by: Samuel Pitoiset --- src/amd/common/ac_llvm_build.c | 18 -- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 71723d5e91..905146c9f2 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -1393,17 +1393,31 @@ ac_build_umsb(struct ac_llvm_context *ctx, LLVMTypeRef type; LLVMValueRef highest_bit; LLVMValueRef zero; + unsigned bitsize; - if (ac_get_elem_bits(ctx, LLVMTypeOf(arg)) == 64) { + bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(arg)); + switch (bitsize) { + case 64: intrin_name = "llvm.ctlz.i64"; type = ctx->i64; highest_bit = LLVMConstInt(ctx->i64, 63, false); zero = ctx->i64_0; - } else { + break; + case 32: intrin_name = "llvm.ctlz.i32"; type = ctx->i32; highest_bit = LLVMConstInt(ctx->i32, 31, false); zero = ctx->i32_0; + break; + case 16: + intrin_name = "llvm.ctlz.i16"; + type = ctx->i16; + highest_bit = LLVMConstInt(ctx->i16, 15, false); + zero = ctx->i16_0; + break; + default: + unreachable(!"invalid bitsize"); + break; } LLVMValueRef params[2] = { -- 2.19.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/9] ac: add ac_build_bitfield_reverse() helper
Are we missing 64-bit support? Signed-off-by: Samuel Pitoiset --- src/amd/common/ac_llvm_build.c | 22 ++ src/amd/common/ac_llvm_build.h | 3 +++ src/amd/common/ac_nir_to_llvm.c | 2 +- 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 4fbe0ddb9c..a99314266c 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -2135,6 +2135,28 @@ LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0) return result; } +LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx, + LLVMValueRef src0) +{ + LLVMValueRef result; + unsigned bitsize; + + bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0)); + + switch (bitsize) { + case 32: + result = ac_build_intrinsic(ctx, "llvm.bitreverse.i32", ctx->i32, + (LLVMValueRef []) { src0 }, 1, + AC_FUNC_ATTR_READNONE); + break; + default: + unreachable(!"invalid bitsize"); + break; + } + + return result; +} + #define AC_EXP_TARGET 0 #define AC_EXP_ENABLED_CHANNELS 1 #define AC_EXP_OUT02 diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index 98a61a2405..8524d1fa76 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -426,6 +426,9 @@ LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src0, LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0); +LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx, + LLVMValueRef src0); + void ac_optimize_vs_outputs(struct ac_llvm_context *ac, LLVMValueRef main_fn, uint8_t *vs_output_param_offset, diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 39489a4b3b..3e88ae66ec 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -836,7 +836,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) result = emit_bitfield_insert(>ac, src[0], src[1], src[2], src[3]); break; case 
nir_op_bitfield_reverse: - result = ac_build_intrinsic(>ac, "llvm.bitreverse.i32", ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE); + result = ac_build_bitfield_reverse(>ac, src[0]); break; case nir_op_bit_count: result = ac_build_bit_count(>ac, src[0]); -- 2.19.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 8/9] ac: add 16-bit support to ac_build_bitfield_reverse()
Signed-off-by: Samuel Pitoiset --- src/amd/common/ac_llvm_build.c | 5 + 1 file changed, 5 insertions(+) diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 98635ec25a..ab0ba09c83 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -2181,6 +2181,11 @@ LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx, (LLVMValueRef []) { src0 }, 1, AC_FUNC_ATTR_READNONE); break; + case 16: + result = ac_build_intrinsic(ctx, "llvm.bitreverse.i16", ctx->i16, + (LLVMValueRef []) { src0 }, 1, + AC_FUNC_ATTR_READNONE); + break; default: unreachable(!"invalid bitsize"); break; -- 2.19.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 9/9] radv: enable shaderInt16 capability
Not sure if this is all wired up. CTS does pass and the Tangrams demo seems to work (though with a local hack that is unrelated to 16-bit integer). Signed-off-by: Samuel Pitoiset --- src/amd/vulkan/radv_device.c | 2 +- src/amd/vulkan/radv_shader.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 8989ec3553..af7754bea3 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -734,7 +734,7 @@ void radv_GetPhysicalDeviceFeatures( .shaderCullDistance = true, .shaderFloat64= true, .shaderInt64 = true, - .shaderInt16 = false, + .shaderInt16 = true, .sparseBinding= true, .variableMultisampleRate = true, .inheritedQueries = true, diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index e05961339c..fc2033d6a9 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -210,6 +210,7 @@ radv_shader_compile_to_nir(struct radv_device *device, .image_write_without_format = true, .tessellation = true, .int64 = true, + .int16 = true, .multiview = true, .subgroup_ballot = true, .subgroup_basic = true, -- 2.19.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 7/9] ac: add 16-bit support to ac_build_bit_count()
Signed-off-by: Samuel Pitoiset --- src/amd/common/ac_llvm_build.c | 5 + 1 file changed, 5 insertions(+) diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 6955df48e0..98635ec25a 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -2154,6 +2154,11 @@ LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0) (LLVMValueRef []) { src0 }, 1, AC_FUNC_ATTR_READNONE); break; + case 16: + result = ac_build_intrinsic(ctx, "llvm.ctpop.i16", ctx->i16, + (LLVMValueRef []) { src0 }, 1, + AC_FUNC_ATTR_READNONE); + break; default: unreachable(!"invalid bitsize"); break; -- 2.19.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6/9] ac: add 16-bit support to ac_find_lsb()
Signed-off-by: Samuel Pitoiset --- src/amd/common/ac_llvm_build.c | 15 +-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 905146c9f2..6955df48e0 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -2499,14 +2499,25 @@ LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx, const char *intrin_name; LLVMTypeRef type; LLVMValueRef zero; - if (src0_bitsize == 64) { + + switch (src0_bitsize) { + case 64: intrin_name = "llvm.cttz.i64"; type = ctx->i64; zero = ctx->i64_0; - } else { + break; + case 32: intrin_name = "llvm.cttz.i32"; type = ctx->i32; zero = ctx->i32_0; + break; + case 16: + intrin_name = "llvm.cttz.i16"; + type = ctx->i16; + zero = ctx->i16_0; + break; + default: + unreachable(!"invalid bitsize"); } LLVMValueRef params[2] = { -- 2.19.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/9] ac: add 16-bit support to ac_build_isign()
Signed-off-by: Samuel Pitoiset --- src/amd/common/ac_llvm_build.c | 21 - 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 011cea5cd0..71723d5e91 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -2069,14 +2069,25 @@ LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0, LLVMValueRef cmp, val, zero, one; LLVMTypeRef type; - if (bitsize == 32) { - type = ctx->i32; - zero = ctx->i32_0; - one = ctx->i32_1; - } else { + switch (bitsize) { + case 64: type = ctx->i64; zero = ctx->i64_0; one = ctx->i64_1; + break; + case 32: + type = ctx->i32; + zero = ctx->i32_0; + one = ctx->i32_1; + break; + case 16: + type = ctx->i16; + zero = ctx->i16_0; + one = ctx->i16_1; + break; + default: + unreachable(!"invalid bitsize"); + break; } cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, src0, zero, ""); -- 2.19.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/9] ac: add 16-bit constant values for zero and one
Signed-off-by: Samuel Pitoiset --- src/amd/common/ac_llvm_build.c | 2 ++ src/amd/common/ac_llvm_build.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index a99314266c..011cea5cd0 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -87,6 +87,8 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, ctx->v4f32 = LLVMVectorType(ctx->f32, 4); ctx->v8i32 = LLVMVectorType(ctx->i32, 8); + ctx->i16_0 = LLVMConstInt(ctx->i16, 0, false); + ctx->i16_1 = LLVMConstInt(ctx->i16, 1, false); ctx->i32_0 = LLVMConstInt(ctx->i32, 0, false); ctx->i32_1 = LLVMConstInt(ctx->i32, 1, false); ctx->i64_0 = LLVMConstInt(ctx->i64, 0, false); diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index 8524d1fa76..0df9234c66 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -74,6 +74,8 @@ struct ac_llvm_context { LLVMTypeRef v4f32; LLVMTypeRef v8i32; + LLVMValueRef i16_0; + LLVMValueRef i16_1; LLVMValueRef i32_0; LLVMValueRef i32_1; LLVMValueRef i64_0; -- 2.19.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/9] ac: add ac_build_bit_count() helper
Signed-off-by: Samuel Pitoiset --- src/amd/common/ac_llvm_build.c | 28 src/amd/common/ac_llvm_build.h | 2 ++ src/amd/common/ac_nir_to_llvm.c | 7 +-- 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 1f5112e992..4fbe0ddb9c 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -2107,6 +2107,34 @@ LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src0, return val; } +LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0) +{ + LLVMValueRef result; + unsigned bitsize; + + bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0)); + + switch (bitsize) { + case 64: + result = ac_build_intrinsic(ctx, "llvm.ctpop.i64", ctx->i64, + (LLVMValueRef []) { src0 }, 1, + AC_FUNC_ATTR_READNONE); + + result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, ""); + break; + case 32: + result = ac_build_intrinsic(ctx, "llvm.ctpop.i32", ctx->i32, + (LLVMValueRef []) { src0 }, 1, + AC_FUNC_ATTR_READNONE); + break; + default: + unreachable(!"invalid bitsize"); + break; + } + + return result; +} + #define AC_EXP_TARGET 0 #define AC_EXP_ENABLED_CHANNELS 1 #define AC_EXP_OUT02 diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index 0d261bae09..98a61a2405 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -424,6 +424,8 @@ LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0, LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize); +LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0); + void ac_optimize_vs_outputs(struct ac_llvm_context *ac, LLVMValueRef main_fn, uint8_t *vs_output_param_offset, diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 18644107eb..39489a4b3b 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -839,12 +839,7 @@ 
static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) result = ac_build_intrinsic(>ac, "llvm.bitreverse.i32", ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE); break; case nir_op_bit_count: - if (ac_get_elem_bits(>ac, LLVMTypeOf(src[0])) == 32) - result = ac_build_intrinsic(>ac, "llvm.ctpop.i32", ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE); - else { - result = ac_build_intrinsic(>ac, "llvm.ctpop.i64", ctx->ac.i64, src, 1, AC_FUNC_ATTR_READNONE); - result = LLVMBuildTrunc(ctx->ac.builder, result, ctx->ac.i32, ""); - } + result = ac_build_bit_count(>ac, src[0]); break; case nir_op_vec2: case nir_op_vec3: -- 2.19.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] anv: add support for VK_EXT_inline_uniform_block
I can't say I know enough of all these parts but I went through the API functions and tried to check that you have proper checks in place. Will try to still review :) I did not see any check against MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS when creating pipeline layout. I'm not sure if such a check is necessary (since it's an implicit rule); do you think there should be a check/assert? One minor possible addition below .. On 11.09.2018 23:22, Lionel Landwerlin wrote: This new extension adds an implicitly allocated block of uniforms into the descriptors sets through a new descriptor type. > We implement this by having a single BO in the descriptor set pool from which we source uniforms. Signed-off-by: Lionel Landwerlin --- src/intel/vulkan/anv_cmd_buffer.c | 3 + src/intel/vulkan/anv_descriptor_set.c | 238 +- src/intel/vulkan/anv_device.c | 22 ++ src/intel/vulkan/anv_extensions.py| 1 + .../vulkan/anv_nir_apply_pipeline_layout.c| 52 src/intel/vulkan/anv_private.h| 33 +++ src/intel/vulkan/genX_cmd_buffer.c| 32 ++- 7 files changed, 367 insertions(+), 14 deletions(-) diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index 8ef71b0ed9c..b14be94f470 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -651,6 +651,7 @@ anv_isl_format_for_descriptor_type(VkDescriptorType type) switch (type) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: return ISL_FORMAT_R32G32B32A32_FLOAT; case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: @@ -1039,6 +1040,8 @@ void anv_CmdPushDescriptorSetKHR( } break; + case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: + unreachable("Invalid descriptor type for push descriptors"); default: break; } diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c index 3439f828900..2e5f2a1f288 100644 --- a/src/intel/vulkan/anv_descriptor_set.c +++ b/src/intel/vulkan/anv_descriptor_set.c @@ 
-26,8 +26,10 @@ #include #include #include +#include #include "util/mesa-sha1.h" +#include "vk_util.h" #include "anv_private.h" @@ -40,7 +42,8 @@ void anv_GetDescriptorSetLayoutSupport( const VkDescriptorSetLayoutCreateInfo* pCreateInfo, VkDescriptorSetLayoutSupport* pSupport) { - uint32_t surface_count[MESA_SHADER_STAGES] = { 0, }; + int16_t surface_count[MESA_SHADER_STAGES] = { 0, }; + int16_t inline_surface_indexes[MESA_SHADER_STAGES] = { -1, }; for (uint32_t b = 0; b < pCreateInfo->bindingCount; b++) { const VkDescriptorSetLayoutBinding *binding = >pBindings[b]; @@ -50,6 +53,15 @@ void anv_GetDescriptorSetLayoutSupport( /* There is no real limit on samplers */ break; + case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: + anv_foreach_stage(s, binding->stageFlags) { +if (inline_surface_indexes[s] < 0) { + inline_surface_indexes[s] = surface_count[s]; + surface_count[s] += 1; +} + } + break; + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: if (binding->pImmutableSamplers) { for (uint32_t i = 0; i < binding->descriptorCount; i++) { @@ -118,6 +130,9 @@ VkResult anv_CreateDescriptorSetLayout( memset(set_layout, 0, sizeof(*set_layout)); set_layout->ref_cnt = 1; set_layout->binding_count = max_binding + 1; + set_layout->inline_blocks_descriptor_index = -1; + memset(set_layout->inline_blocks_surface_indexes, + -1, sizeof(set_layout->inline_blocks_surface_indexes)); for (uint32_t b = 0; b <= max_binding; b++) { /* Initialize all binding_layout entries to -1 */ @@ -159,9 +174,24 @@ VkResult anv_CreateDescriptorSetLayout( #ifndef NDEBUG set_layout->binding[b].type = binding->descriptorType; #endif - set_layout->binding[b].array_size = binding->descriptorCount; - set_layout->binding[b].descriptor_index = set_layout->size; - set_layout->size += binding->descriptorCount; + + if (binding->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) { Maybe add here assert(binding->descriptorCount % 4 == 0 && binding->descriptorCount <= MAX_INLINE_UNIFORM_BLOCK_SIZE); ? 
+ /* We only need a single descriptor entry for all the inline uniforms. */ + set_layout->binding[b].array_size = 1; + if (set_layout->inline_blocks_descriptor_index < 0) { +set_layout->binding[b].descriptor_index = + set_layout->inline_blocks_descriptor_index = + set_layout->size; +set_layout->size += 1; + } else { +set_layout->binding[b].descriptor_index =
Re: [Mesa-dev] [PATCH] i965/fs: Don't propagate conditional modifiers from integer compares to adds
No shader-db changes, so perhaps adding a test on test_fs_cmod_propagation? In any case, the patch looks good to me: Reviewed-by: Alejandro Piñeiro On 14/09/18 00:06, Ian Romanick wrote: > From: Ian Romanick > > No shader-db changes on any Intel platform... which probably explains > why no bugs have been bisected to this problem since it landed in Mesa > 18.1. :( The commit mentioned below is in 18.2, so 18.1 would need a > slightly different fix (due to code refactoring). > > Signed-off-by: Ian Romanick > Fixes: 77f269bb560 "i965/fs: Refactor propagation of conditional modifiers > from compares to adds" > Cc: Matt Turner (reviewed the original patch) > Cc: Alejandro Piñeiro (reviewed the original patch) > --- > src/intel/compiler/brw_fs_cmod_propagation.cpp | 10 +- > 1 file changed, 9 insertions(+), 1 deletion(-) > > diff --git a/src/intel/compiler/brw_fs_cmod_propagation.cpp > b/src/intel/compiler/brw_fs_cmod_propagation.cpp > index 5b74f267359..5fb522f810f 100644 > --- a/src/intel/compiler/brw_fs_cmod_propagation.cpp > +++ b/src/intel/compiler/brw_fs_cmod_propagation.cpp > @@ -211,9 +211,17 @@ opt_cmod_propagation_local(const gen_device_info > *devinfo, bblock_t *block) >/* A CMP with a second source of zero can match with anything. A CMP > * with a second source that is not zero can only match with an ADD > * instruction. > + * > + * Only apply this optimization to floating-point sources. It can fail > for > + * integers. For inputs a = 0x80000000, b = 4, int(0x80000000) < 4, > but > + * int(0x80000000) - 4 overflows and results in 0x7ffffffc. that's not > + * less than zero, so the flags get set differently than for (a < b). 
> */ >if (inst->opcode == BRW_OPCODE_CMP && !inst->src[1].is_zero()) { > - progress = cmod_propagate_cmp_to_add(devinfo, block, inst) || > progress; > + if (brw_reg_type_is_floating_point(inst->src[0].type) && > + cmod_propagate_cmp_to_add(devinfo, block, inst)) > +progress = true; > + > continue; >} > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] gallium/util: don't let children of fork & exec inherit our thread affinity
On 2018-09-14 10:53 a.m., Michel Dänzer wrote: > On 2018-09-13 8:56 p.m., Marek Olšák wrote: > >> +* What happens if a driver is unloaded and the app creates a thread? > > I suppose the child process will likely crash, because the memory > address where util_set_full_cpu_affinity was located will either be > unmapped or have random other contents? > > At least in theory, there could also be an issue where the application > might have set its own thread affinity before calling fork, which would > be clobbered by util_set_full_cpu_affinity in the child process. Note that these two issues only apply to spawning a child process with fork, not to spawning a thread in the same process. BTW, Julien Cristau pointed out on IRC that setting the thread affinity also seems to cause trouble with Firefox's sandbox. I'm seeing messages like Sandbox: seccomp sandbox violation: pid 3039, tid 3039, syscall 203, args 3105 128 140733772165072 1 16 140515081189120. with Firefox 62, but apparently it can even crash with Firefox nightly at least: https://bugzilla.mozilla.org/1490994 -- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] anv: descriptors: split allocation function
did not spot any behavioral change, LGTM Reviewed-by: Tapani Pälli On 11.09.2018 23:22, Lionel Landwerlin wrote: The following commits will make the allocation more complicated so split the free list allocation logic out. Signed-off-by: Lionel Landwerlin --- src/intel/vulkan/anv_descriptor_set.c | 68 +++ src/intel/vulkan/anv_private.h| 5 +- 2 files changed, 42 insertions(+), 31 deletions(-) diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c index 2bd1d86f4d4..3439f828900 100644 --- a/src/intel/vulkan/anv_descriptor_set.c +++ b/src/intel/vulkan/anv_descriptor_set.c @@ -432,8 +432,8 @@ VkResult anv_CreateDescriptorPool( return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); pool->size = pool_size; - pool->next = 0; - pool->free_list = EMPTY; + pool->next_set = 0; + pool->free_set_list = EMPTY; anv_state_stream_init(>surface_state_stream, >surface_state_pool, 4096); @@ -467,8 +467,8 @@ VkResult anv_ResetDescriptorPool( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_descriptor_pool, pool, descriptorPool); - pool->next = 0; - pool->free_list = EMPTY; + pool->next_set = 0; + pool->free_set_list = EMPTY; anv_state_stream_finish(>surface_state_stream); anv_state_stream_init(>surface_state_stream, >surface_state_pool, 4096); @@ -496,35 +496,45 @@ struct surface_state_free_list_entry { struct anv_state state; }; +static struct anv_descriptor_set * +anv_descriptor_alloc(struct anv_descriptor_pool *pool, + struct anv_descriptor_set_layout *layout, + size_t size) +{ + struct anv_descriptor_set *set = NULL; + + if (size <= pool->size - pool->next_set) { + set = (struct anv_descriptor_set *) (pool->data + pool->next_set); + pool->next_set += size; + return set; + } + + struct pool_free_list_entry *entry; + uint32_t *link = >free_set_list; + for (uint32_t f = pool->free_set_list; f != EMPTY; f = entry->next) { + entry = (struct pool_free_list_entry *) (pool->data + f); + if (size <= entry->size) { + uint32_t next = entry->next; + 
set = (struct anv_descriptor_set *) entry; + *link = next; + return set; + } + link = >next; + } + + return NULL; +} + VkResult anv_descriptor_set_create(struct anv_device *device, struct anv_descriptor_pool *pool, struct anv_descriptor_set_layout *layout, struct anv_descriptor_set **out_set) { - struct anv_descriptor_set *set; const size_t size = anv_descriptor_set_layout_size(layout); - - set = NULL; - if (size <= pool->size - pool->next) { - set = (struct anv_descriptor_set *) (pool->data + pool->next); - pool->next += size; - } else { - struct pool_free_list_entry *entry; - uint32_t *link = >free_list; - for (uint32_t f = pool->free_list; f != EMPTY; f = entry->next) { - entry = (struct pool_free_list_entry *) (pool->data + f); - if (size <= entry->size) { -*link = entry->next; -set = (struct anv_descriptor_set *) entry; -break; - } - link = >next; - } - } - + struct anv_descriptor_set *set = anv_descriptor_alloc(pool, layout, size); if (set == NULL) { - if (pool->free_list != EMPTY) { + if (pool->free_set_list != EMPTY) { return vk_error(VK_ERROR_FRAGMENTED_POOL); } else { return vk_error(VK_ERROR_OUT_OF_POOL_MEMORY); @@ -603,13 +613,13 @@ anv_descriptor_set_destroy(struct anv_device *device, /* Put the descriptor set allocation back on the free list. 
*/ const uint32_t index = (char *) set - pool->data; - if (index + set->size == pool->next) { - pool->next = index; + if (index + set->size == pool->next_set) { + pool->next_set = index; } else { struct pool_free_list_entry *entry = (struct pool_free_list_entry *) set; - entry->next = pool->free_list; + entry->next = pool->free_set_list; entry->size = set->size; - pool->free_list = (char *) entry - pool->data; + pool->free_set_list = (char *) entry - pool->data; } } diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index d15a91dd014..372b7c69635 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1500,8 +1500,9 @@ struct anv_push_descriptor_set { struct anv_descriptor_pool { uint32_t size; - uint32_t next; - uint32_t free_list; + + uint32_t free_set_list; + uint32_t next_set; struct anv_state_stream surface_state_stream; void *surface_state_free_list; ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org
Re: [Mesa-dev] [PATCH] radv: implement VK_EXT_conservative_rasterization
On 9/14/18 11:10 AM, Bas Nieuwenhuizen wrote: On Fri, Sep 14, 2018 at 10:31 AM Samuel Pitoiset wrote: On 9/12/18 11:19 PM, Bas Nieuwenhuizen wrote: On Wed, Sep 12, 2018 at 10:44 PM Samuel Pitoiset wrote: Only supported by GFX9+. The conservativeraster Sascha demo seems to work as expected. Signed-off-by: Samuel Pitoiset --- src/amd/vulkan/radv_device.c | 14 + src/amd/vulkan/radv_extensions.py | 1 + src/amd/vulkan/radv_pipeline.c| 48 ++- 3 files changed, 62 insertions(+), 1 deletion(-) diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 7917ed7ffe..60776a863e 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -1152,6 +1152,20 @@ void radv_GetPhysicalDeviceProperties2( properties->protectedNoFault = false; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT: { + VkPhysicalDeviceConservativeRasterizationPropertiesEXT *properties = + (VkPhysicalDeviceConservativeRasterizationPropertiesEXT *)ext; + properties->primitiveOverestimationSize = 0; + properties->maxExtraPrimitiveOverestimationSize = 0; + properties->extraPrimitiveOverestimationSizeGranularity = 0; + properties->primitiveUnderestimation = VK_FALSE; + properties->conservativePointAndLineRasterization = VK_FALSE; + properties->degenerateTrianglesRasterized = VK_FALSE; + properties->degenerateLinesRasterized = VK_FALSE; + properties->fullyCoveredFragmentShaderInputVariable = VK_FALSE; + properties->conservativeRasterizationPostDepthCoverage = VK_FALSE; + break; + } default: break; } diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py index fa35aabd3b..584926df39 100644 --- a/src/amd/vulkan/radv_extensions.py +++ b/src/amd/vulkan/radv_extensions.py @@ -93,6 +93,7 @@ EXTENSIONS = [ Extension('VK_EXT_direct_mode_display', 1, 'VK_USE_PLATFORM_DISPLAY_KHR'), Extension('VK_EXT_acquire_xlib_display', 1, 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'), Extension('VK_EXT_conditional_rendering', 1, True), 
+Extension('VK_EXT_conservative_rasterization',1, 'device->rad_info.chip_class >= GFX9'), Extension('VK_EXT_display_surface_counter', 1, 'VK_USE_PLATFORM_DISPLAY_KHR'), Extension('VK_EXT_display_control', 1, 'VK_USE_PLATFORM_DISPLAY_KHR'), Extension('VK_EXT_debug_report', 9, True), diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index ae269c32c4..c54949ed42 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -2686,12 +2686,25 @@ radv_pipeline_generate_blend_state(struct radeon_cmdbuf *cs, pipeline->graphics.cb_target_mask = blend->cb_target_mask; } +static const VkConservativeRasterizationModeEXT +radv_get_conservative_raster_mode(const VkPipelineRasterizationStateCreateInfo *pCreateInfo) +{ + const VkPipelineRasterizationConservativeStateCreateInfoEXT *conservative_raster = + vk_find_struct_const(pCreateInfo->pNext, PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT); + + if (!conservative_raster) + return VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT; + return conservative_raster->conservativeRasterizationMode; +} static void radv_pipeline_generate_raster_state(struct radeon_cmdbuf *cs, + struct radv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo) { const VkPipelineRasterizationStateCreateInfo *vkraster = pCreateInfo->pRasterizationState; + const VkConservativeRasterizationModeEXT mode = + radv_get_conservative_raster_mode(vkraster); radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL, S_028810_PS_UCP_MODE(3) | @@ -2725,6 +2738,39 @@ radv_pipeline_generate_raster_state(struct radeon_cmdbuf *cs, S_028814_POLY_OFFSET_FRONT_ENABLE(vkraster->depthBiasEnable ? 1 : 0) | S_028814_POLY_OFFSET_BACK_ENABLE(vkraster->depthBiasEnable ? 1 : 0) | S_028814_POLY_OFFSET_PARA_ENABLE(vkraster->depthBiasEnable ? 1 : 0)); + + /* Conservative rasterization. */ + if (mode != VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT) { + struct radv_multisample_state *ms = &pipeline->graphics.ms; +
[Mesa-dev] [PATCH 2/2 (resend)] virgl: Pass resource size and transfer offsets
From: Tomeu Vizoso Pass the size of a resource when creating it so a backing can be kept in the other side. Also pass the required offset to transfer commands. This moves vtest closer to how virtio-gpu works, making it more useful for testing. v2: - Use new messages for creation and transfers, as changing the behavior of the existing messages would be messy given that we don't want to break compatibility with older servers. v3: - Gert: Use correct strides: The resource corresponding to the output display might have a different line stride than the IOVs, so when reading back to this resource take the resource stride and the IOV stride into account. Signed-off-by: Tomeu Vizoso (v2) Signed-off-by: Gert Wollny --- .../winsys/virgl/vtest/virgl_vtest_socket.c| 143 +++-- .../winsys/virgl/vtest/virgl_vtest_winsys.c| 38 -- .../winsys/virgl/vtest/virgl_vtest_winsys.h| 19 ++- src/gallium/winsys/virgl/vtest/vtest_protocol.h| 29 + 4 files changed, 201 insertions(+), 28 deletions(-) diff --git a/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c b/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c index 4d20a63ad6..3aa01aabdf 100644 --- a/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c +++ b/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c @@ -221,6 +221,42 @@ int virgl_vtest_send_get_caps(struct virgl_vtest_winsys *vws, return 0; } +static int virgl_vtest_send_resource_create2(struct virgl_vtest_winsys *vws, + uint32_t handle, + enum pipe_texture_target target, + uint32_t format, + uint32_t bind, + uint32_t width, + uint32_t height, + uint32_t depth, + uint32_t array_size, + uint32_t last_level, + uint32_t nr_samples, + uint32_t size) +{ + uint32_t res_create_buf[VCMD_RES_CREATE2_SIZE], vtest_hdr[VTEST_HDR_SIZE]; + + vtest_hdr[VTEST_CMD_LEN] = VCMD_RES_CREATE2_SIZE; + vtest_hdr[VTEST_CMD_ID] = VCMD_RESOURCE_CREATE2; + + res_create_buf[VCMD_RES_CREATE2_RES_HANDLE] = handle; + res_create_buf[VCMD_RES_CREATE2_TARGET] = target; + res_create_buf[VCMD_RES_CREATE2_FORMAT] 
= format; + res_create_buf[VCMD_RES_CREATE2_BIND] = bind; + res_create_buf[VCMD_RES_CREATE2_WIDTH] = width; + res_create_buf[VCMD_RES_CREATE2_HEIGHT] = height; + res_create_buf[VCMD_RES_CREATE2_DEPTH] = depth; + res_create_buf[VCMD_RES_CREATE2_ARRAY_SIZE] = array_size; + res_create_buf[VCMD_RES_CREATE2_LAST_LEVEL] = last_level; + res_create_buf[VCMD_RES_CREATE2_NR_SAMPLES] = nr_samples; + res_create_buf[VCMD_RES_CREATE2_DATA_SIZE] = size; + + virgl_block_write(vws->sock_fd, &vtest_hdr, sizeof(vtest_hdr)); + virgl_block_write(vws->sock_fd, &res_create_buf, sizeof(res_create_buf)); + + return 0; +} + int virgl_vtest_send_resource_create(struct virgl_vtest_winsys *vws, uint32_t handle, enum pipe_texture_target target, @@ -231,10 +267,17 @@ int virgl_vtest_send_resource_create(struct virgl_vtest_winsys *vws, uint32_t depth, uint32_t array_size, uint32_t last_level, - uint32_t nr_samples) + uint32_t nr_samples, + uint32_t size) { uint32_t res_create_buf[VCMD_RES_CREATE_SIZE], vtest_hdr[VTEST_HDR_SIZE]; + if (vws->protocol_version >= 1) + return virgl_vtest_send_resource_create2(vws, handle, target, format, + bind, width, height, depth, + array_size, last_level, + nr_samples, size); + vtest_hdr[VTEST_CMD_LEN] = VCMD_RES_CREATE_SIZE; vtest_hdr[VTEST_CMD_ID] = VCMD_RESOURCE_CREATE; @@ -282,7 +325,7 @@ int virgl_vtest_send_resource_unref(struct virgl_vtest_winsys *vws, return 0; } -int virgl_vtest_send_transfer_cmd(struct virgl_vtest_winsys *vws, +static int virgl_vtest_send_transfer_cmd(struct virgl_vtest_winsys *vws, uint32_t vcmd, uint32_t handle, uint32_t level, uint32_t stride, @@ -315,6 +358,73 @@ int virgl_vtest_send_transfer_cmd(struct virgl_vtest_winsys *vws, return 0; } +static int virgl_vtest_send_transfer_cmd2(struct virgl_vtest_winsys *vws, +
[Mesa-dev] [PATCH 1/2 (resend)] virgl: Negotiate version with vtest server
From: Tomeu Vizoso Check if server supports version negotiation by sending a PING_PROTOCOL_VERSION message right before a dummy RESOURCE_BUSY_WAIT. If we don't get a reply for the first, we know the server doesn't support it. If it does support it, we can query the max protocol version supported by the server and fall back if needed. v2: - Send a new message to negotiate the protocol version, checking if the server supports this message by immediately sending a busy wait message. (Dave Airlie) v3: - Send a zero-arg command PING_PROTOCOL_VERSION so we actually keep compatibility with older servers. (Code by Dave Airlie) Signed-off-by: Tomeu Vizoso Signed-off-by: Gert Wollny --- Resending the series because it didn't make it into patchwork .../winsys/virgl/vtest/virgl_vtest_socket.c| 52 ++ .../winsys/virgl/vtest/virgl_vtest_winsys.h| 2 + src/gallium/winsys/virgl/vtest/vtest_protocol.h| 10 + 3 files changed, 64 insertions(+) diff --git a/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c b/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c index d25f9a3bd9..4d20a63ad6 100644 --- a/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c +++ b/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c @@ -101,6 +101,57 @@ static int virgl_vtest_send_init(struct virgl_vtest_winsys *vws) return 0; } +static int virgl_vtest_negotiate_version(struct virgl_vtest_winsys *vws) +{ + uint32_t vtest_hdr[VTEST_HDR_SIZE]; + uint32_t version_buf[VCMD_PROTOCOL_VERSION_SIZE]; + uint32_t busy_wait_buf[VCMD_BUSY_WAIT_SIZE]; + uint32_t busy_wait_result[1]; + int ret; + + vtest_hdr[VTEST_CMD_LEN] = VCMD_PING_PROTOCOL_VERSION_SIZE; + vtest_hdr[VTEST_CMD_ID] = VCMD_PING_PROTOCOL_VERSION; + virgl_block_write(vws->sock_fd, &vtest_hdr, sizeof(vtest_hdr)); + + vtest_hdr[VTEST_CMD_LEN] = VCMD_BUSY_WAIT_SIZE; + vtest_hdr[VTEST_CMD_ID] = VCMD_RESOURCE_BUSY_WAIT; + busy_wait_buf[VCMD_BUSY_WAIT_HANDLE] = 0; + busy_wait_buf[VCMD_BUSY_WAIT_FLAGS] = 0; + virgl_block_write(vws->sock_fd, &vtest_hdr, sizeof(vtest_hdr)); + 
virgl_block_write(vws->sock_fd, &busy_wait_buf, sizeof(busy_wait_buf)); + + ret = virgl_block_read(vws->sock_fd, vtest_hdr, sizeof(vtest_hdr)); + assert(ret); + + if (vtest_hdr[VTEST_CMD_ID] == VCMD_PING_PROTOCOL_VERSION) { + /* Read dummy busy_wait response */ + ret = virgl_block_read(vws->sock_fd, vtest_hdr, sizeof(vtest_hdr)); + assert(ret); + ret = virgl_block_read(vws->sock_fd, busy_wait_result, sizeof(busy_wait_result)); + assert(ret); + + vtest_hdr[VTEST_CMD_LEN] = VCMD_PROTOCOL_VERSION_SIZE; + vtest_hdr[VTEST_CMD_ID] = VCMD_PROTOCOL_VERSION; + version_buf[VCMD_PROTOCOL_VERSION_VERSION] = VTEST_PROTOCOL_VERSION; + virgl_block_write(vws->sock_fd, &vtest_hdr, sizeof(vtest_hdr)); + virgl_block_write(vws->sock_fd, &version_buf, sizeof(version_buf)); + + ret = virgl_block_read(vws->sock_fd, vtest_hdr, sizeof(vtest_hdr)); + assert(ret); + ret = virgl_block_read(vws->sock_fd, version_buf, sizeof(version_buf)); + assert(ret); + return version_buf[VCMD_PROTOCOL_VERSION_VERSION]; + } + + /* Read dummy busy_wait response */ + assert(vtest_hdr[VTEST_CMD_ID] == VCMD_RESOURCE_BUSY_WAIT); + ret = virgl_block_read(vws->sock_fd, busy_wait_result, sizeof(busy_wait_result)); + assert(ret); + + /* Old server, return version 0 */ + return 0; +} + int virgl_vtest_connect(struct virgl_vtest_winsys *vws) { struct sockaddr_un un; @@ -123,6 +174,7 @@ int virgl_vtest_connect(struct virgl_vtest_winsys *vws) vws->sock_fd = sock; virgl_vtest_send_init(vws); + vws->protocol_version = virgl_vtest_negotiate_version(vws); return 0; } diff --git a/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.h b/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.h index 031037b6b5..3628c74644 100644 --- a/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.h +++ b/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.h @@ -49,6 +49,8 @@ struct virgl_vtest_winsys { int num_delayed; unsigned usecs; mtx_t mutex; + + unsigned protocol_version; }; struct virgl_hw_res { diff --git a/src/gallium/winsys/virgl/vtest/vtest_protocol.h 
b/src/gallium/winsys/virgl/vtest/vtest_protocol.h index 95bd8c1d0b..8eb904e73f 100644 --- a/src/gallium/winsys/virgl/vtest/vtest_protocol.h +++ b/src/gallium/winsys/virgl/vtest/vtest_protocol.h @@ -24,6 +24,7 @@ #define VTEST_PROTOCOL #define VTEST_DEFAULT_SOCKET_NAME "/tmp/.virgl_test" +#define VTEST_PROTOCOL_VERSION 1 /* 32-bit length field */ /* 32-bit cmd field */ @@ -53,6 +54,10 @@ /* 0 length cmd */ /* resp VCMD_GET_CAPS + caps */ +#define VCMD_PING_PROTOCOL_VERSION 10 + +#define VCMD_PROTOCOL_VERSION 11 + #define VCMD_RES_CREATE_SIZE 10 #define VCMD_RES_CREATE_RES_HANDLE 0 #define VCMD_RES_CREATE_TARGET 1 @@ -87,4 +92,9 @@ #define VCMD_BUSY_WAIT_HANDLE 0 #define VCMD_BUSY_WAIT_FLAGS 1 +#define VCMD_PING_PROTOCOL_VERSION_SIZE 1 +
Re: [Mesa-dev] [PATCH 2/2] virgl: Pass resource size and transfer offsets
Hello Dave, Am Freitag, den 14.09.2018, 13:25 +1000 schrieb Dave Airlie: [...] > This causes regressions in a bunch of piglits on skylake for me. > > arb_copy_image-formats > and some > ./bin/fbo-generatemipmap-formats GL_EXT_texture_sRGB-s3tc > > have some different results after this. Comparing between mesa-master and v3 of the patch I get exactly the same results on kabylake for "piglit run gpu -t formats" that include these tests, or did you mean different results w.r.t. v2 of the patch? In this case I get simply more failures. Funny enough v2 has the same additional failures as when running through qemu, so there must be a similar problem with strides on the virgl-drm mesa side. I was testing with mesa master 1a263b377c plus these patches. HTH, Gert PS: I'll be away for the next week ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev