Re: [Mesa-dev] [PATCH] mesa: readpixels add support for GL_HALF_FLOAT
On 21/03/18 06:57, Lin Johnson wrote: > Ext_color_buffer_half_float is using type GL_HALF_FLOAT > and data_type GL_FLOAT. This fix Android CTS test > android.view.cts.PixelCopyTest > #TestWindowProducerCopyToRGBA16F > > Signed-off-by: Lin Johnson > --- > src/mesa/main/readpix.c | 2 ++ > 1 file changed, 2 insertions(+) > > diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c > index 6ce340ddf9bb..51331dd095ab 100644 > --- a/src/mesa/main/readpix.c > +++ b/src/mesa/main/readpix.c > @@ -920,6 +920,8 @@ read_pixels_es3_error_check(GLenum format, GLenum type, > case GL_RGBA: >if (type == GL_FLOAT && data_type == GL_FLOAT) > return GL_NO_ERROR; /* EXT_color_buffer_float */ > + if (type == GL_HALF_FLOAT && data_type == GL_FLOAT) > + return GL_NO_ERROR; /* EXT_color_buffer_half_float */ If this combination is allowed thanks to that extension, what would happen if that extension is not supported? Shouldn't this include an extension check? Or is that checked in a different place? >if (type == GL_UNSIGNED_BYTE && data_type == GL_UNSIGNED_NORMALIZED) > return GL_NO_ERROR; >if (internalFormat == GL_RGB10_A2 && ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] mesa: readpixels add support for GL_HALF_FLOAT
EXT_color_buffer_half_float is using type GL_HALF_FLOAT and data_type GL_FLOAT. This fixes the Android CTS test android.view.cts.PixelCopyTest #TestWindowProducerCopyToRGBA16F Signed-off-by: Lin Johnson --- src/mesa/main/readpix.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c index 6ce340ddf9bb..51331dd095ab 100644 --- a/src/mesa/main/readpix.c +++ b/src/mesa/main/readpix.c @@ -920,6 +920,8 @@ read_pixels_es3_error_check(GLenum format, GLenum type, case GL_RGBA: if (type == GL_FLOAT && data_type == GL_FLOAT) return GL_NO_ERROR; /* EXT_color_buffer_float */ + if (type == GL_HALF_FLOAT && data_type == GL_FLOAT) + return GL_NO_ERROR; /* EXT_color_buffer_half_float */ if (type == GL_UNSIGNED_BYTE && data_type == GL_UNSIGNED_NORMALIZED) return GL_NO_ERROR; if (internalFormat == GL_RGB10_A2 && -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC v1 19/38] nir: Add a pass for fixing deref modes
This will be needed by anything which changes variable modes without rewriting derefs. --- src/compiler/nir/nir.h | 2 ++ src/compiler/nir/nir_deref.c | 30 ++ 2 files changed, 32 insertions(+) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index db37b98..7f5c7e9 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2608,6 +2608,8 @@ bool nir_lower_deref_instrs(nir_shader *shader, void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, nir_shader *shader); bool nir_lower_var_copies(nir_shader *shader); +void nir_fixup_deref_modes(nir_shader *shader); + bool nir_lower_global_vars_to_local(nir_shader *shader); bool nir_lower_indirect_derefs(nir_shader *shader, nir_variable_mode modes); diff --git a/src/compiler/nir/nir_deref.c b/src/compiler/nir/nir_deref.c index 87a8192..af5d75f 100644 --- a/src/compiler/nir/nir_deref.c +++ b/src/compiler/nir/nir_deref.c @@ -299,3 +299,33 @@ nir_lower_deref_instrs(nir_shader *shader, return progress; } + +void +nir_fixup_deref_modes(nir_shader *shader) +{ + nir_foreach_function(function, shader) { + if (!function->impl) + continue; + + nir_foreach_block(block, function->impl) { + nir_foreach_instr(instr, block) { +if (instr->type != nir_instr_type_deref) + continue; + +nir_deref_instr *deref = nir_instr_as_deref(instr); + +nir_variable_mode parent_mode; +if (deref->deref_type == nir_deref_type_var) { + parent_mode = deref->var->data.mode; +} else { + assert(deref->parent.is_ssa); + nir_deref_instr *parent = + nir_instr_as_deref(deref->parent.ssa->parent_instr); + parent_mode = parent->mode; +} + +deref->mode = parent_mode; + } + } + } +} -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC v1 37/38] intel/nir: Fixup deref modes after lowering patch vertices
--- src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp index 69da83a..0fd1492 100644 --- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp +++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp @@ -268,4 +268,6 @@ brw_nir_lower_patch_vertices_in_to_uniform(nir_shader *nir) exec_node_remove(&var->node); exec_list_push_tail(&nir->uniforms, &var->node); } + + nir_fixup_deref_modes(nir); } -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC v1 18/38] nir: Support deref instructions in remove_dead_variables
--- src/compiler/nir/nir_remove_dead_variables.c | 99 1 file changed, 99 insertions(+) diff --git a/src/compiler/nir/nir_remove_dead_variables.c b/src/compiler/nir/nir_remove_dead_variables.c index eff66f9..6b1927f 100644 --- a/src/compiler/nir/nir_remove_dead_variables.c +++ b/src/compiler/nir/nir_remove_dead_variables.c @@ -27,6 +27,55 @@ #include "nir.h" +static bool +deref_used_for_not_store(nir_deref_instr *deref) +{ + nir_foreach_use(src, &deref->dest.ssa) { + switch (src->parent_instr->type) { + case nir_instr_type_deref: + if (deref_used_for_not_store(nir_instr_as_deref(src->parent_instr))) +return true; + break; + + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intrin = +nir_instr_as_intrinsic(src->parent_instr); + /* The first source of copy and store intrinsics is the deref to + * write. Don't record those. + */ + if ((intrin->intrinsic != nir_intrinsic_store_deref && + intrin->intrinsic != nir_intrinsic_copy_var) || + src != &intrin->src[0]) +return true; + break; + } + + default: + /* If it's used by any other instruction type (most likely a texture + * instruction), consider it used. + */ + return true; + } + } + + return false; +} + +static void +add_var_use_deref(nir_deref_instr *deref, struct set *live) +{ + if (deref->deref_type != nir_deref_type_var) + return; + + /* If it's not a local that never escapes the shader, then any access at +* all means we need to keep it alive. 
+*/ + assert(deref->mode == deref->var->data.mode); + if (!(deref->mode & (nir_var_local | nir_var_global | nir_var_shared)) || + deref_used_for_not_store(deref)) + _mesa_set_add(live, deref->var); +} + static void add_var_use_intrinsic(nir_intrinsic_instr *instr, struct set *live, nir_variable_mode modes) @@ -100,6 +149,10 @@ add_var_use_shader(nir_shader *shader, struct set *live, nir_variable_mode modes nir_foreach_block(block, function->impl) { nir_foreach_instr(instr, block) { switch(instr->type) { + case nir_instr_type_deref: + add_var_use_deref(nir_instr_as_deref(instr), live); + break; + case nir_instr_type_intrinsic: add_var_use_intrinsic(nir_instr_as_intrinsic(instr), live, modes); @@ -123,6 +176,33 @@ add_var_use_shader(nir_shader *shader, struct set *live, nir_variable_mode modes } static void +remove_dead_deref(nir_deref_instr *deref) +{ + nir_foreach_use(src, &deref->dest.ssa) { + switch (src->parent_instr->type) { + case nir_instr_type_deref: + remove_dead_deref(nir_instr_as_deref(src->parent_instr)); + break; + + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intrin = +nir_instr_as_intrinsic(src->parent_instr); + + assert(intrin->intrinsic == nir_intrinsic_copy_deref || +intrin->intrinsic == nir_intrinsic_store_deref); + nir_instr_remove(&intrin->instr); + break; + } + + default: + unreachable("This must have been marked as live!"); + } + } + + nir_instr_remove(&deref->instr); +} + +static void remove_dead_var_writes(nir_shader *shader, struct set *live) { nir_foreach_function(function, shader) { @@ -144,6 +224,25 @@ remove_dead_var_writes(nir_shader *shader, struct set *live) nir_instr_remove(instr); } } + + /* We walk the list of instructions backwards because we're going to + * delete a deref and all of it's uses and we don't want to end up + * deleting stuff ahead of us. 
+ */ + nir_foreach_block_reverse(block, function->impl) { + nir_foreach_instr_reverse_safe(instr, block) { +if (instr->type != nir_instr_type_deref) + continue; + +nir_deref_instr *deref = nir_instr_as_deref(instr); +if (deref->deref_type != nir_deref_type_var) + continue; + +/* If it's been marked as dead, delete it */ +if (deref->var->data.mode == 0) + remove_dead_deref(deref); + } + } } } -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC v1 34/38] nir: Support deref instructions in lower_wpos_ytransform
--- src/compiler/nir/nir_lower_wpos_ytransform.c | 29 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/src/compiler/nir/nir_lower_wpos_ytransform.c b/src/compiler/nir/nir_lower_wpos_ytransform.c index 62166e7..2e0dd86 100644 --- a/src/compiler/nir/nir_lower_wpos_ytransform.c +++ b/src/compiler/nir/nir_lower_wpos_ytransform.c @@ -77,11 +77,10 @@ nir_cmp(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1, nir_ssa_def *src2) /* see emit_wpos_adjustment() in st_mesa_to_tgsi.c */ static void emit_wpos_adjustment(lower_wpos_ytransform_state *state, - nir_intrinsic_instr *intr, + nir_intrinsic_instr *intr, nir_variable *fragcoord, bool invert, float adjX, float adjY[2]) { nir_builder *b = &state->b; - nir_variable *fragcoord = intr->variables[0]->var; nir_ssa_def *wpostrans, *wpos_temp, *wpos_temp_y, *wpos_input; assert(intr->dest.is_ssa); @@ -144,10 +143,10 @@ emit_wpos_adjustment(lower_wpos_ytransform_state *state, } static void -lower_fragcoord(lower_wpos_ytransform_state *state, nir_intrinsic_instr *intr) +lower_fragcoord(lower_wpos_ytransform_state *state, +nir_intrinsic_instr *intr, nir_variable *fragcoord) { const nir_lower_wpos_ytransform_options *options = state->options; - nir_variable *fragcoord = intr->variables[0]->var; float adjX = 0.0f; float adjY[2] = { 0.0f, 0.0f }; bool invert = false; @@ -229,7 +228,7 @@ lower_fragcoord(lower_wpos_ytransform_state *state, nir_intrinsic_instr *intr) } } - emit_wpos_adjustment(state, intr, invert, adjX, adjY); + emit_wpos_adjustment(state, intr, fragcoord, invert, adjX, adjY); } /* turns 'fddy(p)' into 'fddy(fmul(p, transform.x))' */ @@ -298,7 +297,21 @@ lower_wpos_ytransform_block(lower_wpos_ytransform_state *state, nir_block *block nir_foreach_instr_safe(instr, block) { if (instr->type == nir_instr_type_intrinsic) { nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - if (intr->intrinsic == nir_intrinsic_load_var) { + if (intr->intrinsic == nir_intrinsic_load_deref) { +nir_deref_instr *deref = 
nir_src_as_deref(intr->src[0]); +nir_variable *var = nir_deref_instr_get_variable(deref); + +if ((var->data.mode == nir_var_shader_in && + var->data.location == VARYING_SLOT_POS) || +(var->data.mode == nir_var_system_value && + var->data.location == SYSTEM_VALUE_FRAG_COORD)) { + /* gl_FragCoord should not have array/struct derefs: */ + lower_fragcoord(state, intr, var); +} else if (var->data.mode == nir_var_system_value && + var->data.location == SYSTEM_VALUE_SAMPLE_POS) { + lower_load_sample_pos(state, intr); +} + } else if (intr->intrinsic == nir_intrinsic_load_var) { nir_deref_var *dvar = intr->variables[0]; nir_variable *var = dvar->var; @@ -308,14 +321,14 @@ lower_wpos_ytransform_block(lower_wpos_ytransform_state *state, nir_block *block var->data.location == SYSTEM_VALUE_FRAG_COORD)) { /* gl_FragCoord should not have array/struct derefs: */ assert(dvar->deref.child == NULL); - lower_fragcoord(state, intr); + lower_fragcoord(state, intr, var); } else if (var->data.mode == nir_var_system_value && var->data.location == SYSTEM_VALUE_SAMPLE_POS) { assert(dvar->deref.child == NULL); lower_load_sample_pos(state, intr); } } else if (intr->intrinsic == nir_intrinsic_load_frag_coord) { -lower_fragcoord(state, intr); +lower_fragcoord(state, intr, NULL); } else if (intr->intrinsic == nir_intrinsic_load_sample_pos) { lower_load_sample_pos(state, intr); } else if (intr->intrinsic == nir_intrinsic_interp_var_at_offset) { -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC v1 36/38] intel, ir3: Disable nir_opt_copy_prop_vars
This pass doesn't handle deref instructions yet. Making it handle both legacy derefs and deref instructions would be painful. Since it's not important for correctness, just disable it for now. --- src/gallium/drivers/freedreno/ir3/ir3_nir.c | 2 +- src/intel/compiler/brw_nir.c| 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_nir.c index cd1f9c5..da434bf 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_nir.c @@ -91,7 +91,7 @@ ir3_optimize_loop(nir_shader *s) progress = false; OPT_V(s, nir_lower_vars_to_ssa); - progress |= OPT(s, nir_opt_copy_prop_vars); + /* progress |= OPT(s, nir_opt_copy_prop_vars); */ progress |= OPT(s, nir_lower_alu_to_scalar); progress |= OPT(s, nir_lower_phis_to_scalar); diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index 69ab162..cf994ac 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -538,7 +538,7 @@ brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler, do { progress = false; OPT(nir_lower_vars_to_ssa); - OPT(nir_opt_copy_prop_vars); + /* OPT(nir_opt_copy_prop_vars); */ if (is_scalar) { OPT(nir_lower_alu_to_scalar); -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC v1 29/38] nir: Support deref instructions in lower_clip_cull
--- .../nir/nir_lower_clip_cull_distance_arrays.c | 69 -- 1 file changed, 65 insertions(+), 4 deletions(-) diff --git a/src/compiler/nir/nir_lower_clip_cull_distance_arrays.c b/src/compiler/nir/nir_lower_clip_cull_distance_arrays.c index 95eda82..69b31d5 100644 --- a/src/compiler/nir/nir_lower_clip_cull_distance_arrays.c +++ b/src/compiler/nir/nir_lower_clip_cull_distance_arrays.c @@ -74,9 +74,9 @@ update_type(nir_variable *var, gl_shader_stage stage, unsigned length) * Rewrite any clip/cull distances to refer to the new combined array. */ static void -rewrite_references(nir_instr *instr, - nir_variable *combined, - unsigned cull_offset) +rewrite_var_references(nir_instr *instr, + nir_variable *combined, + unsigned cull_offset) { if (instr->type != nir_instr_type_intrinsic) return; @@ -121,6 +121,63 @@ rewrite_references(nir_instr *instr, /* There's no need to update writemasks; it's a scalar array. */ } +static void +rewrite_clip_cull_deref(nir_builder *b, +nir_deref_instr *deref, +const struct glsl_type *type, +unsigned tail_offset) +{ + deref->type = type; + + if (glsl_type_is_array(type)) { + const struct glsl_type *child_type = glsl_get_array_element(type); + nir_foreach_use(src, &deref->dest.ssa) { + rewrite_clip_cull_deref(b, nir_instr_as_deref(src->parent_instr), + child_type, tail_offset); + } + } else { + assert(glsl_type_is_scalar(type)); + + /* This is the end of the line. 
Add the tail offset if needed */ + if (tail_offset > 0) { + b->cursor = nir_before_instr(&deref->instr); + assert(deref->deref_type == nir_deref_type_array); + nir_ssa_def *index = nir_iadd(b, deref->arr.index.ssa, + nir_imm_int(b, tail_offset)); + nir_instr_rewrite_src(&deref->instr, &deref->arr.index, + nir_src_for_ssa(index)); + } + } +} + +static void +rewrite_references(nir_builder *b, + nir_instr *instr, + nir_variable *combined, + unsigned cull_offset) +{ + if (instr->type != nir_instr_type_deref) + return; + + nir_deref_instr *deref = nir_instr_as_deref(instr); + if (deref->deref_type != nir_deref_type_var) + return; + + if (deref->var->data.mode != combined->data.mode) + return; + + const unsigned location = deref->var->data.location; + if (location != VARYING_SLOT_CLIP_DIST0 && + location != VARYING_SLOT_CULL_DIST0) + return; + + deref->var = combined; + if (location == VARYING_SLOT_CULL_DIST0) + rewrite_clip_cull_deref(b, deref, combined->type, cull_offset); + else + rewrite_clip_cull_deref(b, deref, combined->type, 0); +} + static bool combine_clip_cull(nir_shader *nir, struct exec_list *vars, @@ -163,9 +220,13 @@ combine_clip_cull(nir_shader *nir, /* Rewrite CullDistance to reference the combined array */ nir_foreach_function(function, nir) { if (function->impl) { + nir_builder b; + nir_builder_init(&b, function->impl); + nir_foreach_block(block, function->impl) { nir_foreach_instr(instr, block) { - rewrite_references(instr, clip, clip_array_size); + rewrite_var_references(instr, clip, clip_array_size); + rewrite_references(&b, instr, clip, clip_array_size); } } } -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC v1 33/38] nir: Support deref instructions in lower_atomics
--- src/compiler/nir/nir_lower_atomics.c | 115 +-- 1 file changed, 110 insertions(+), 5 deletions(-) diff --git a/src/compiler/nir/nir_lower_atomics.c b/src/compiler/nir/nir_lower_atomics.c index 2287517..9cf6608 100644 --- a/src/compiler/nir/nir_lower_atomics.c +++ b/src/compiler/nir/nir_lower_atomics.c @@ -27,6 +27,7 @@ #include "compiler/glsl/ir_uniform.h" #include "nir.h" +#include "nir_builder.h" #include "main/config.h" #include @@ -36,9 +37,9 @@ */ static bool -lower_instr(nir_intrinsic_instr *instr, -const struct gl_shader_program *shader_program, -nir_shader *shader) +lower_var_instr(nir_intrinsic_instr *instr, +const struct gl_shader_program *shader_program, +nir_shader *shader) { nir_intrinsic_op op; switch (instr->intrinsic) { @@ -172,6 +173,103 @@ lower_instr(nir_intrinsic_instr *instr, return true; } +static bool +lower_deref_instr(nir_builder *b, nir_intrinsic_instr *instr, + const struct gl_shader_program *shader_program, + nir_shader *shader) +{ + nir_intrinsic_op op; + switch (instr->intrinsic) { + case nir_intrinsic_atomic_counter_read_deref: + op = nir_intrinsic_atomic_counter_read; + break; + + case nir_intrinsic_atomic_counter_inc_deref: + op = nir_intrinsic_atomic_counter_inc; + break; + + case nir_intrinsic_atomic_counter_dec_deref: + op = nir_intrinsic_atomic_counter_dec; + break; + + case nir_intrinsic_atomic_counter_add_deref: + op = nir_intrinsic_atomic_counter_add; + break; + + case nir_intrinsic_atomic_counter_min_deref: + op = nir_intrinsic_atomic_counter_min; + break; + + case nir_intrinsic_atomic_counter_max_deref: + op = nir_intrinsic_atomic_counter_max; + break; + + case nir_intrinsic_atomic_counter_and_deref: + op = nir_intrinsic_atomic_counter_and; + break; + + case nir_intrinsic_atomic_counter_or_deref: + op = nir_intrinsic_atomic_counter_or; + break; + + case nir_intrinsic_atomic_counter_xor_deref: + op = nir_intrinsic_atomic_counter_xor; + break; + + case nir_intrinsic_atomic_counter_exchange_deref: + op = 
nir_intrinsic_atomic_counter_exchange; + break; + + case nir_intrinsic_atomic_counter_comp_swap_deref: + op = nir_intrinsic_atomic_counter_comp_swap; + break; + + default: + return false; + } + + nir_deref_instr *deref = nir_src_as_deref(instr->src[0]); + nir_variable *var = nir_deref_instr_get_variable(deref); + + if (var->data.mode != nir_var_uniform && + var->data.mode != nir_var_shader_storage && + var->data.mode != nir_var_shared) + return false; /* atomics passed as function arguments can't be lowered */ + + const unsigned uniform_loc = var->data.location; + const unsigned base = + shader_program->data->UniformStorage[uniform_loc].opaque[shader->info.stage].index; + + b->cursor = nir_before_instr(&instr->instr); + + nir_ssa_def *offset = nir_imm_int(b, var->data.offset); + for (nir_deref_instr *d = deref; d->deref_type != nir_deref_type_var; +d = nir_deref_instr_parent(d)) { + assert(d->deref_type == nir_deref_type_array); + assert(d->arr.index.is_ssa); + + unsigned array_stride = ATOMIC_COUNTER_SIZE; + if (glsl_type_is_array(d->type)) + array_stride *= glsl_get_aoa_size(d->type); + + offset = nir_iadd(b, offset, nir_imul(b, d->arr.index.ssa, +nir_imm_int(b, array_stride))); + } + + /* Since the first source is a deref and the first source in the lowered +* instruction is the offset, we can just swap it out and change the +* opcode. 
+*/ + instr->intrinsic = op; + nir_instr_rewrite_src(&instr->instr, &instr->src[0], + nir_src_for_ssa(offset)); + nir_intrinsic_set_base(instr, base); + + nir_deref_instr_cleanup(deref); + + return true; +} + bool nir_lower_atomics(nir_shader *shader, const struct gl_shader_program *shader_program) @@ -184,13 +282,20 @@ nir_lower_atomics(nir_shader *shader, bool impl_progress = false; + nir_builder build; + nir_builder_init(&build, function->impl); + nir_foreach_block(block, function->impl) { nir_foreach_instr_safe(instr, block) { if (instr->type != nir_instr_type_intrinsic) continue; -impl_progress |= lower_instr(nir_instr_as_intrinsic(instr), - shader_program, shader); +impl_progress |= lower_var_instr(nir_instr_as_intrinsic(instr), + shader_program, shader); + +impl_progress |= lower_deref_instr(&build, + nir_instr_as_intrinsic(instr), + shader_program, shader);
[Mesa-dev] [RFC v1 27/38] nir/deref: Add a deref cleanup function
Sometimes it's useful for a pass to be able to clean up its own derefs instead of waiting for DCE. This little helper makes it very easy. --- src/compiler/nir/nir.h | 2 ++ src/compiler/nir/nir_deref.c | 13 + 2 files changed, 15 insertions(+) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 496b123..b99dd9b 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -1018,6 +1018,8 @@ nir_deref_instr_get_variable(nir_deref_instr *instr) return instr->var; } +void nir_deref_instr_cleanup(nir_deref_instr *instr); + nir_deref_var * nir_deref_instr_to_deref(nir_deref_instr *instr, void *mem_ctx); diff --git a/src/compiler/nir/nir_deref.c b/src/compiler/nir/nir_deref.c index bf39a12..de45358 100644 --- a/src/compiler/nir/nir_deref.c +++ b/src/compiler/nir/nir_deref.c @@ -72,6 +72,19 @@ nir_deref_path_finish(struct nir_deref_path *path) ralloc_free(path->path); } +void +nir_deref_instr_cleanup(nir_deref_instr *instr) +{ + for (nir_deref_instr *d = instr; d; d = nir_deref_instr_parent(d)) { + /* If anyone is using this deref, leave it alone */ + assert(d->dest.is_ssa); + if (!list_empty(&d->dest.ssa.uses)) + return; + + nir_instr_remove(&d->instr); + } +} + nir_deref_var * nir_deref_instr_to_deref(nir_deref_instr *instr, void *mem_ctx) { -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC v1 26/38] nir: Support deref instructions in lower_indirect_derefs
--- src/compiler/nir/nir_lower_indirect_derefs.c | 156 +++ 1 file changed, 156 insertions(+) diff --git a/src/compiler/nir/nir_lower_indirect_derefs.c b/src/compiler/nir/nir_lower_indirect_derefs.c index 02f202d..ebeb79b 100644 --- a/src/compiler/nir/nir_lower_indirect_derefs.c +++ b/src/compiler/nir/nir_lower_indirect_derefs.c @@ -23,6 +23,7 @@ #include "nir.h" #include "nir_builder.h" +#include "nir_deref.h" static void emit_load_store(nir_builder *b, nir_intrinsic_instr *orig_instr, @@ -184,6 +185,160 @@ lower_indirect_block(nir_block *block, nir_builder *b, return progress; } +static void +emit_load_store_deref(nir_builder *b, nir_intrinsic_instr *orig_instr, + nir_deref_instr *parent, + nir_deref_instr **deref_arr, + nir_ssa_def **dest, nir_ssa_def *src); + +static void +emit_indirect_load_store_deref(nir_builder *b, nir_intrinsic_instr *orig_instr, + nir_deref_instr *parent, + nir_deref_instr **deref_arr, + int start, int end, + nir_ssa_def **dest, nir_ssa_def *src) +{ + assert(start < end); + if (start == end - 1) { + nir_ssa_def *index = nir_imm_int(b, start); + emit_load_store_deref(b, orig_instr, +nir_build_deref_array(b, parent, index), +deref_arr + 1, dest, src); + } else { + int mid = start + (end - start) / 2; + + nir_ssa_def *then_dest, *else_dest; + + nir_deref_instr *deref = *deref_arr; + assert(deref->deref_type == nir_deref_type_array); + + nir_push_if(b, nir_ilt(b, deref->arr.index.ssa, nir_imm_int(b, mid))); + emit_indirect_load_store_deref(b, orig_instr, parent, deref_arr, + start, mid, &then_dest, src); + nir_push_else(b, NULL); + emit_indirect_load_store_deref(b, orig_instr, parent, deref_arr, + mid, end, &else_dest, src); + nir_pop_if(b, NULL); + + if (src == NULL) + *dest = nir_if_phi(b, then_dest, else_dest); + } +} + +static void +emit_load_store_deref(nir_builder *b, nir_intrinsic_instr *orig_instr, + nir_deref_instr *parent, + nir_deref_instr **deref_arr, + nir_ssa_def **dest, nir_ssa_def *src) +{ + for (; *deref_arr; deref_arr++) { + 
nir_deref_instr *deref = *deref_arr; + if (deref->deref_type == nir_deref_type_array && + nir_src_as_const_value(deref->arr.index) == NULL) { + int length = glsl_get_length(parent->type); + + emit_indirect_load_store_deref(b, orig_instr, parent, deref_arr, +0, length, dest, src); + return; + } + + parent = nir_build_deref_follower(b, parent, deref); + } + + /* We reached the end of the deref chain. Emit the instruction */ + assert(*deref_arr == NULL); + + if (src == NULL) { + /* This is a load instruction */ + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(b->shader, orig_instr->intrinsic); + load->num_components = orig_instr->num_components; + + load->src[0] = nir_src_for_ssa(&parent->dest.ssa); + + /* Copy over any other sources. This is needed for interp_deref_at */ + for (unsigned i = 1; + i < nir_intrinsic_infos[orig_instr->intrinsic].num_srcs; i++) + nir_src_copy(&load->src[i], &orig_instr->src[i], load); + + nir_ssa_dest_init(&load->instr, &load->dest, +orig_instr->dest.ssa.num_components, +orig_instr->dest.ssa.bit_size, NULL); + nir_builder_instr_insert(b, &load->instr); + *dest = &load->dest.ssa; + } else { + assert(orig_instr->intrinsic == nir_intrinsic_store_deref); + nir_store_deref(b, parent, src, nir_intrinsic_write_mask(orig_instr)); + } +} + +static bool +lower_indirect_derefs_block(nir_block *block, nir_builder *b, +nir_variable_mode modes) +{ + bool progress = false; + + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + if (intrin->intrinsic != nir_intrinsic_load_deref && + intrin->intrinsic != nir_intrinsic_interp_deref_at_centroid && + intrin->intrinsic != nir_intrinsic_interp_deref_at_sample && + intrin->intrinsic != nir_intrinsic_interp_deref_at_offset && + intrin->intrinsic != nir_intrinsic_store_deref) + continue; + + nir_deref_instr *deref = + nir_instr_as_deref(intrin->src[0].ssa->parent_instr); + + /* Walk the deref 
chain back to the base and look for indirects */ + bool has_indirect = false; + nir_deref_instr *base = deref; + while (base->deref_type
[Mesa-dev] [RFC v1 32/38] nir: Support deref instructions in lower_io
--- src/compiler/nir/nir_lower_io.c | 70 + 1 file changed, 50 insertions(+), 20 deletions(-) diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c index df91feb..549583d 100644 --- a/src/compiler/nir/nir_lower_io.c +++ b/src/compiler/nir/nir_lower_io.c @@ -35,6 +35,7 @@ #include "nir_builder.h" struct lower_io_state { + void *dead_ctx; nir_builder builder; int (*type_size)(const struct glsl_type *type); nir_variable_mode modes; @@ -156,11 +157,10 @@ get_io_offset(nir_builder *b, nir_deref_var *deref, static nir_intrinsic_instr * lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state, - nir_ssa_def *vertex_index, nir_ssa_def *offset, + nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset, unsigned component) { const nir_shader *nir = state->builder.shader; - nir_variable *var = intrin->variables[0]->var; nir_variable_mode mode = var->data.mode; nir_ssa_def *barycentric = NULL; @@ -229,10 +229,9 @@ lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state, static nir_intrinsic_instr * lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state, -nir_ssa_def *vertex_index, nir_ssa_def *offset, +nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset, unsigned component) { - nir_variable *var = intrin->variables[0]->var; nir_variable_mode mode = var->data.mode; nir_intrinsic_op op; @@ -248,7 +247,10 @@ lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state, nir_intrinsic_instr_create(state->builder.shader, op); store->num_components = intrin->num_components; - nir_src_copy(&store->src[0], &intrin->src[0], store); + if (intrin->intrinsic == nir_intrinsic_store_var) + nir_src_copy(&store->src[0], &intrin->src[0], store); + else + nir_src_copy(&store->src[0], &intrin->src[1], store); nir_intrinsic_set_base(store, var->data.driver_location); @@ -267,10 +269,8 @@ lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state, static nir_intrinsic_instr * 
lower_atomic(nir_intrinsic_instr *intrin, struct lower_io_state *state, - nir_ssa_def *offset) + nir_variable *var, nir_ssa_def *offset) { - nir_variable *var = intrin->variables[0]->var; - assert(var->data.mode == nir_var_shared); nir_intrinsic_op op; @@ -306,27 +306,28 @@ lower_atomic(nir_intrinsic_instr *intrin, struct lower_io_state *state, static nir_intrinsic_instr * lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state, - nir_ssa_def *offset, unsigned component) + nir_variable *var, nir_ssa_def *offset, unsigned component) { - nir_variable *var = intrin->variables[0]->var; - assert(var->data.mode == nir_var_shader_in); /* Ignore interpolateAt() for flat variables - flat is flat. */ if (var->data.interpolation == INTERP_MODE_FLAT) - return lower_load(intrin, state, NULL, offset, component); + return lower_load(intrin, state, NULL, var, offset, component); nir_intrinsic_op bary_op; switch (intrin->intrinsic) { case nir_intrinsic_interp_var_at_centroid: + case nir_intrinsic_interp_deref_at_centroid: bary_op = (state->options & nir_lower_io_force_sample_interpolation) ? 
nir_intrinsic_load_barycentric_sample : nir_intrinsic_load_barycentric_centroid; break; case nir_intrinsic_interp_var_at_sample: + case nir_intrinsic_interp_deref_at_sample: bary_op = nir_intrinsic_load_barycentric_at_sample; break; case nir_intrinsic_interp_var_at_offset: + case nir_intrinsic_interp_deref_at_offset: bary_op = nir_intrinsic_load_barycentric_at_offset; break; default: @@ -339,9 +340,14 @@ lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state, nir_ssa_dest_init(&bary_setup->instr, &bary_setup->dest, 2, 32, NULL); nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation); - if (intrin->intrinsic != nir_intrinsic_interp_var_at_centroid) + if (intrin->intrinsic == nir_intrinsic_interp_var_at_sample || + intrin->intrinsic == nir_intrinsic_interp_var_at_offset) nir_src_copy(&bary_setup->src[0], &intrin->src[0], bary_setup); + if (intrin->intrinsic == nir_intrinsic_interp_deref_at_sample || + intrin->intrinsic == nir_intrinsic_interp_deref_at_offset) + nir_src_copy(&bary_setup->src[0], &intrin->src[1], bary_setup); + nir_builder_instr_insert(&state->builder, &bary_setup->instr); nir_intrinsic_instr *load = @@ -374,7 +380,9 @@ nir_lower_io_block(nir_block *block, switch (intrin->intrinsic) { case nir_intrinsic_load_var: + case nir_intrinsic_load_deref: case nir_intrinsic_store_var: + case nir_intrinsic_store_deref:
[Mesa-dev] [RFC v1 25/38] nir: Support deref instructions in lower_vars_to_ssa
--- src/compiler/nir/nir_lower_vars_to_ssa.c | 75 1 file changed, 58 insertions(+), 17 deletions(-) diff --git a/src/compiler/nir/nir_lower_vars_to_ssa.c b/src/compiler/nir/nir_lower_vars_to_ssa.c index 0cc6514..403ce26 100644 --- a/src/compiler/nir/nir_lower_vars_to_ssa.c +++ b/src/compiler/nir/nir_lower_vars_to_ssa.c @@ -357,11 +357,37 @@ deref_may_be_aliased(nir_deref_var *deref, &deref->deref, state); } +static struct deref_node * +get_deref_node_for_instr(nir_intrinsic_instr *instr, unsigned idx, + struct lower_variables_state *state) +{ + switch (instr->intrinsic) { + case nir_intrinsic_load_var: + case nir_intrinsic_store_var: + case nir_intrinsic_copy_var: + return get_deref_node(instr->variables[idx], state); + + case nir_intrinsic_load_deref: + case nir_intrinsic_store_deref: + case nir_intrinsic_copy_deref: { + assert(instr->src[idx].is_ssa); + nir_deref_instr *deref_instr = + nir_instr_as_deref(instr->src[idx].ssa->parent_instr); + nir_deref_var *deref_var = + nir_deref_instr_to_deref(deref_instr, state->dead_ctx); + return get_deref_node(deref_var, state); + } + + default: + unreachable("Unhanded instruction type"); + } +} + static void register_load_instr(nir_intrinsic_instr *load_instr, struct lower_variables_state *state) { - struct deref_node *node = get_deref_node(load_instr->variables[0], state); + struct deref_node *node = get_deref_node_for_instr(load_instr, 0, state); if (node == NULL) return; @@ -376,7 +402,7 @@ static void register_store_instr(nir_intrinsic_instr *store_instr, struct lower_variables_state *state) { - struct deref_node *node = get_deref_node(store_instr->variables[0], state); + struct deref_node *node = get_deref_node_for_instr(store_instr, 0, state); if (node == NULL) return; @@ -393,8 +419,7 @@ register_copy_instr(nir_intrinsic_instr *copy_instr, { for (unsigned idx = 0; idx < 2; idx++) { struct deref_node *node = - get_deref_node(copy_instr->variables[idx], state); - + get_deref_node_for_instr(copy_instr, idx, state); if 
(node == NULL) continue; @@ -419,14 +444,17 @@ register_variable_uses_block(nir_block *block, switch (intrin->intrinsic) { case nir_intrinsic_load_var: + case nir_intrinsic_load_deref: register_load_instr(intrin, state); break; case nir_intrinsic_store_var: + case nir_intrinsic_store_deref: register_store_instr(intrin, state); break; case nir_intrinsic_copy_var: + case nir_intrinsic_copy_deref: register_copy_instr(intrin, state); break; @@ -448,15 +476,20 @@ lower_copies_to_load_store(struct deref_node *node, if (!node->copies) return true; + nir_builder b; + nir_builder_init(&b, state->impl); + struct set_entry *copy_entry; set_foreach(node->copies, copy_entry) { nir_intrinsic_instr *copy = (void *)copy_entry->key; - nir_lower_var_copy_instr(copy, state->shader); + if (copy->intrinsic == nir_intrinsic_copy_var) + nir_lower_var_copy_instr(copy, state->shader); + else + nir_lower_deref_copy_instr(&b, copy); for (unsigned i = 0; i < 2; ++i) { - struct deref_node *arg_node = -get_deref_node(copy->variables[i], state); + struct deref_node *arg_node = get_deref_node_for_instr(copy, i, state); /* Only bother removing copy entries for other nodes */ if (arg_node == NULL || arg_node == node) @@ -496,10 +529,10 @@ rename_variables(struct lower_variables_state *state) nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); switch (intrin->intrinsic) { - case nir_intrinsic_load_var: { + case nir_intrinsic_load_var: + case nir_intrinsic_load_deref: { struct deref_node *node = - get_deref_node(intrin->variables[0], state); - + get_deref_node_for_instr(intrin, 0, state); if (node == NULL) { /* If we hit this path then we are referencing an invalid * value. 
Most likely, we unrolled something and are @@ -544,9 +577,19 @@ rename_variables(struct lower_variables_state *state) break; } - case nir_intrinsic_store_var: { + case nir_intrinsic_store_var: + case nir_intrinsic_store_deref: { struct deref_node *node = - get_deref_node(intrin->variables[0], state); + get_deref_node_for_instr(intrin, 0, state); + +nir_ssa_def *value; +if (intrin->intrinsic == nir_intrinsic_store_var) { + assert(intrin->src[0].is_ssa); + value = intrin->src[0].ssa; +} else { +
[Mesa-dev] [RFC v1 35/38] nir: Support deref instructions in remove_unused_varyings
--- src/compiler/nir/nir_linking_helpers.c | 50 ++ 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/src/compiler/nir/nir_linking_helpers.c b/src/compiler/nir/nir_linking_helpers.c index 2b0a266..1a0cb91 100644 --- a/src/compiler/nir/nir_linking_helpers.c +++ b/src/compiler/nir/nir_linking_helpers.c @@ -62,29 +62,33 @@ static void tcs_add_output_reads(nir_shader *shader, uint64_t *read, uint64_t *patches_read) { nir_foreach_function(function, shader) { - if (function->impl) { - nir_foreach_block(block, function->impl) { -nir_foreach_instr(instr, block) { - if (instr->type != nir_instr_type_intrinsic) - continue; - - nir_intrinsic_instr *intrin_instr = - nir_instr_as_intrinsic(instr); - if (intrin_instr->intrinsic == nir_intrinsic_load_var && - intrin_instr->variables[0]->var->data.mode == - nir_var_shader_out) { - - nir_variable *var = intrin_instr->variables[0]->var; - if (var->data.patch) { - patches_read[var->data.location_frac] |= -get_variable_io_mask(intrin_instr->variables[0]->var, - shader->info.stage); - } else { - read[var->data.location_frac] |= -get_variable_io_mask(intrin_instr->variables[0]->var, - shader->info.stage); - } - } + if (!function->impl) + continue; + + nir_foreach_block(block, function->impl) { + nir_foreach_instr(instr, block) { +if (instr->type != nir_instr_type_intrinsic) + continue; + +nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); +nir_variable *var; +if (intrin->intrinsic == nir_intrinsic_load_var) { + var = intrin->variables[0]->var; +} else if (intrin->intrinsic == nir_intrinsic_load_deref) { + var = nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[0])); +} else { + continue; +} + +if (var->data.mode != nir_var_shader_out) + continue; + +if (var->data.patch) { + patches_read[var->data.location_frac] |= + get_variable_io_mask(var, shader->info.stage); +} else { + read[var->data.location_frac] |= + get_variable_io_mask(var, shader->info.stage); } } } -- 2.5.0.400.gff86faf ___ mesa-dev mailing list 
mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC v1 38/38] i965: Move nir_lower_deref_instrs to right before locals_to_regs
--- src/intel/compiler/brw_nir.c| 2 ++ src/mesa/drivers/dri/i965/brw_program.c | 3 +-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index cf994ac..4fc6cae 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -740,6 +740,8 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler, OPT(nir_opt_dce); OPT(nir_opt_move_comparisons); + OPT(nir_lower_deref_instrs, ~0); + OPT(nir_lower_locals_to_regs); if (unlikely(debug_enabled)) { diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index a871432..1ad4f74 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -77,7 +77,7 @@ brw_create_nir(struct brw_context *brw, /* First, lower the GLSL IR or Mesa IR to NIR */ if (shader_prog) { nir = glsl_to_nir(shader_prog, stage, options); - nir_lower_deref_instrs(nir, ~0); + nir_lower_deref_instrs(nir, nir_lower_texture_derefs); nir_remove_dead_variables(nir, nir_var_shader_in | nir_var_shader_out); nir_lower_returns(nir); nir_validate_shader(nir); @@ -85,7 +85,6 @@ brw_create_nir(struct brw_context *brw, nir_shader_get_entrypoint(nir), true, false); } else { nir = prog_to_nir(prog, options); - nir_lower_deref_instrs(nir, ~0); NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */ } nir_validate_shader(nir); -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC v1 30/38] nir: Support deref instructions in propagate_invariant
--- src/compiler/nir/nir_propagate_invariant.c | 23 --- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/src/compiler/nir/nir_propagate_invariant.c b/src/compiler/nir/nir_propagate_invariant.c index 7b5bd6c..b48b91c 100644 --- a/src/compiler/nir/nir_propagate_invariant.c +++ b/src/compiler/nir/nir_propagate_invariant.c @@ -74,6 +74,15 @@ var_is_invariant(nir_variable *var, struct set * invariants) return var->data.invariant || _mesa_set_search(invariants, var); } +static nir_variable * +intrinsic_get_var(nir_intrinsic_instr *intrin, unsigned i) +{ + if (nir_intrinsic_infos[intrin->intrinsic].num_variables == 0) + return nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[i])); + else + return intrin->variables[0]->var; +} + static void propagate_invariant_instr(nir_instr *instr, struct set *invariants) { @@ -99,14 +108,16 @@ propagate_invariant_instr(nir_instr *instr, struct set *invariants) nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); switch (intrin->intrinsic) { case nir_intrinsic_copy_var: + case nir_intrinsic_copy_deref: /* If the destination is invariant then so is the source */ - if (var_is_invariant(intrin->variables[0]->var, invariants)) -add_var(intrin->variables[1]->var, invariants); + if (var_is_invariant(intrinsic_get_var(intrin, 0), invariants)) +add_var(intrinsic_get_var(intrin, 1), invariants); break; case nir_intrinsic_load_var: + case nir_intrinsic_load_deref: if (dest_is_invariant(&intrin->dest, invariants)) -add_var(intrin->variables[0]->var, invariants); +add_var(intrinsic_get_var(intrin, 0), invariants); break; case nir_intrinsic_store_var: @@ -114,12 +125,18 @@ propagate_invariant_instr(nir_instr *instr, struct set *invariants) add_src(&intrin->src[0], invariants); break; + case nir_intrinsic_store_deref: + if (var_is_invariant(intrinsic_get_var(intrin, 0), invariants)) +add_src(&intrin->src[1], invariants); + break; + default: /* Nothing to do */ break; } } + case nir_instr_type_deref: case 
nir_instr_type_jump: case nir_instr_type_ssa_undef: case nir_instr_type_load_const: -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC v1 31/38] nir: Support deref instructions in gather_info
--- src/compiler/nir/nir_gather_info.c | 26 -- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/src/compiler/nir/nir_gather_info.c b/src/compiler/nir/nir_gather_info.c index 743f968..50d67b6 100644 --- a/src/compiler/nir/nir_gather_info.c +++ b/src/compiler/nir/nir_gather_info.c @@ -219,7 +219,8 @@ try_mask_partial_io(nir_shader *shader, nir_deref_var *deref, bool is_output_rea } static void -gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader) +gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader, + void *dead_ctx) { switch (instr->intrinsic) { case nir_intrinsic_discard: @@ -228,12 +229,22 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader) shader->info.fs.uses_discard = true; break; + case nir_intrinsic_interp_deref_at_centroid: + case nir_intrinsic_interp_deref_at_sample: + case nir_intrinsic_interp_deref_at_offset: case nir_intrinsic_interp_var_at_centroid: case nir_intrinsic_interp_var_at_sample: case nir_intrinsic_interp_var_at_offset: + case nir_intrinsic_load_deref: case nir_intrinsic_load_var: + case nir_intrinsic_store_deref: case nir_intrinsic_store_var: { - nir_variable *var = instr->variables[0]->var; + nir_deref_var *deref; + if (nir_intrinsic_infos[instr->intrinsic].num_variables > 0) + deref = instr->variables[0]; + else + deref = nir_deref_instr_to_deref(nir_src_as_deref(instr->src[0]), dead_ctx); + nir_variable *var = deref->var; if (var->data.mode == nir_var_shader_in || var->data.mode == nir_var_shader_out) { @@ -242,7 +253,7 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader) instr->intrinsic == nir_intrinsic_load_var) is_output_read = true; - if (!try_mask_partial_io(shader, instr->variables[0], is_output_read)) + if (!try_mask_partial_io(shader, deref, is_output_read)) mark_whole_variable(shader, var, is_output_read); /* We need to track which input_reads bits correspond to a @@ -328,7 +339,7 @@ gather_alu_info(nir_alu_instr *instr, nir_shader *shader) } 
static void -gather_info_block(nir_block *block, nir_shader *shader) +gather_info_block(nir_block *block, nir_shader *shader, void *dead_ctx) { nir_foreach_instr(instr, block) { switch (instr->type) { @@ -336,7 +347,7 @@ gather_info_block(nir_block *block, nir_shader *shader) gather_alu_info(nir_instr_as_alu(instr), shader); break; case nir_instr_type_intrinsic: - gather_intrinsic_info(nir_instr_as_intrinsic(instr), shader); + gather_intrinsic_info(nir_instr_as_intrinsic(instr), shader, dead_ctx); break; case nir_instr_type_tex: gather_tex_info(nir_instr_as_tex(instr), shader); @@ -384,7 +395,10 @@ nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint) if (shader->info.stage == MESA_SHADER_FRAGMENT) { shader->info.fs.uses_sample_qualifier = false; } + + void *dead_ctx = ralloc_context(NULL); nir_foreach_block(block, entrypoint) { - gather_info_block(block, shader); + gather_info_block(block, shader, dead_ctx); } + ralloc_free(dead_ctx); } -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC v1 21/38] nir: Support deref instructions in lower_io_to_temporaries
--- src/compiler/nir/nir_lower_io_to_temporaries.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/compiler/nir/nir_lower_io_to_temporaries.c b/src/compiler/nir/nir_lower_io_to_temporaries.c index 301ba65..7ba66ba 100644 --- a/src/compiler/nir/nir_lower_io_to_temporaries.c +++ b/src/compiler/nir/nir_lower_io_to_temporaries.c @@ -198,4 +198,6 @@ nir_lower_io_to_temporaries(nir_shader *shader, nir_function_impl *entrypoint, exec_list_append(&shader->globals, &state.old_inputs); exec_list_append(&shader->globals, &state.old_outputs); + + nir_fixup_deref_modes(shader); } -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC v1 28/38] nir: Support deref instructions in lower_system_values
--- src/compiler/nir/nir_lower_system_values.c | 13 ++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/compiler/nir/nir_lower_system_values.c b/src/compiler/nir/nir_lower_system_values.c index fb560ee..104df51 100644 --- a/src/compiler/nir/nir_lower_system_values.c +++ b/src/compiler/nir/nir_lower_system_values.c @@ -39,10 +39,15 @@ convert_block(nir_block *block, nir_builder *b) nir_intrinsic_instr *load_var = nir_instr_as_intrinsic(instr); - if (load_var->intrinsic != nir_intrinsic_load_var) - continue; + nir_variable *var; + if (load_var->intrinsic == nir_intrinsic_load_var) { + var = load_var->variables[0]->var; + } else if (load_var->intrinsic == nir_intrinsic_load_deref) { + var = nir_deref_instr_get_variable(nir_src_as_deref(load_var->src[0])); + } else { + continue; /* Not a load instruction */ + } - nir_variable *var = load_var->variables[0]->var; if (var->data.mode != nir_var_system_value) continue; @@ -150,6 +155,8 @@ convert_block(nir_block *block, nir_builder *b) nir_ssa_def_rewrite_uses(&load_var->dest.ssa, nir_src_for_ssa(sysval)); nir_instr_remove(&load_var->instr); + if (load_var->intrinsic == nir_intrinsic_load_deref) + nir_deref_instr_cleanup(nir_src_as_deref(load_var->src[0])); progress = true; } -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC v1 24/38] nir: Support deref instructions in split_var_copies
--- src/compiler/nir/nir_split_var_copies.c | 42 + 1 file changed, 42 insertions(+) diff --git a/src/compiler/nir/nir_split_var_copies.c b/src/compiler/nir/nir_split_var_copies.c index bc3ceed..bcd1f10 100644 --- a/src/compiler/nir/nir_split_var_copies.c +++ b/src/compiler/nir/nir_split_var_copies.c @@ -26,6 +26,7 @@ */ #include "nir.h" +#include "nir_builder.h" /* * Implements "copy splitting" which is similar to structure splitting only @@ -259,6 +260,25 @@ split_var_copies_block(nir_block *block, struct split_var_copies_state *state) return true; } +static void +split_deref_copy_instr(nir_builder *b, + nir_deref_instr *dst, nir_deref_instr *src) +{ + assert(dst->type == src->type); + if (glsl_type_is_vector_or_scalar(src->type)) { + nir_copy_deref(b, dst, src); + } else if (glsl_type_is_struct(src->type)) { + for (unsigned i = 0; i < glsl_get_length(src->type); i++) { + split_deref_copy_instr(b, nir_build_deref_struct(b, dst, i), + nir_build_deref_struct(b, src, i)); + } + } else { + assert(glsl_type_is_matrix(src->type) || glsl_type_is_array(src->type)); + split_deref_copy_instr(b, nir_build_deref_array_wildcard(b, dst), +nir_build_deref_array_wildcard(b, src)); + } +} + static bool split_var_copies_impl(nir_function_impl *impl) { @@ -268,8 +288,30 @@ split_var_copies_impl(nir_function_impl *impl) state.dead_ctx = ralloc_context(NULL); state.progress = false; + nir_builder b; + nir_builder_init(&b, impl); + nir_foreach_block(block, impl) { split_var_copies_block(block, &state); + + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) +continue; + + nir_intrinsic_instr *copy = nir_instr_as_intrinsic(instr); + if (copy->intrinsic != nir_intrinsic_copy_deref) +continue; + + b.cursor = nir_instr_remove(©->instr); + + nir_deref_instr *dst = +nir_instr_as_deref(copy->src[0].ssa->parent_instr); + nir_deref_instr *src = +nir_instr_as_deref(copy->src[1].ssa->parent_instr); + split_deref_copy_instr(&b, dst, src); + + state.progress = 
true; + } } ralloc_free(state.dead_ctx); -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC v1 12/38] anv, i965, radv, st, ir3: Call nir_lower_deref_instrs
This inserts a call to nir_lower_deref_instrs at every call site of glsl_to_nir, spirv_to_nir, and prog_to_nir. --- src/amd/vulkan/radv_shader.c| 2 ++ src/gallium/drivers/freedreno/ir3/ir3_cmdline.c | 3 +++ src/intel/vulkan/anv_pipeline.c | 2 ++ src/mesa/drivers/dri/i965/brw_program.c | 2 ++ src/mesa/state_tracker/st_glsl_to_nir.cpp | 1 + 5 files changed, 10 insertions(+) diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 180b427..482372e 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -228,6 +228,8 @@ radv_shader_compile_to_nir(struct radv_device *device, free(spec_entries); + NIR_PASS_V(nir, nir_lower_deref_instrs, ~0); + /* We have to lower away local constant initializers right before we * inline functions. That way they get properly initialized at the top * of the function and not at the top of its caller. diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c index 41bd1de..07e97c3 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c @@ -112,6 +112,7 @@ load_glsl(unsigned num_files, char* const* files, gl_shader_stage stage) errx(1, "couldn't parse `%s'", files[0]); nir_shader *nir = glsl_to_nir(prog, stage, ir3_get_compiler_options(compiler)); + nir_lower_deref_instrs(nir, ~0); /* required NIR passes: */ /* TODO cmdline args for some of the conditional lowering passes? 
*/ @@ -231,6 +232,8 @@ load_spirv(const char *filename, const char *entry, gl_shader_stage stage) &spirv_options, ir3_get_compiler_options(compiler)); + NIR_PASS_V(entry_point->shader, nir_lower_deref_instrs, ~0); + nir_print_shader(entry_point->shader, stdout); return entry_point->shader; diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 9cfd16d..a4c0415 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -170,6 +170,8 @@ anv_shader_compile_to_nir(struct anv_pipeline *pipeline, nir_print_shader(nir, stderr); } + NIR_PASS_V(nir, nir_lower_deref_instrs, ~0); + /* We have to lower away local constant initializers right before we * inline functions. That way they get properly initialized at the top * of the function and not at the top of its caller. diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 4ba46a3..a871432 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -77,6 +77,7 @@ brw_create_nir(struct brw_context *brw, /* First, lower the GLSL IR or Mesa IR to NIR */ if (shader_prog) { nir = glsl_to_nir(shader_prog, stage, options); + nir_lower_deref_instrs(nir, ~0); nir_remove_dead_variables(nir, nir_var_shader_in | nir_var_shader_out); nir_lower_returns(nir); nir_validate_shader(nir); @@ -84,6 +85,7 @@ brw_create_nir(struct brw_context *brw, nir_shader_get_entrypoint(nir), true, false); } else { nir = prog_to_nir(prog, options); + nir_lower_deref_instrs(nir, ~0); NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */ } nir_validate_shader(nir); diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp index 0bd9c4e..c7ed386 100644 --- a/src/mesa/state_tracker/st_glsl_to_nir.cpp +++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp @@ -312,6 +312,7 @@ st_glsl_to_nir(struct st_context *st, struct gl_program *prog, return prog->nir; nir_shader *nir = 
glsl_to_nir(shader_program, stage, options); + nir_lower_deref_instrs(nir, (nir_lower_deref_flags)~0); nir_variable_mode mask = (nir_variable_mode) (nir_var_shader_in | nir_var_shader_out); -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC v1 20/38] nir: Support deref instructions in lower_global_vars_to_local
--- src/compiler/nir/nir_lower_global_vars_to_local.c | 62 +++ 1 file changed, 42 insertions(+), 20 deletions(-) diff --git a/src/compiler/nir/nir_lower_global_vars_to_local.c b/src/compiler/nir/nir_lower_global_vars_to_local.c index c8fdfde..14aa366 100644 --- a/src/compiler/nir/nir_lower_global_vars_to_local.c +++ b/src/compiler/nir/nir_lower_global_vars_to_local.c @@ -32,31 +32,50 @@ #include "nir.h" +static void +register_var_use(nir_variable *var, nir_function_impl *impl, + struct hash_table *var_func_table) +{ + if (var->data.mode != nir_var_global) + return; + + struct hash_entry *entry = + _mesa_hash_table_search(var_func_table, var); + + if (entry) { + if (entry->data != impl) + entry->data = NULL; + } else { + _mesa_hash_table_insert(var_func_table, var, impl); + } +} + static bool mark_global_var_uses_block(nir_block *block, nir_function_impl *impl, struct hash_table *var_func_table) { nir_foreach_instr(instr, block) { - if (instr->type != nir_instr_type_intrinsic) - continue; - - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - unsigned num_vars = nir_intrinsic_infos[intrin->intrinsic].num_variables; - - for (unsigned i = 0; i < num_vars; i++) { - nir_variable *var = intrin->variables[i]->var; - if (var->data.mode != nir_var_global) -continue; - - struct hash_entry *entry = -_mesa_hash_table_search(var_func_table, var); - - if (entry) { -if (entry->data != impl) - entry->data = NULL; - } else { -_mesa_hash_table_insert(var_func_table, var, impl); - } + switch (instr->type) { + case nir_instr_type_deref: { + nir_deref_instr *deref = nir_instr_as_deref(instr); + if (deref->deref_type == nir_deref_type_var) +register_var_use(deref->var, impl, var_func_table); + break; + } + + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + unsigned num_vars = +nir_intrinsic_infos[intrin->intrinsic].num_variables; + + for (unsigned i = 0; i < num_vars; i++) +register_var_use(intrin->variables[i]->var, impl, 
var_func_table); + break; + } + + default: + /* Nothing to do */ + break; } } @@ -103,5 +122,8 @@ nir_lower_global_vars_to_local(nir_shader *shader) _mesa_hash_table_destroy(var_func_table, NULL); + if (progress) + nir_fixup_deref_modes(shader); + return progress; } -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC v1 23/38] nir: Support deref instructions in lower_var_copies
--- src/compiler/nir/nir.h | 3 ++ src/compiler/nir/nir_builder.h | 48 ++ src/compiler/nir/nir_lower_var_copies.c | 90 +++-- 3 files changed, 138 insertions(+), 3 deletions(-) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 7f5c7e9..496b123 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -81,6 +81,7 @@ name(const in_type *parent) \ struct nir_function; struct nir_shader; struct nir_instr; +struct nir_builder; /** @@ -2606,6 +2607,8 @@ bool nir_lower_deref_instrs(nir_shader *shader, enum nir_lower_deref_flags flags); void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, nir_shader *shader); +void nir_lower_deref_copy_instr(struct nir_builder *b, +nir_intrinsic_instr *copy); bool nir_lower_var_copies(nir_shader *shader); void nir_fixup_deref_modes(nir_shader *shader); diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h index 6667d52..14919e7 100644 --- a/src/compiler/nir/nir_builder.h +++ b/src/compiler/nir/nir_builder.h @@ -641,6 +641,54 @@ nir_build_deref_for_chain(nir_builder *b, nir_deref_var *deref_var) return tail; } +/** Returns a deref that follows another but starting from the given parent + * + * The new deref will be the same type and take the same array or struct index + * as the leader deref but it may have a different parent. This is very + * useful for walking deref paths. 
+ */ +static inline nir_deref_instr * +nir_build_deref_follower(nir_builder *b, nir_deref_instr *parent, + nir_deref_instr *leader) +{ + /* If the derefs would have the same parent, don't make a new one */ + assert(leader->parent.is_ssa); + if (leader->parent.ssa == &parent->dest.ssa) + return leader; + + UNUSED nir_deref_instr *leader_parent = nir_src_as_deref(leader->parent); + + switch (leader->deref_type) { + case nir_deref_type_var: + unreachable("A var dereference cannot have a parent"); + break; + + case nir_deref_type_array: + case nir_deref_type_array_wildcard: + assert(glsl_type_is_matrix(parent->type) || + glsl_type_is_array(parent->type)); + assert(glsl_get_length(parent->type) == + glsl_get_length(leader_parent->type)); + + if (leader->deref_type == nir_deref_type_array) { + assert(leader->arr.index.is_ssa); + return nir_build_deref_array(b, parent, leader->arr.index.ssa); + } else { + return nir_build_deref_array_wildcard(b, parent); + } + + case nir_deref_type_struct: + assert(glsl_type_is_struct(parent->type)); + assert(glsl_get_length(parent->type) == + glsl_get_length(leader_parent->type)); + + return nir_build_deref_struct(b, parent, leader->strct.index); + + default: + unreachable("Invalid deref instruction type"); + } +} + static inline nir_ssa_def * nir_load_deref(nir_builder *build, nir_deref_instr *deref) { diff --git a/src/compiler/nir/nir_lower_var_copies.c b/src/compiler/nir/nir_lower_var_copies.c index 6288bdc..efe5c67 100644 --- a/src/compiler/nir/nir_lower_var_copies.c +++ b/src/compiler/nir/nir_lower_var_copies.c @@ -26,6 +26,8 @@ */ #include "nir.h" +#include "nir_builder.h" +#include "nir_deref.h" #include "compiler/nir_types.h" /* @@ -154,23 +156,105 @@ nir_lower_var_copy_instr(nir_intrinsic_instr *copy, nir_shader *shader) ©->variables[1]->deref, shader); } +static nir_deref_instr * +build_deref_to_next_wildcard(nir_builder *b, + nir_deref_instr *parent, + nir_deref_instr ***deref_arr) +{ + for (; **deref_arr; (*deref_arr)++) { + 
if ((**deref_arr)->deref_type == nir_deref_type_array_wildcard) + return parent; + + parent = nir_build_deref_follower(b, parent, **deref_arr); + } + + assert(**deref_arr == NULL); + *deref_arr = NULL; + return parent; +} + +static void +emit_deref_copy_load_store(nir_builder *b, + nir_deref_instr *dst_deref, + nir_deref_instr **dst_deref_arr, + nir_deref_instr *src_deref, + nir_deref_instr **src_deref_arr) +{ + if (dst_deref_arr || src_deref_arr) { + assert(dst_deref_arr && src_deref_arr); + dst_deref = build_deref_to_next_wildcard(b, dst_deref, &dst_deref_arr); + src_deref = build_deref_to_next_wildcard(b, src_deref, &src_deref_arr); + } + + if (dst_deref_arr || src_deref_arr) { + assert(dst_deref_arr && src_deref_arr); + assert((*dst_deref_arr)->deref_type == nir_deref_type_array_wildcard); + assert((*src_deref_arr)->deref_type == nir_deref_type_array_wildcard); + + unsigned length = glsl_get_length(src_deref->type); + /* The wildcards should represent the same number of element
[Mesa-dev] [RFC v1 17/38] nir/lower_atomics: Rework the main walker loop a bit
This replaces some "if (...) { }" with "if (...) continue;" to reduce nesting depth and makes nir_metadata_preserve conditional on progress for the given impl. --- src/compiler/nir/nir_lower_atomics.c | 22 +++--- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/src/compiler/nir/nir_lower_atomics.c b/src/compiler/nir/nir_lower_atomics.c index bdab4b8..2287517 100644 --- a/src/compiler/nir/nir_lower_atomics.c +++ b/src/compiler/nir/nir_lower_atomics.c @@ -179,17 +179,25 @@ nir_lower_atomics(nir_shader *shader, bool progress = false; nir_foreach_function(function, shader) { - if (function->impl) { - nir_foreach_block(block, function->impl) { -nir_foreach_instr_safe(instr, block) { - if (instr->type == nir_instr_type_intrinsic) - progress |= lower_instr(nir_instr_as_intrinsic(instr), - shader_program, shader); -} + if (!function->impl) + continue; + + bool impl_progress = false; + + nir_foreach_block(block, function->impl) { + nir_foreach_instr_safe(instr, block) { +if (instr->type != nir_instr_type_intrinsic) + continue; + +impl_progress |= lower_instr(nir_instr_as_intrinsic(instr), + shader_program, shader); } + } + if (impl_progress) { nir_metadata_preserve(function->impl, nir_metadata_block_index | nir_metadata_dominance); + progress = true; } } -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC v1 15/38] prog/nir: Simplify some load/store operations
--- src/compiler/nir/nir_builder.h | 6 ++ src/mesa/program/prog_to_nir.c | 29 ++--- 2 files changed, 12 insertions(+), 23 deletions(-) diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h index f1e52b2..6667d52 100644 --- a/src/compiler/nir/nir_builder.h +++ b/src/compiler/nir/nir_builder.h @@ -525,6 +525,12 @@ nir_ssa_for_alu_src(nir_builder *build, nir_alu_instr *instr, unsigned srcn) return nir_imov_alu(build, *src, num_components); } +static inline nir_ssa_def * +nir_load_reg(nir_builder *build, nir_register *reg) +{ + return nir_ssa_for_src(build, nir_src_for_reg(reg), reg->num_components); +} + static inline nir_deref_instr * nir_build_deref_var(nir_builder *build, nir_variable *var) { diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c index 851b3f2..26dfc37 100644 --- a/src/mesa/program/prog_to_nir.c +++ b/src/mesa/program/prog_to_nir.c @@ -136,15 +136,8 @@ ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src) assert(prog_src->Index >= 0 && prog_src->Index < VARYING_SLOT_MAX); - nir_intrinsic_instr *load = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var); - load->num_components = 4; - load->variables[0] = nir_deref_var_create(load, c->input_vars[prog_src->Index]); - - nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL); - nir_builder_instr_insert(b, &load->instr); - - src.src = nir_src_for_ssa(&load->dest.ssa); + nir_variable *var = c->input_vars[prog_src->Index]; + src.src = nir_src_for_ssa(nir_load_var(b, var)); break; } case PROGRAM_STATE_VAR: @@ -860,27 +853,17 @@ ptn_add_output_stores(struct ptn_compile *c) nir_builder *b = &c->build; nir_foreach_variable(var, &b->shader->outputs) { - nir_intrinsic_instr *store = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); - store->num_components = glsl_get_vector_elements(var->type); - nir_intrinsic_set_write_mask(store, (1 << store->num_components) - 1); - store->variables[0] = - 
nir_deref_var_create(store, c->output_vars[var->data.location]); - + nir_ssa_def *src = nir_load_reg(b, c->output_regs[var->data.location]); if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB && var->data.location == FRAG_RESULT_DEPTH) { /* result.depth has this strange convention of being the .z component of * a vec4 with undefined .xyw components. We resolve it to a scalar, to * match GLSL's gl_FragDepth and the expectations of most backends. */ - nir_alu_src alu_src = { NIR_SRC_INIT }; - alu_src.src = nir_src_for_reg(c->output_regs[FRAG_RESULT_DEPTH]); - alu_src.swizzle[0] = SWIZZLE_Z; - store->src[0] = nir_src_for_ssa(nir_fmov_alu(b, alu_src, 1)); - } else { - store->src[0].reg.reg = c->output_regs[var->data.location]; + src = nir_channel(b, src, 2); } - nir_builder_instr_insert(b, &store->instr); + unsigned num_components = glsl_get_vector_elements(var->type); + nir_store_var(b, var, src, (1 << num_components) - 1); } } -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC v1 22/38] nir: Add a deref path helper struct
This commit introduces a new nir_deref.h header for helpers that are less common and really only needed by a few heavy-duty passes. In this header is a new struct for representing a full deref path which can be walked in either direction. --- src/compiler/Makefile.sources | 1 + src/compiler/nir/meson.build | 1 + src/compiler/nir/nir_deref.c | 48 + src/compiler/nir/nir_deref.h | 55 +++ 4 files changed, 105 insertions(+) create mode 100644 src/compiler/nir/nir_deref.h diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources index 6ee357c..3e4d851 100644 --- a/src/compiler/Makefile.sources +++ b/src/compiler/Makefile.sources @@ -196,6 +196,7 @@ NIR_FILES = \ nir/nir_control_flow.h \ nir/nir_control_flow_private.h \ nir/nir_deref.c \ + nir/nir_deref.h \ nir/nir_dominance.c \ nir/nir_from_ssa.c \ nir/nir_gather_info.c \ diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build index 963ef02..53bf816 100644 --- a/src/compiler/nir/meson.build +++ b/src/compiler/nir/meson.build @@ -90,6 +90,7 @@ files_libnir = files( 'nir_control_flow.h', 'nir_control_flow_private.h', 'nir_deref.c', + 'nir_deref.h', 'nir_dominance.c', 'nir_from_ssa.c', 'nir_gather_info.c', diff --git a/src/compiler/nir/nir_deref.c b/src/compiler/nir/nir_deref.c index af5d75f..bf39a12 100644 --- a/src/compiler/nir/nir_deref.c +++ b/src/compiler/nir/nir_deref.c @@ -23,6 +23,54 @@ #include "nir.h" #include "nir_builder.h" +#include "nir_deref.h" + +void +nir_deref_path_init(struct nir_deref_path *path, +nir_deref_instr *deref, void *mem_ctx) +{ + /* The length of the short path is at most ARRAY_SIZE - 1 because we need +* room for the NULL terminator. 
+*/ + static const int max_short_path_len = ARRAY_SIZE(path->_short_path) - 1; + + int count = 0; + + nir_deref_instr **tail = &path->_short_path[max_short_path_len]; + nir_deref_instr **head = tail; + + *tail = NULL; + for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) { + count++; + if (count <= max_short_path_len) + *(--head) = d; + } + + if (count <= max_short_path_len) { + /* If we're under max_short_path_len, just use the short path. */ + path->path = head; + goto done; + } + + path->path = ralloc_array(mem_ctx, nir_deref_instr *, count + 1); + head = tail = path->path + count; + for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) + *(--head) = d; + +done: + assert(head == path->path); + assert(tail == head + count); + assert((*head)->deref_type == nir_deref_type_var); + assert(*tail == NULL); +} + +void +nir_deref_path_finish(struct nir_deref_path *path) +{ + if (path->path < &path->_short_path[0] || + path->path > &path->_short_path[ARRAY_SIZE(path->_short_path) - 1]) + ralloc_free(path->path); +} nir_deref_var * nir_deref_instr_to_deref(nir_deref_instr *instr, void *mem_ctx) diff --git a/src/compiler/nir/nir_deref.h b/src/compiler/nir/nir_deref.h new file mode 100644 index 000..7597b77 --- /dev/null +++ b/src/compiler/nir/nir_deref.h @@ -0,0 +1,55 @@ +/* + * Copyright © 2018 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef NIR_DEREF_H +#define NIR_DEREF_H + +#include "nir.h" +#include "nir_builder.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct nir_deref_path { + /** Short path so we can keep it on the stack most of the time. */ + nir_deref_instr *_short_path[7]; + + /** A null-terminated array view of a deref chain +* +* The first element of this array will be the variable dereference +* followed by every deref_instr on the path to the final one. The last +* element in the array is a NULL pointer which acts as a terminator. +
[Mesa-dev] [RFC v1 16/38] prog/nir: Use deref instructions for params
--- src/mesa/program/prog_to_nir.c | 36 ++-- 1 file changed, 6 insertions(+), 30 deletions(-) diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c index 26dfc37..14391a3 100644 --- a/src/mesa/program/prog_to_nir.c +++ b/src/mesa/program/prog_to_nir.c @@ -167,38 +167,14 @@ ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src) nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL); load->num_components = 4; - load->variables[0] = nir_deref_var_create(load, c->parameters); - nir_deref_array *deref_arr = -nir_deref_array_create(load->variables[0]); - deref_arr->deref.type = glsl_vec4_type(); - load->variables[0]->deref.child = &deref_arr->deref; - - if (prog_src->RelAddr) { -deref_arr->deref_array_type = nir_deref_array_type_indirect; - -nir_alu_src addr_src = { NIR_SRC_INIT }; -addr_src.src = nir_src_for_reg(c->addr_reg); -nir_ssa_def *reladdr = nir_imov_alu(b, addr_src, 1); - -if (prog_src->Index < 0) { - /* This is a negative offset which should be added to the address -* register's value. -*/ - reladdr = nir_iadd(b, reladdr, nir_imm_int(b, prog_src->Index)); - - deref_arr->base_offset = 0; -} else { - deref_arr->base_offset = prog_src->Index; -} -deref_arr->indirect = nir_src_for_ssa(reladdr); - } else { -deref_arr->deref_array_type = nir_deref_array_type_direct; -deref_arr->base_offset = prog_src->Index; - } + nir_deref_instr *deref = nir_build_deref_var(b, c->parameters); - nir_builder_instr_insert(b, &load->instr); + nir_ssa_def *index = nir_imm_int(b, prog_src->Index); + if (prog_src->RelAddr) +index = nir_iadd(b, index, nir_load_reg(b, c->addr_reg)); + deref = nir_build_deref_array(b, deref, nir_channel(b, index, 0)); - src.src = nir_src_for_ssa(&load->dest.ssa); + src.src = nir_src_for_ssa(nir_load_deref(b, deref)); break; } default: -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC v1 14/38] glsl/nir: Use deref instructions instead of deref chains
--- src/compiler/glsl/glsl_to_nir.cpp | 239 +++--- 1 file changed, 94 insertions(+), 145 deletions(-) diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp index 9da4526..be7a510 100644 --- a/src/compiler/glsl/glsl_to_nir.cpp +++ b/src/compiler/glsl/glsl_to_nir.cpp @@ -90,12 +90,10 @@ private: nir_builder b; nir_ssa_def *result; /* result of the expression tree last visited */ - nir_deref_var *evaluate_deref(nir_instr *mem_ctx, ir_instruction *ir); + nir_deref_instr *evaluate_deref(ir_instruction *ir); - /* the head of the dereference chain we're creating */ - nir_deref_var *deref_head; - /* the tail of the dereference chain we're creating */ - nir_deref *deref_tail; + /* most recent deref instruction created */ + nir_deref_instr *deref; nir_variable *var; /* variable created by ir_variable visitor */ @@ -198,8 +196,6 @@ nir_visitor::nir_visitor(nir_shader *shader) this->result = NULL; this->impl = NULL; this->var = NULL; - this->deref_head = NULL; - this->deref_tail = NULL; memset(&this->b, 0, sizeof(this->b)); } @@ -209,12 +205,11 @@ nir_visitor::~nir_visitor() _mesa_hash_table_destroy(this->overload_table, NULL); } -nir_deref_var * -nir_visitor::evaluate_deref(nir_instr *mem_ctx, ir_instruction *ir) +nir_deref_instr * +nir_visitor::evaluate_deref(ir_instruction *ir) { ir->accept(this); - ralloc_steal(mem_ctx, this->deref_head); - return this->deref_head; + return this->deref; } static nir_constant * @@ -627,76 +622,76 @@ nir_visitor::visit(ir_call *ir) switch (ir->callee->intrinsic_id) { case ir_intrinsic_atomic_counter_read: - op = nir_intrinsic_atomic_counter_read_var; + op = nir_intrinsic_atomic_counter_read_deref; break; case ir_intrinsic_atomic_counter_increment: - op = nir_intrinsic_atomic_counter_inc_var; + op = nir_intrinsic_atomic_counter_inc_deref; break; case ir_intrinsic_atomic_counter_predecrement: - op = nir_intrinsic_atomic_counter_dec_var; + op = nir_intrinsic_atomic_counter_dec_deref; break; case 
ir_intrinsic_atomic_counter_add: - op = nir_intrinsic_atomic_counter_add_var; + op = nir_intrinsic_atomic_counter_add_deref; break; case ir_intrinsic_atomic_counter_and: - op = nir_intrinsic_atomic_counter_and_var; + op = nir_intrinsic_atomic_counter_and_deref; break; case ir_intrinsic_atomic_counter_or: - op = nir_intrinsic_atomic_counter_or_var; + op = nir_intrinsic_atomic_counter_or_deref; break; case ir_intrinsic_atomic_counter_xor: - op = nir_intrinsic_atomic_counter_xor_var; + op = nir_intrinsic_atomic_counter_xor_deref; break; case ir_intrinsic_atomic_counter_min: - op = nir_intrinsic_atomic_counter_min_var; + op = nir_intrinsic_atomic_counter_min_deref; break; case ir_intrinsic_atomic_counter_max: - op = nir_intrinsic_atomic_counter_max_var; + op = nir_intrinsic_atomic_counter_max_deref; break; case ir_intrinsic_atomic_counter_exchange: - op = nir_intrinsic_atomic_counter_exchange_var; + op = nir_intrinsic_atomic_counter_exchange_deref; break; case ir_intrinsic_atomic_counter_comp_swap: - op = nir_intrinsic_atomic_counter_comp_swap_var; + op = nir_intrinsic_atomic_counter_comp_swap_deref; break; case ir_intrinsic_image_load: - op = nir_intrinsic_image_var_load; + op = nir_intrinsic_image_deref_load; break; case ir_intrinsic_image_store: - op = nir_intrinsic_image_var_store; + op = nir_intrinsic_image_deref_store; break; case ir_intrinsic_image_atomic_add: - op = nir_intrinsic_image_var_atomic_add; + op = nir_intrinsic_image_deref_atomic_add; break; case ir_intrinsic_image_atomic_min: - op = nir_intrinsic_image_var_atomic_min; + op = nir_intrinsic_image_deref_atomic_min; break; case ir_intrinsic_image_atomic_max: - op = nir_intrinsic_image_var_atomic_max; + op = nir_intrinsic_image_deref_atomic_max; break; case ir_intrinsic_image_atomic_and: - op = nir_intrinsic_image_var_atomic_and; + op = nir_intrinsic_image_deref_atomic_and; break; case ir_intrinsic_image_atomic_or: - op = nir_intrinsic_image_var_atomic_or; + op = nir_intrinsic_image_deref_atomic_or; 
break; case ir_intrinsic_image_atomic_xor: - op = nir_intrinsic_image_var_atomic_xor; + op = nir_intrinsic_image_deref_atomic_xor; break; case ir_intrinsic_image_atomic_exchange: - op = nir_intrinsic_image_var_atomic_exchange; + op = nir_intrinsic_i
[Mesa-dev] [PATCH 04/38] nir/lower_indirect_derefs: Support interp_var_at intrinsics
This fixes the fs-interpolateAtCentroid-block-array piglit test on i965. Cc: mesa-sta...@lists.freedesktop.org --- src/compiler/nir/nir_lower_indirect_derefs.c | 13 +++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/compiler/nir/nir_lower_indirect_derefs.c b/src/compiler/nir/nir_lower_indirect_derefs.c index c949224..02f202d 100644 --- a/src/compiler/nir/nir_lower_indirect_derefs.c +++ b/src/compiler/nir/nir_lower_indirect_derefs.c @@ -95,9 +95,15 @@ emit_load_store(nir_builder *b, nir_intrinsic_instr *orig_instr, if (src == NULL) { /* This is a load instruction */ nir_intrinsic_instr *load = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var); + nir_intrinsic_instr_create(b->shader, orig_instr->intrinsic); load->num_components = orig_instr->num_components; load->variables[0] = nir_deref_var_clone(deref, load); + + /* Copy over any sources. This is needed for interp_var_at */ + for (unsigned i = 0; + i < nir_intrinsic_infos[orig_instr->intrinsic].num_srcs; i++) + nir_src_copy(&load->src[i], &orig_instr->src[i], load); + unsigned bit_size = orig_instr->dest.ssa.bit_size; nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, bit_size, NULL); @@ -142,6 +148,9 @@ lower_indirect_block(nir_block *block, nir_builder *b, nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); if (intrin->intrinsic != nir_intrinsic_load_var && + intrin->intrinsic != nir_intrinsic_interp_var_at_centroid && + intrin->intrinsic != nir_intrinsic_interp_var_at_sample && + intrin->intrinsic != nir_intrinsic_interp_var_at_offset && intrin->intrinsic != nir_intrinsic_store_var) continue; @@ -158,7 +167,7 @@ lower_indirect_block(nir_block *block, nir_builder *b, b->cursor = nir_before_instr(&intrin->instr); - if (intrin->intrinsic == nir_intrinsic_load_var) { + if (intrin->intrinsic != nir_intrinsic_store_var) { nir_ssa_def *result; emit_load_store(b, intrin, intrin->variables[0], &intrin->variables[0]->deref, &result, NULL); -- 2.5.0.400.gff86faf 
___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 05/38] nir/validator: Validate that all used variables exist
We were validating this for locals but nothing else. --- src/compiler/nir/nir_validate.c | 16 +--- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/compiler/nir/nir_validate.c b/src/compiler/nir/nir_validate.c index a49948f..e9d6bd5 100644 --- a/src/compiler/nir/nir_validate.c +++ b/src/compiler/nir/nir_validate.c @@ -96,7 +96,9 @@ typedef struct { /* bitset of registers we have currently found; used to check uniqueness */ BITSET_WORD *regs_found; - /* map of local variable -> function implementation where it is defined */ + /* map of variable -> function implementation where it is defined or NULL +* if it is a global variable +*/ struct hash_table *var_defs; /* map of instruction/var/etc to failed assert string */ @@ -448,12 +450,10 @@ validate_deref_chain(nir_deref *deref, nir_variable_mode mode, static void validate_var_use(nir_variable *var, validate_state *state) { - if (var->data.mode == nir_var_local) { - struct hash_entry *entry = _mesa_hash_table_search(state->var_defs, var); - - validate_assert(state, entry); + struct hash_entry *entry = _mesa_hash_table_search(state->var_defs, var); + validate_assert(state, entry); + if (var->data.mode == nir_var_local) validate_assert(state, (nir_function_impl *) entry->data == state->impl); - } } static void @@ -1000,7 +1000,9 @@ validate_var_decl(nir_variable *var, bool is_global, validate_state *state) * support) */ - if (!is_global) { + if (is_global) { + _mesa_hash_table_insert(state->var_defs, var, NULL); + } else { _mesa_hash_table_insert(state->var_defs, var, state->impl); } -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC v1 09/38] nir: Add _deref versions of all of the _var intrinsics
--- src/compiler/nir/nir.h| 2 +- src/compiler/nir/nir_builder.h| 37 + src/compiler/nir/nir_intrinsics.h | 84 +++ 3 files changed, 122 insertions(+), 1 deletion(-) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 0e69a85..14b532d 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -1148,7 +1148,7 @@ typedef enum { } nir_intrinsic_index_flag; -#define NIR_INTRINSIC_MAX_INPUTS 4 +#define NIR_INTRINSIC_MAX_INPUTS 5 typedef struct { const char *name; diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h index 27aa65b..b4dda96 100644 --- a/src/compiler/nir/nir_builder.h +++ b/src/compiler/nir/nir_builder.h @@ -613,6 +613,43 @@ nir_build_deref_struct(nir_builder *build, nir_deref_instr *parent, } static inline nir_ssa_def * +nir_load_deref(nir_builder *build, nir_deref_instr *deref) +{ + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(build->shader, nir_intrinsic_load_deref); + load->num_components = glsl_get_vector_elements(deref->type); + load->src[0] = nir_src_for_ssa(&deref->dest.ssa); + nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, + glsl_get_bit_size(deref->type), NULL); + nir_builder_instr_insert(build, &load->instr); + return &load->dest.ssa; +} + +static inline void +nir_store_deref(nir_builder *build, nir_deref_instr *deref, +nir_ssa_def *value, unsigned writemask) +{ + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(build->shader, nir_intrinsic_store_deref); + store->num_components = glsl_get_vector_elements(deref->type); + store->src[0] = nir_src_for_ssa(&deref->dest.ssa); + store->src[1] = nir_src_for_ssa(value); + nir_intrinsic_set_write_mask(store, +writemask & ((1 << store->num_components) - 1)); + nir_builder_instr_insert(build, &store->instr); +} + +static inline void +nir_copy_deref(nir_builder *build, nir_deref_instr *dest, nir_deref_instr *src) +{ + nir_intrinsic_instr *copy = + nir_intrinsic_instr_create(build->shader, nir_intrinsic_copy_deref); + 
copy->src[0] = nir_src_for_ssa(&dest->dest.ssa); + copy->src[1] = nir_src_for_ssa(&src->dest.ssa); + nir_builder_instr_insert(build, &copy->instr); +} + +static inline nir_ssa_def * nir_load_var(nir_builder *build, nir_variable *var) { const unsigned num_components = glsl_get_vector_elements(var->type); diff --git a/src/compiler/nir/nir_intrinsics.h b/src/compiler/nir/nir_intrinsics.h index 8f3d3bc..c14a9ef 100644 --- a/src/compiler/nir/nir_intrinsics.h +++ b/src/compiler/nir/nir_intrinsics.h @@ -49,6 +49,14 @@ INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 1, WRMASK, xx, xx, 0) INTRINSIC(copy_var, 0, ARR(0), false, 0, 2, 0, xx, xx, xx, 0) /* + * Pointer versions of the _var intrinsics which take a deref as the first (or + * second, in the case of copy) source. + */ +INTRINSIC(load_deref, 1, ARR(1), true, 0, 0, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE) +INTRINSIC(store_deref, 2, ARR(1, 0), false, 0, 0, 1, WRMASK, xx, xx, 0) +INTRINSIC(copy_deref, 2, ARR(1, 1), false, 0, 0, 0, xx, xx, xx, 0) + +/* * Interpolation of input. The interp_var_at* intrinsics are similar to the * load_var intrinsic acting on a shader input except that they interpolate * the input differently. The at_sample and at_offset intrinsics take an @@ -64,6 +72,21 @@ INTRINSIC(interp_var_at_offset, 1, ARR(2), true, 0, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) /* + * Interpolation of input. The interp_deref_at* intrinsics are similar to the + * load_deref intrinsic acting on a shader input except that they interpolate + * the input differently. The at_sample and at_offset intrinsics take an + * additional source that is an integer sample id or a vec2 position offset + * respectively. 
+ */ + +INTRINSIC(interp_deref_at_centroid, 1, ARR(1, 0), true, 0, 0, 0, xx, xx, xx, + NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +INTRINSIC(interp_deref_at_sample, 2, ARR(1, 1), true, 0, 0, 0, xx, xx, xx, + NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +INTRINSIC(interp_deref_at_offset, 2, ARR(1, 2), true, 0, 0, 0, xx, xx, xx, + NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) + +/* * Ask the driver for the size of a given buffer. It takes the buffer index * as source. */ @@ -217,12 +240,15 @@ INTRINSIC(set_vertex_count, 1, ARR(1), false, 0, 0, 0, xx, xx, xx, 0) #define ATOMIC(name, flags) \ INTRINSIC(name##_var, 0, ARR(0), true, 1, 1, 0, xx, xx, xx, flags) \ + INTRINSIC(name##_deref, 1, ARR(1), true, 1, 0, 0, xx, xx, xx, flags) \ INTRINSIC(name, 1, ARR(1), true, 1, 0, 1, BASE, xx, xx, flags) #define ATOMIC2(name) \ INTRINSIC(name##_var, 1, ARR(1), true, 1, 1, 0, xx, xx, xx, 0) \ + INTRINSIC(name##_deref, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0) \
[Mesa-dev] [RFC v1 06/38] nir: Rename image intrinsics to image_var
Generated with git grep -l nir_intrinsic_image | xargs \ sed -i 's/nir_intrinsic_image/nir_intrinsic_image_var/g' and some manual fixing in nir_intrinsics.h --- src/amd/common/ac_nir_to_llvm.c| 42 - src/amd/vulkan/radv_meta_bufimage.c| 8 ++-- src/amd/vulkan/radv_meta_fast_clear.c | 2 +- src/amd/vulkan/radv_meta_resolve_cs.c | 2 +- src/amd/vulkan/radv_shader_info.c | 40 src/compiler/glsl/glsl_to_nir.cpp | 54 +++--- src/compiler/nir/nir_intrinsics.h | 24 +- src/compiler/nir/nir_lower_samplers_as_deref.c | 22 - src/compiler/spirv/spirv_to_nir.c | 4 +- .../drivers/freedreno/ir3/ir3_compiler_nir.c | 38 +++ src/gallium/drivers/freedreno/ir3/ir3_nir.c| 2 +- src/gallium/drivers/radeonsi/si_shader_nir.c | 18 src/intel/compiler/brw_fs_nir.cpp | 46 +- src/intel/vulkan/anv_nir_apply_pipeline_layout.c | 24 +- src/intel/vulkan/anv_nir_lower_input_attachments.c | 2 +- 15 files changed, 164 insertions(+), 164 deletions(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index b0c0d76..938b215 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -2235,35 +2235,35 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx, bool is_unsigned = glsl_get_sampler_result_type(type) == GLSL_TYPE_UINT; switch (instr->intrinsic) { - case nir_intrinsic_image_atomic_add: + case nir_intrinsic_image_var_atomic_add: atomic_name = "add"; break; - case nir_intrinsic_image_atomic_min: + case nir_intrinsic_image_var_atomic_min: atomic_name = is_unsigned ? "umin" : "smin"; break; - case nir_intrinsic_image_atomic_max: + case nir_intrinsic_image_var_atomic_max: atomic_name = is_unsigned ? 
"umax" : "smax"; break; - case nir_intrinsic_image_atomic_and: + case nir_intrinsic_image_var_atomic_and: atomic_name = "and"; break; - case nir_intrinsic_image_atomic_or: + case nir_intrinsic_image_var_atomic_or: atomic_name = "or"; break; - case nir_intrinsic_image_atomic_xor: + case nir_intrinsic_image_var_atomic_xor: atomic_name = "xor"; break; - case nir_intrinsic_image_atomic_exchange: + case nir_intrinsic_image_var_atomic_exchange: atomic_name = "swap"; break; - case nir_intrinsic_image_atomic_comp_swap: + case nir_intrinsic_image_var_atomic_comp_swap: atomic_name = "cmpswap"; break; default: abort(); } - if (instr->intrinsic == nir_intrinsic_image_atomic_comp_swap) + if (instr->intrinsic == nir_intrinsic_image_var_atomic_comp_swap) params[param_count++] = get_src(ctx, instr->src[3]); params[param_count++] = get_src(ctx, instr->src[2]); @@ -2895,26 +2895,26 @@ static void visit_intrinsic(struct ac_nir_context *ctx, case nir_intrinsic_store_shared: visit_store_shared(ctx, instr); break; - case nir_intrinsic_image_samples: + case nir_intrinsic_image_var_samples: result = visit_image_samples(ctx, instr); break; - case nir_intrinsic_image_load: + case nir_intrinsic_image_var_load: result = visit_image_load(ctx, instr); break; - case nir_intrinsic_image_store: + case nir_intrinsic_image_var_store: visit_image_store(ctx, instr); break; - case nir_intrinsic_image_atomic_add: - case nir_intrinsic_image_atomic_min: - case nir_intrinsic_image_atomic_max: - case nir_intrinsic_image_atomic_and: - case nir_intrinsic_image_atomic_or: - case nir_intrinsic_image_atomic_xor: - case nir_intrinsic_image_atomic_exchange: - case nir_intrinsic_image_atomic_comp_swap: + case nir_intrinsic_image_var_atomic_add: + case nir_intrinsic_image_var_atomic_min: + case nir_intrinsic_image_var_atomic_max: + case nir_intrinsic_image_var_atomic_and: + case nir_intrinsic_image_var_atomic_or: + case nir_intrinsic_image_var_atomic_xor: + case nir_intrinsic_image_var_atomic_exchange: + case 
nir_intrinsic_image_var_atomic_comp_swap: result = visit_image_atomic(ctx, instr); break; - case nir_intrinsic_image_size: + case nir_intrinsic_image_var_size: result = visit_image_size(ctx, instr); break; case nir_intrinsic_shader_clock: diff --git a/
[Mesa-dev] [RFC v1 08/38] nir/builder: Add deref building helpers
--- src/compiler/nir/nir_builder.h | 87 ++ 1 file changed, 87 insertions(+) diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h index 36e0ae3..27aa65b 100644 --- a/src/compiler/nir/nir_builder.h +++ b/src/compiler/nir/nir_builder.h @@ -525,6 +525,93 @@ nir_ssa_for_alu_src(nir_builder *build, nir_alu_instr *instr, unsigned srcn) return nir_imov_alu(build, *src, num_components); } +static inline nir_deref_instr * +nir_build_deref_var(nir_builder *build, nir_variable *var) +{ + nir_deref_instr *deref = + nir_deref_instr_create(build->shader, nir_deref_type_var); + + deref->mode = var->data.mode; + deref->type = var->type; + deref->var = var; + + nir_ssa_dest_init(&deref->instr, &deref->dest, 1, 32, NULL); + + nir_builder_instr_insert(build, &deref->instr); + + return deref; +} + +static inline nir_deref_instr * +nir_build_deref_array(nir_builder *build, nir_deref_instr *parent, + nir_ssa_def *index) +{ + assert(glsl_type_is_array(parent->type) || + glsl_type_is_matrix(parent->type) || + glsl_type_is_vector(parent->type)); + + nir_deref_instr *deref = + nir_deref_instr_create(build->shader, nir_deref_type_array); + + deref->mode = parent->mode; + deref->type = glsl_get_array_element(parent->type); + deref->parent = nir_src_for_ssa(&parent->dest.ssa); + deref->arr.index = nir_src_for_ssa(index); + + nir_ssa_dest_init(&deref->instr, &deref->dest, + parent->dest.ssa.num_components, + parent->dest.ssa.bit_size, NULL); + + nir_builder_instr_insert(build, &deref->instr); + + return deref; +} + +static inline nir_deref_instr * +nir_build_deref_array_wildcard(nir_builder *build, nir_deref_instr *parent) +{ + assert(glsl_type_is_array(parent->type) || + glsl_type_is_matrix(parent->type)); + + nir_deref_instr *deref = + nir_deref_instr_create(build->shader, nir_deref_type_array_wildcard); + + deref->mode = parent->mode; + deref->type = glsl_get_array_element(parent->type); + deref->parent = nir_src_for_ssa(&parent->dest.ssa); + + 
nir_ssa_dest_init(&deref->instr, &deref->dest, + parent->dest.ssa.num_components, + parent->dest.ssa.bit_size, NULL); + + nir_builder_instr_insert(build, &deref->instr); + + return deref; +} + +static inline nir_deref_instr * +nir_build_deref_struct(nir_builder *build, nir_deref_instr *parent, + unsigned index) +{ + assert(glsl_type_is_struct(parent->type)); + + nir_deref_instr *deref = + nir_deref_instr_create(build->shader, nir_deref_type_struct); + + deref->mode = parent->mode; + deref->type = glsl_get_struct_field(parent->type, index); + deref->parent = nir_src_for_ssa(&parent->dest.ssa); + deref->strct.index = index; + + nir_ssa_dest_init(&deref->instr, &deref->dest, + parent->dest.ssa.num_components, + parent->dest.ssa.bit_size, NULL); + + nir_builder_instr_insert(build, &deref->instr); + + return deref; +} + static inline nir_ssa_def * nir_load_var(nir_builder *build, nir_variable *var) { -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC v1 07/38] nir: Add a deref instruction type
This commit adds a new instruction type to NIR for handling derefs. Nothing uses it yet but this adds the data structure as well as all of the code to validate, print, clone, and [de]serialize them. --- src/compiler/nir/nir.c| 50 +++ src/compiler/nir/nir.h| 44 - src/compiler/nir/nir_clone.c | 41 src/compiler/nir/nir_instr_set.c | 76 + src/compiler/nir/nir_opt_copy_propagate.c | 18 +++ src/compiler/nir/nir_opt_dce.c| 7 +++ src/compiler/nir/nir_print.c | 46 ++ src/compiler/nir/nir_serialize.c | 79 +++ src/compiler/nir/nir_validate.c | 74 + 9 files changed, 434 insertions(+), 1 deletion(-) diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c index b16d6fa..2ed96a1 100644 --- a/src/compiler/nir/nir.c +++ b/src/compiler/nir/nir.c @@ -469,6 +469,26 @@ nir_alu_instr_create(nir_shader *shader, nir_op op) return instr; } +nir_deref_instr * +nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type) +{ + nir_deref_instr *instr = + rzalloc_size(shader, sizeof(nir_deref_instr)); + + instr_init(&instr->instr, nir_instr_type_deref); + + instr->deref_type = deref_type; + if (deref_type != nir_deref_type_var) + src_init(&instr->parent); + + if (deref_type == nir_deref_type_array) + src_init(&instr->arr.index); + + dest_init(&instr->dest); + + return instr; +} + nir_jump_instr * nir_jump_instr_create(nir_shader *shader, nir_jump_type type) { @@ -1198,6 +1218,12 @@ visit_alu_dest(nir_alu_instr *instr, nir_foreach_dest_cb cb, void *state) } static bool +visit_deref_dest(nir_deref_instr *instr, nir_foreach_dest_cb cb, void *state) +{ + return cb(&instr->dest, state); +} + +static bool visit_intrinsic_dest(nir_intrinsic_instr *instr, nir_foreach_dest_cb cb, void *state) { @@ -1238,6 +1264,8 @@ nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state) switch (instr->type) { case nir_instr_type_alu: return visit_alu_dest(nir_instr_as_alu(instr), cb, state); + case nir_instr_type_deref: + return visit_deref_dest(nir_instr_as_deref(instr), cb, state); case 
nir_instr_type_intrinsic: return visit_intrinsic_dest(nir_instr_as_intrinsic(instr), cb, state); case nir_instr_type_tex: @@ -1283,6 +1311,7 @@ nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state) { switch (instr->type) { case nir_instr_type_alu: + case nir_instr_type_deref: case nir_instr_type_tex: case nir_instr_type_intrinsic: case nir_instr_type_phi: @@ -1349,6 +1378,23 @@ visit_alu_src(nir_alu_instr *instr, nir_foreach_src_cb cb, void *state) } static bool +visit_deref_instr_src(nir_deref_instr *instr, + nir_foreach_src_cb cb, void *state) +{ + if (instr->deref_type != nir_deref_type_var) { + if (!visit_src(&instr->parent, cb, state)) + return false; + } + + if (instr->deref_type == nir_deref_type_array) { + if (!visit_src(&instr->arr.index, cb, state)) + return false; + } + + return true; +} + +static bool visit_tex_src(nir_tex_instr *instr, nir_foreach_src_cb cb, void *state) { for (unsigned i = 0; i < instr->num_srcs; i++) { @@ -1436,6 +1482,10 @@ nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state) if (!visit_alu_src(nir_instr_as_alu(instr), cb, state)) return false; break; + case nir_instr_type_deref: + if (!visit_deref_instr_src(nir_instr_as_deref(instr), cb, state)) + return false; + break; case nir_instr_type_intrinsic: if (!visit_intrinsic_src(nir_instr_as_intrinsic(instr), cb, state)) return false; diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index cc0b171..0e69a85 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -421,6 +421,7 @@ typedef struct nir_register { typedef enum { nir_instr_type_alu, + nir_instr_type_deref, nir_instr_type_call, nir_instr_type_tex, nir_instr_type_intrinsic, @@ -888,7 +889,8 @@ bool nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2, typedef enum { nir_deref_type_var, nir_deref_type_array, - nir_deref_type_struct + nir_deref_type_array_wildcard, + nir_deref_type_struct, } nir_deref_type; typedef struct nir_deref { @@ -950,6 
+952,41 @@ nir_deref_tail(nir_deref *deref) typedef struct { nir_instr instr; + /** The type of this deref instruction */ + nir_deref_type deref_type; + + /** The mode of the underlying variable */ + nir_variable_mode mode; + + /** The dereferenced type of the resulting pointer value */ + const struct glsl_type *type; + + union { + /** Variable being dereferenced if deref_type is a deref_var */ +
[Mesa-dev] [RFC v1 00/38] nir: Move to using instructions for derefs
This is something that Connor and I have been talking about for some time now. The basic idea is to replace the current singly linked nir_deref list with deref instructions. This is similar to what LLVM does and it offers quite a bit more freedom when we start getting more realistic pointers from compute applications. This series is the start of the conversion. I have almost everything working for i965. The two remaining gaps are nir_lower_locals_to_regs and nir_lower_samplers. However, those two passes are a bit less practical to change in the same additive fashion that I've done for most of core NIR. Instead, my plan is to just change them for everyone all at the same time. Before we can do that, however, we need to get other drivers to the same point as i965. I don't think I've broken anyone else's drivers in this process since they just lower derefs away immediately. My next project will be Vulkan. Unfortunately, that means reworking the way that NIR functions work so that we can use deref instructions with them. My plan there is to vastly simplify them so that they just have a list of SSA defs which get filled out at the start of the function call. Those SSA defs may be derefs or regular values. Return parameters are handled by passing a deref into the function as a parameter and then writing to it from within the function. This should map fairly naturally to SPIR-V but it'll be a fairly big change. I've already started hacking on this and I think I really like it. One result is that function inlining is now basically trivial. If you're the owner of a GL driver and would like to work on converting it, that would be awesome. I'm happy to take a crack but there's enough work to do to get core Vulkan bits working that it'd be nice if I didn't do all the work. :-) Bas, I'm afraid Vulkan is blocking on the function reworks; I'll let you know once I have something. 
The other thing that's left to do after we get all the drivers moved over is to rip out legacy deref chains and do a final rework of a few of the core optimization/lowering passes. Some passes such as vars_to_ssa still use deref chains internally by converting deref instructions to deref chains on-the-fly. I've got a plan for converting them but we need to make deref chains an artifact of history first. This series can be found as a branch on gitlab: https://gitlab.freedesktop.org/jekstrand/mesa/commits/review/nir-deref-instrs-v1 Cc: Rob Clark Cc: Timothy Arceri Cc: Eric Anholt Cc: Connor Abbott Cc: Bas Nieuwenhuizen Jason Ekstrand (38): nir: Add src/dest num_components helpers nir: Return a cursor from nir_instr_remove nir/vars_to_ssa: Remove copies from the correct set nir/lower_indirect_derefs: Support interp_var_at intrinsics nir/validator: Validate that all used variables exist nir: Rename image intrinsics to image_var nir: Add a deref instruction type nir/builder: Add deref building helpers nir: Add _deref versions of all of the _var intrinsics nir: Add deref sources to texture instructions nir: Add helpers for working with deref instructions anv,i965,radv,st,ir3: Call nir_lower_deref_instrs glsl/nir: Only claim to handle intrinsic functions glsl/nir: Use deref instructions instead of deref chains prog/nir: Simplify some load/store operations prog/nir: Use deref instructions for params nir/lower_atomics: Rework the main walker loop a bit nir: Support deref instructions in remove_dead_variables nir: Add a pass for fixing deref modes nir: Support deref instructions in lower_global_vars_to_local nir: Support deref instructions in lower_io_to_temporaries nir: Add a deref path helper struct nir: Support deref instructions in lower_var_copies nir: Support deref instructions in split_var_copies nir: Support deref instructions in lower_vars_to_ssa nir: Support deref instructions in lower_indirect_derefs nir/deref: Add a deref cleanup function nir: Support deref 
instructions in lower_system_values nir: Support deref instructions in lower_clip_cull nir: Support deref instructions in propagate_invariant nir: Support deref instructions in gather_info nir: Support deref instructions in lower_io nir: Support deref instructions in lower_atomics nir: Support deref instructions in lower_wpos_ytransform nir: Support deref instructions in remove_unused_varyings intel,ir3: Disable nir_opt_copy_prop_vars intel/nir: Fixup deref modes after lowering patch vertices i965: Move nir_lower_deref_instrs to right before locals_to_regs src/amd/common/ac_nir_to_llvm.c| 42 +-- src/amd/vulkan/radv_meta_bufimage.c| 8 +- src/amd/vulkan/radv_meta_fast_clear.c | 2 +- src/amd/vulkan/radv_meta_resolve_cs.c | 2 +- src/amd/vulkan/radv_shader.c | 2 + src/amd/vulkan/radv_shader_info.c | 40 +-- src/compiler/Makefile.sources | 2 + src/compiler
[Mesa-dev] [RFC v1 11/38] nir: Add helpers for working with deref instructions
This commit adds a pass for lowering deref instructions to deref chains as well as some smaller helpers to ease the transition. --- src/compiler/Makefile.sources | 1 + src/compiler/nir/meson.build | 1 + src/compiler/nir/nir.h | 50 ++- src/compiler/nir/nir_builder.h | 23 src/compiler/nir/nir_deref.c | 301 + 5 files changed, 374 insertions(+), 2 deletions(-) create mode 100644 src/compiler/nir/nir_deref.c diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources index 55143db..6ee357c 100644 --- a/src/compiler/Makefile.sources +++ b/src/compiler/Makefile.sources @@ -195,6 +195,7 @@ NIR_FILES = \ nir/nir_control_flow.c \ nir/nir_control_flow.h \ nir/nir_control_flow_private.h \ + nir/nir_deref.c \ nir/nir_dominance.c \ nir/nir_from_ssa.c \ nir/nir_gather_info.c \ diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build index 289bb9e..963ef02 100644 --- a/src/compiler/nir/meson.build +++ b/src/compiler/nir/meson.build @@ -89,6 +89,7 @@ files_libnir = files( 'nir_control_flow.c', 'nir_control_flow.h', 'nir_control_flow_private.h', + 'nir_deref.c', 'nir_dominance.c', 'nir_from_ssa.c', 'nir_gather_info.c', diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index b575545..db37b98 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -984,6 +984,42 @@ typedef struct { nir_dest dest; } nir_deref_instr; +NIR_DEFINE_CAST(nir_instr_as_deref, nir_instr, nir_deref_instr, instr, +type, nir_instr_type_deref) + +static inline nir_deref_instr * +nir_src_as_deref(nir_src src) +{ + if (!src.is_ssa) + return NULL; + + if (src.ssa->parent_instr->type != nir_instr_type_deref) + return NULL; + + return nir_instr_as_deref(src.ssa->parent_instr); +} + +static inline nir_deref_instr * +nir_deref_instr_parent(nir_deref_instr *instr) +{ + if (instr->deref_type == nir_deref_type_var) + return NULL; + else + return nir_src_as_deref(instr->parent); +} + +static inline nir_variable * +nir_deref_instr_get_variable(nir_deref_instr *instr) +{ + 
while (instr->deref_type != nir_deref_type_var) + instr = nir_deref_instr_parent(instr); + + return instr->var; +} + +nir_deref_var * +nir_deref_instr_to_deref(nir_deref_instr *instr, void *mem_ctx); + typedef struct { nir_instr instr; @@ -1560,8 +1596,6 @@ typedef struct { NIR_DEFINE_CAST(nir_instr_as_alu, nir_instr, nir_alu_instr, instr, type, nir_instr_type_alu) -NIR_DEFINE_CAST(nir_instr_as_deref, nir_instr, nir_deref_instr, instr, -type, nir_instr_type_deref) NIR_DEFINE_CAST(nir_instr_as_call, nir_instr, nir_call_instr, instr, type, nir_instr_type_call) NIR_DEFINE_CAST(nir_instr_as_jump, nir_instr, nir_jump_instr, instr, @@ -2559,6 +2593,18 @@ bool nir_inline_functions(nir_shader *shader); bool nir_propagate_invariant(nir_shader *shader); +enum nir_lower_deref_flags { + nir_lower_load_store_derefs = (1 << 0), + nir_lower_texture_derefs = (1 << 1), + nir_lower_interp_derefs = (1 << 2), + nir_lower_atomic_counter_derefs = (1 << 3), + nir_lower_atomic_derefs = (1 << 4), + nir_lower_image_derefs =(1 << 5), +}; + +bool nir_lower_deref_instrs(nir_shader *shader, +enum nir_lower_deref_flags flags); + void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, nir_shader *shader); bool nir_lower_var_copies(nir_shader *shader); diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h index b4dda96..f1e52b2 100644 --- a/src/compiler/nir/nir_builder.h +++ b/src/compiler/nir/nir_builder.h @@ -612,6 +612,29 @@ nir_build_deref_struct(nir_builder *build, nir_deref_instr *parent, return deref; } +static inline nir_deref_instr * +nir_build_deref_for_chain(nir_builder *b, nir_deref_var *deref_var) +{ + nir_deref_instr *tail = nir_build_deref_var(b, deref_var->var); + for (nir_deref *d = deref_var->deref.child; d; d = d->child) { + if (d->deref_type == nir_deref_type_array) { + nir_deref_array *a = nir_deref_as_array(d); + assert(a->deref_array_type != nir_deref_array_type_wildcard); + + nir_ssa_def *index = nir_imm_int(b, a->base_offset); + if 
(a->deref_array_type == nir_deref_array_type_indirect) +index = nir_iadd(b, index, nir_ssa_for_src(b, a->indirect, 1)); + + tail = nir_build_deref_array(b, tail, index); + } else { + nir_deref_struct *s = nir_deref_as_struct(d); + tail = nir_build_deref_struct(b, tail, s->index); + } + } + + return tail; +} + static inline nir_ssa_def * nir_load_deref(nir_builder *build, nir_deref_instr *deref) { diff --git a/src/compiler/nir/nir_deref.c b/src/compiler/nir/nir_deref.c new file mode 100644 index 000..87a8192 --- /dev/null +++ b/src/comp
[Mesa-dev] [PATCH 01/38] nir: Add src/dest num_components helpers
We already have these for bit_size --- src/compiler/nir/nir.h | 12 1 file changed, 12 insertions(+) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 7ad19b4..8f4a28c 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -626,11 +626,23 @@ nir_src_bit_size(nir_src src) } static inline unsigned +nir_src_num_components(nir_src src) +{ + return src.is_ssa ? src.ssa->num_components : src.reg.reg->num_components; +} + +static inline unsigned nir_dest_bit_size(nir_dest dest) { return dest.is_ssa ? dest.ssa.bit_size : dest.reg.reg->bit_size; } +static inline unsigned +nir_dest_num_components(nir_dest dest) +{ + return dest.is_ssa ? dest.ssa.num_components : dest.reg.reg->num_components; +} + void nir_src_copy(nir_src *dest, const nir_src *src, void *instr_or_if); void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr); -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC v1 10/38] nir: Add deref sources to texture instructions
--- src/compiler/nir/nir.h | 2 ++ src/compiler/nir/nir_print.c | 6 ++ 2 files changed, 8 insertions(+) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 14b532d..b575545 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -1233,6 +1233,8 @@ typedef enum { nir_tex_src_ms_mcs, /* MSAA compression value */ nir_tex_src_ddx, nir_tex_src_ddy, + nir_tex_src_texture_deref, /* < deref pointing to the texture */ + nir_tex_src_sampler_deref, /* < deref pointing to the sampler */ nir_tex_src_texture_offset, /* < dynamically uniform indirect offset */ nir_tex_src_sampler_offset, /* < dynamically uniform indirect offset */ nir_tex_src_plane, /* < selects plane for planar textures */ diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c index 081ef72..f7ba164 100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@ -809,6 +809,12 @@ print_tex_instr(nir_tex_instr *instr, print_state *state) case nir_tex_src_ddy: fprintf(fp, "(ddy)"); break; + case nir_tex_src_texture_deref: + fprintf(fp, "(texture_deref)"); + break; + case nir_tex_src_sampler_deref: + fprintf(fp, "(sampler_deref)"); + break; case nir_tex_src_texture_offset: fprintf(fp, "(texture_offset)"); break; -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 03/38] nir/vars_to_ssa: Remove copies from the correct set
Cc: mesa-sta...@lists.freedesktop.org --- src/compiler/nir/nir_lower_vars_to_ssa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler/nir/nir_lower_vars_to_ssa.c b/src/compiler/nir/nir_lower_vars_to_ssa.c index e8cfe30..0cc6514 100644 --- a/src/compiler/nir/nir_lower_vars_to_ssa.c +++ b/src/compiler/nir/nir_lower_vars_to_ssa.c @@ -464,7 +464,7 @@ lower_copies_to_load_store(struct deref_node *node, struct set_entry *arg_entry = _mesa_set_search(arg_node->copies, copy); assert(arg_entry); - _mesa_set_remove(node->copies, arg_entry); + _mesa_set_remove(arg_node->copies, arg_entry); } nir_instr_remove(&copy->instr); -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC v1 13/38] glsl/nir: Only claim to handle intrinsic functions
Non-intrinsic function handling has never actually been tested and probably doesn't work. Just get rid of it for now. We can always add it back in later if it's useful. --- src/compiler/glsl/glsl_to_nir.cpp | 25 ++--- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp index c2f7fd8..9da4526 100644 --- a/src/compiler/glsl/glsl_to_nir.cpp +++ b/src/compiler/glsl/glsl_to_nir.cpp @@ -614,14 +614,7 @@ nir_visitor::visit(ir_loop_jump *ir) void nir_visitor::visit(ir_return *ir) { - if (ir->value != NULL) { - nir_intrinsic_instr *copy = - nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var); - - copy->variables[0] = nir_deref_var_create(copy, this->impl->return_var); - copy->variables[1] = evaluate_deref(&copy->instr, ir->value); - } - + assert(ir->value == NULL); nir_jump_instr *instr = nir_jump_instr_create(this->shader, nir_jump_return); nir_builder_instr_insert(&b, &instr->instr); } @@ -1231,21 +1224,7 @@ nir_visitor::visit(ir_call *ir) return; } - struct hash_entry *entry = - _mesa_hash_table_search(this->overload_table, ir->callee); - assert(entry); - nir_function *callee = (nir_function *) entry->data; - - nir_call_instr *instr = nir_call_instr_create(this->shader, callee); - - unsigned i = 0; - foreach_in_list(ir_dereference, param, &ir->actual_parameters) { - instr->params[i] = evaluate_deref(&instr->instr, param); - i++; - } - - instr->return_deref = evaluate_deref(&instr->instr, ir->return_deref); - nir_builder_instr_insert(&b, &instr->instr); + unreachable("glsl_to_nir only handles function calls to intrinsics"); } void -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 02/38] nir: Return a cursor from nir_instr_remove
Because nir_instr_remove is an inline wrapper around nir_instr_remove_v, the compiler should be able to tell that the return value is unused and not emit the extra code in most cases. --- src/compiler/nir/nir.c| 2 +- src/compiler/nir/nir.h| 16 +++- src/compiler/nir/nir_opt_copy_prop_vars.c | 19 ++- 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c index a97b119..b16d6fa 100644 --- a/src/compiler/nir/nir.c +++ b/src/compiler/nir/nir.c @@ -1158,7 +1158,7 @@ remove_defs_uses(nir_instr *instr) nir_foreach_src(instr, remove_use_cb, instr); } -void nir_instr_remove(nir_instr *instr) +void nir_instr_remove_v(nir_instr *instr) { remove_defs_uses(instr); exec_node_remove(&instr->node); diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 8f4a28c..cc0b171 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2277,7 +2277,21 @@ nir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after) nir_instr_insert(nir_after_cf_list(list), after); } -void nir_instr_remove(nir_instr *instr); +void nir_instr_remove_v(nir_instr *instr); + +static inline nir_cursor +nir_instr_remove(nir_instr *instr) +{ + nir_cursor cursor; + nir_instr *prev = nir_instr_prev(instr); + if (prev) { + cursor = nir_after_instr(prev); + } else { + cursor = nir_before_block(instr->block); + } + nir_instr_remove_v(instr); + return cursor; +} /** @} */ diff --git a/src/compiler/nir/nir_opt_copy_prop_vars.c b/src/compiler/nir/nir_opt_copy_prop_vars.c index 89ddc8d..cc8f00f 100644 --- a/src/compiler/nir/nir_opt_copy_prop_vars.c +++ b/src/compiler/nir/nir_opt_copy_prop_vars.c @@ -349,21 +349,6 @@ store_to_entry(struct copy_prop_var_state *state, struct copy_entry *entry, } } -/* Remove an instruction and return a cursor pointing to where it was */ -static nir_cursor -instr_remove_cursor(nir_instr *instr) -{ - nir_cursor cursor; - nir_instr *prev = nir_instr_prev(instr); - if (prev) { - cursor = nir_after_instr(prev); 
- } else { - cursor = nir_before_block(instr->block); - } - nir_instr_remove(instr); - return cursor; -} - /* Do a "load" from an SSA-based entry return it in "value" as a value with a * single SSA def. Because an entry could reference up to 4 different SSA * defs, a vecN operation may be inserted to combine them into a single SSA @@ -396,7 +381,7 @@ load_from_ssa_entry_value(struct copy_prop_var_state *state, if (all_same) { /* Our work here is done */ - b->cursor = instr_remove_cursor(&intrin->instr); + b->cursor = nir_instr_remove(&intrin->instr); intrin->instr.block = NULL; return true; } @@ -594,7 +579,7 @@ load_from_deref_entry_value(struct copy_prop_var_state *state, value_tail->child = nir_deref_clone(src_tail->child, value_tail); } - b->cursor = instr_remove_cursor(&intrin->instr); + b->cursor = nir_instr_remove(&intrin->instr); return true; } -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/9] Clean up draw path state updates.
Hi Brian, On Tuesday, 20 March 2018 16:16:13 CET Brian Paul wrote: > The code changes look good, AFAICT. But some of the comments could be > improved, IMHO. See other replies... > > Otherwise, for the series, > Reviewed-by: Brian Paul Thanks for the review! Will incorporate the proposed changes. best Mathias ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/3] st/glsl_to_nir: fix driver location for packed doubles
--- src/mesa/state_tracker/st_glsl_to_nir.cpp | 22 -- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp index afb6120d9d..b01be622f7 100644 --- a/src/mesa/state_tracker/st_glsl_to_nir.cpp +++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp @@ -141,16 +141,23 @@ st_nir_assign_var_locations(struct exec_list *var_list, unsigned *size, type = glsl_get_array_element(type); } + unsigned var_size = type_size(type); + /* Builtins don't allow component packing so we only need to worry about * user defined varyings sharing the same location. */ bool processed = false; if (var->data.location >= base) { unsigned glsl_location = var->data.location - base; - if (processed_locs[var->data.index] & ((uint64_t)1 << glsl_location)) -processed = true; - else -processed_locs[var->data.index] |= ((uint64_t)1 << glsl_location); + + for (unsigned i = 0; i < var_size; i++) { +if (processed_locs[var->data.index] & +((uint64_t)1 << (glsl_location + i))) + processed = true; +else + processed_locs[var->data.index] |= + ((uint64_t)1 << (glsl_location + i)); + } } /* Because component packing allows varyings to share the same location @@ -162,9 +169,12 @@ st_nir_assign_var_locations(struct exec_list *var_list, unsigned *size, continue; } - assigned_locations[var->data.location] = location; + for (unsigned i = 0; i < var_size; i++) { + assigned_locations[var->data.location + i] = location + i; + } + var->data.driver_location = location; - location += type_size(type); + location += var_size; } *size += location; -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/3] ac/nir_to_llvm: fix component packing for double outputs
We need to wait until after the writemask is widened before we adjust it for component packing. Together with the previous patch this fixes a number of arb_enhanced_layouts component layout piglit tests. --- src/amd/common/ac_nir_to_llvm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 1fd2745201..a1cbf65edb 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -1793,7 +1793,7 @@ visit_store_var(struct ac_nir_context *ctx, int idx = instr->variables[0]->var->data.driver_location; unsigned comp = instr->variables[0]->var->data.location_frac; LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0])); - int writemask = instr->const_index[0] << comp; + int writemask = instr->const_index[0]; LLVMValueRef indir_index; unsigned const_index; get_deref_offset(ctx, instr->variables[0], false, @@ -1808,6 +1808,8 @@ visit_store_var(struct ac_nir_context *ctx, writemask = widen_mask(writemask, 2); } + writemask = writemask << comp; + switch (instr->variables[0]->var->data.mode) { case nir_var_shader_out: -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/3] radeonsi/nir: fix scanning of multi-slot output varyings
This fixes tcs/tes varying arrays where we don't lower indirects and therefore don't split arrays. Here we also fix usagemask for dual slot doubles. Fixes a number of arb_tessellation_shader piglit tests. --- src/gallium/drivers/radeonsi/si_shader_nir.c | 236 ++- 1 file changed, 127 insertions(+), 109 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index e83c2ca604..5b6afc10e5 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -442,32 +442,48 @@ void si_nir_scan_shader(const struct nir_shader *nir, nir_foreach_variable(variable, &nir->outputs) { unsigned semantic_name, semantic_index; - if (nir->info.stage == MESA_SHADER_FRAGMENT) { - tgsi_get_gl_frag_result_semantic(variable->data.location, - &semantic_name, &semantic_index); + i = variable->data.driver_location; - /* Adjust for dual source blending */ - if (variable->data.index > 0) { - semantic_index++; - } - } else { - tgsi_get_gl_varying_semantic(variable->data.location, true, -&semantic_name, &semantic_index); + const struct glsl_type *type = variable->type; + if (nir_is_per_vertex_io(variable, nir->info.stage)) { + assert(glsl_type_is_array(type)); + type = glsl_get_array_element(type); } - i = variable->data.driver_location; + unsigned attrib_count = glsl_count_attribute_slots(type, false); + for (unsigned k = 0; k < attrib_count; k++, i++) { + + if (nir->info.stage == MESA_SHADER_FRAGMENT) { + tgsi_get_gl_frag_result_semantic(variable->data.location + k, + &semantic_name, &semantic_index); - unsigned num_components = 4; - unsigned vector_elements = glsl_get_vector_elements(glsl_without_array(variable->type)); - if (vector_elements) - num_components = vector_elements; + /* Adjust for dual source blending */ + if (variable->data.index > 0) { + semantic_index++; + } + } else { + tgsi_get_gl_varying_semantic(variable->data.location + k, true, +&semantic_name, &semantic_index); + } 
- if (glsl_type_is_64bit(glsl_without_array(variable->type))) - num_components = MIN2(num_components * 2, 4); + unsigned num_components = 4; + unsigned vector_elements = glsl_get_vector_elements(glsl_without_array(variable->type)); + if (vector_elements) + num_components = vector_elements; + + unsigned component = variable->data.location_frac; + if (glsl_type_is_64bit(glsl_without_array(variable->type))) { + if (glsl_type_is_dual_slot(glsl_without_array(variable->type)) && k % 2) { + num_components = (num_components * 2) - 4; + component = 0; + } else { + num_components = MIN2(num_components * 2, 4); + } + } - ubyte usagemask = 0; - for (unsigned j = 0; j < num_components; j++) { - switch (j + variable->data.location_frac) { + ubyte usagemask = 0; + for (unsigned j = component; j < num_components + component; j++) { + switch (j) { case 0: usagemask |= TGSI_WRITEMASK_X; break; @@ -482,110 +498,112 @@ void si_nir_scan_shader(const struct nir_shader *nir, break; default: unreachable("error calculating component index"); + } } - } - unsigned gs_out_streams; - if (variable->data.stream & (1u << 31)) { - gs_out_streams = variable->data.stream & ~(1u << 31); - } else { - assert(variable->data.
Re: [Mesa-dev] [PATCH kmscube] cube-tex: make use of modifiers
On Tue, Mar 20, 2018 at 2:45 PM, Emil Velikov wrote: > On 20 March 2018 at 18:02, Christian Gmeiner > wrote: >> Fixes rendering issues with mode rgba on etnaviv. I have applied >> the same change for nv12 variants but they are not supported on >> etnaviv. >> >> Signed-off-by: Christian Gmeiner >> --- >> cube-tex.c | 32 >> 1 file changed, 24 insertions(+), 8 deletions(-) >> >> diff --git a/cube-tex.c b/cube-tex.c >> index 9e38ae8..dba19ff 100644 >> --- a/cube-tex.c >> +++ b/cube-tex.c >> @@ -213,7 +213,7 @@ static const char *fragment_shader_source_2img = >> >> static const uint32_t texw = 512, texh = 512; >> >> -static int get_fd_rgba(uint32_t *pstride) >> +static int get_fd_rgba(uint32_t *pstride, uint64_t *modifier) >> { >> struct gbm_bo *bo; >> void *map_data = NULL; >> @@ -234,6 +234,7 @@ static int get_fd_rgba(uint32_t *pstride) >> gbm_bo_unmap(bo, map_data); >> >> fd = gbm_bo_get_fd(bo); >> + *modifier = gbm_bo_get_modifier(bo); >> > Based on the existing gbm_bo_get_modifier handling there should be a guard > here: > > #ifdef HAVE_GBM_MODIFIERS > *modifier = ... > #else > *modifier = DRM_FORMAT_MOD_INVALID; > #endif > We probably *could* just require new enough version of gbm. But a similar issue came up with $blob gles driver that did not support gbm_bo_map(), and I'd suggested that we could probably start doing __attribute__((weak)) fallbacks for new gbm fxns which returned errors (or in this case DRM_FORMAT_MOD_LINEAR ?) to avoid making these things build time configs.. 
BR, -R > > >> /* we have the fd now, no longer need the bo: */ >> gbm_bo_destroy(bo); >> @@ -243,7 +244,7 @@ static int get_fd_rgba(uint32_t *pstride) >> return fd; >> } >> >> -static int get_fd_y(uint32_t *pstride) >> +static int get_fd_y(uint32_t *pstride, uint64_t *modifier) >> { >> struct gbm_bo *bo; >> void *map_data = NULL; >> @@ -264,6 +265,7 @@ static int get_fd_y(uint32_t *pstride) >> gbm_bo_unmap(bo, map_data); >> >> fd = gbm_bo_get_fd(bo); >> + *modifier = gbm_bo_get_modifier(bo); >> > Ditto > >> /* we have the fd now, no longer need the bo: */ >> gbm_bo_destroy(bo); >> @@ -273,7 +275,7 @@ static int get_fd_y(uint32_t *pstride) >> return fd; >> } >> >> -static int get_fd_uv(uint32_t *pstride) >> +static int get_fd_uv(uint32_t *pstride, uint64_t *modifier) >> { >> struct gbm_bo *bo; >> void *map_data = NULL; >> @@ -294,6 +296,7 @@ static int get_fd_uv(uint32_t *pstride) >> gbm_bo_unmap(bo, map_data); >> >> fd = gbm_bo_get_fd(bo); >> + *modifier = gbm_bo_get_modifier(bo); >> > And again? > >> /* we have the fd now, no longer need the bo: */ >> gbm_bo_destroy(bo); >> @@ -306,7 +309,8 @@ static int get_fd_uv(uint32_t *pstride) >> static int init_tex_rgba(void) >> { >> uint32_t stride; >> - int fd = get_fd_rgba(&stride); >> + uint64_t modifier; >> + int fd = get_fd_rgba(&stride, &modifier); >> const EGLint attr[] = { >> EGL_WIDTH, texw, >> EGL_HEIGHT, texh, >> @@ -314,6 +318,8 @@ static int init_tex_rgba(void) >> EGL_DMA_BUF_PLANE0_FD_EXT, fd, >> EGL_DMA_BUF_PLANE0_OFFSET_EXT, 0, >> EGL_DMA_BUF_PLANE0_PITCH_EXT, stride, >> + EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT, modifier & 0xffffffff, >> + EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT, modifier >> 32, > With these attributes added only if the modifier is valid. > Same goes for the analogous hunks through the rest of the patch. 
> > -Emil > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 5/5] clover: Dynamically calculate __OPENCL_VERSION__ and CLC language version
ping. This is the last of the series that still needs review. --Aaron On Thu, Mar 1, 2018 at 1:39 PM, Aaron Watry wrote: > Use get_language_version to calculate default cl standard based on > device capabilities and -cl-std specified in build options. > > v4: Squash the __OPENCL_VERSION__ and CLC language version patches > v3: (Jan) Allow device_version up to 2.2 while device_clc_version > only goes to 2.0 > Use get_cl_version to calculate version instead > v2: Split out from the previous patch (Pierre) > > Signed-off-by: Aaron Watry > CC: Pierre Moreau > CC: Jan Vesely > --- > src/gallium/state_trackers/clover/llvm/invocation.cpp | 6 -- > 1 file changed, 4 insertions(+), 2 deletions(-) > > diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp > b/src/gallium/state_trackers/clover/llvm/invocation.cpp > index 8d76f203de..f146695585 100644 > --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp > +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp > @@ -194,7 +194,7 @@ namespace { >compat::set_lang_defaults(c->getInvocation(), c->getLangOpts(), > compat::ik_opencl, > ::llvm::Triple(target.triple), > c->getPreprocessorOpts(), > -clang::LangStandard::lang_opencl11); > +get_language_version(opts, > device_clc_version)); > >c->createDiagnostics(new clang::TextDiagnosticPrinter( >*new raw_string_ostream(r_log), > @@ -225,7 +225,9 @@ namespace { >c.getPreprocessorOpts().Includes.push_back("clc/clc.h"); > >// Add definition for the OpenCL version > - c.getPreprocessorOpts().addMacroDef("__OPENCL_VERSION__=110"); > + c.getPreprocessorOpts().addMacroDef("__OPENCL_VERSION__=" + > + std::to_string(get_cl_version( > + dev.device_version()).version_number)); > >// clc.h requires that this macro be defined: > > c.getPreprocessorOpts().addMacroDef("cl_clang_storage_class_specifiers"); > -- > 2.14.1 > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 5/5] i965/miptree: Don't gtt map from map_depthstencil
On Tue, Jan 09, 2018 at 11:17:02PM -0800, Scott D Phillips wrote: > Instead of gtt mapping, call out to other map functions (map_map > or map_tiled_memcpy) for the depth surface. Removes a place where > gtt mapping is used. > --- > This is a bit icky, perhaps something like mapping z_mt with > BRW_MAP_DIRECT_BIT could be cleaner (but in that case the > depthstencil mapping and the DIRECT one would fight for the map > slot in mt->level[level].slice[slice].map). > > src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 48 > +-- > 1 file changed, 30 insertions(+), 18 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > index fa4ae06399..0b9aafe205 100644 > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > @@ -3460,16 +3460,21 @@ intel_miptree_map_depthstencil(struct brw_context > *brw, > * temporary buffer back out. > */ > if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) { > + struct intel_miptree_map z_mt_map = { > + .mode = map->mode & ~GL_MAP_WRITE_BIT, .x = map->x, .y = map->y, The old paths were simpler in that they used constants instead of map->mode. Why the change? 
> + .w = map->w, .h = map->h, > + }; > + if (z_mt->surf.tiling == ISL_TILING_LINEAR) > + intel_miptree_map_map(brw, z_mt, &z_mt_map, level, slice); > + else > + intel_miptree_map_tiled_memcpy(brw, z_mt, &z_mt_map, level, slice); > + uint32_t *z_map = z_mt_map.ptr; >uint32_t *packed_map = map->ptr; >uint8_t *s_map = intel_miptree_map_raw(brw, s_mt, GL_MAP_READ_BIT); > - uint32_t *z_map = intel_miptree_map_raw(brw, z_mt, GL_MAP_READ_BIT); >unsigned int s_image_x, s_image_y; > - unsigned int z_image_x, z_image_y; > >intel_miptree_get_image_offset(s_mt, level, slice, >&s_image_x, &s_image_y); > - intel_miptree_get_image_offset(z_mt, level, slice, > - &z_image_x, &z_image_y); > >for (uint32_t y = 0; y < map->h; y++) { >for (uint32_t x = 0; x < map->w; x++) { > @@ -3478,9 +3483,7 @@ intel_miptree_map_depthstencil(struct brw_context *brw, >map_x + s_image_x, >map_y + s_image_y, >brw->has_swizzling); > - ptrdiff_t z_offset = ((map_y + z_image_y) * > - (z_mt->surf.row_pitch / 4) + > - (map_x + z_image_x)); > + ptrdiff_t z_offset = y * (z_mt_map.stride / 4) + x; > uint8_t s = s_map[s_offset]; > uint32_t z = z_map[z_offset]; > > @@ -3494,12 +3497,15 @@ intel_miptree_map_depthstencil(struct brw_context > *brw, >} > >intel_miptree_unmap_raw(s_mt); > - intel_miptree_unmap_raw(z_mt); > + if (z_mt->surf.tiling == ISL_TILING_LINEAR) > + intel_miptree_unmap_map(z_mt); > + else > + intel_miptree_unmap_tiled_memcpy(brw, z_mt, &z_mt_map, level, > slice); > >DBG("%s: %d,%d %dx%d from z mt %p %d,%d, s mt %p %d,%d = %p/%d\n", > __func__, > map->x, map->y, map->w, map->h, > - z_mt, map->x + z_image_x, map->y + z_image_y, > + z_mt, map->x, map->y, I can see this update and the similar one below leading to confusion for a user reading the debug output if they aren't aware of this change. The user may map a rectangle that's not at (level,slice) (0,0) and be surprised that the second x,y coordinate is unchanged from the first. 
One solution would be to instead print the level and slice for the z mt. -Nanley > s_mt, map->x + s_image_x, map->y + s_image_y, > map->ptr, map->stride); > } else { > @@ -3521,16 +3527,21 @@ intel_miptree_unmap_depthstencil(struct brw_context > *brw, > bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z_FLOAT32; > > if (map->mode & GL_MAP_WRITE_BIT) { > + struct intel_miptree_map z_mt_map = { > + .mode = map->mode | GL_MAP_INVALIDATE_RANGE_BIT, .x = map->x, > + .y = map->y, .w = map->w, .h = map->h, > + }; > + if (z_mt->surf.tiling == ISL_TILING_LINEAR) > + intel_miptree_map_map(brw, z_mt, &z_mt_map, level, slice); > + else > + intel_miptree_map_tiled_memcpy(brw, z_mt, &z_mt_map, level, slice); > + uint32_t *z_map = z_mt_map.ptr; >uint32_t *packed_map = map->ptr; >uint8_t *s_map = intel_miptree_map_raw(brw, s_mt, GL_MAP_WRITE_BIT); > - uint32_t *z_map = intel_miptree_map_raw(brw, z_mt, GL_MAP_WRITE_BIT); >unsigned int s_image_x, s_image_y; > - unsigned int z_image_x, z_image_y; > >intel_miptree_get_image_offset(s_mt, level, slice, >&s_image_x, &s_image_y); > - intel_miptree_get_image_offset(z_
Re: [Mesa-dev] [PATCH v4 1/2] anv/cmd_buffer: consider multiview masks for tracking pending clear aspects
Hi Iago, > Fixes: > dEQP-VK.multiview.readback_implicit_clear.* Applied locally and verified this. Thanks for fixing those. I have a couple of comments after reading the patch, feel free to take them only if they make sense to you :-) > + /* When multiview is active, attachments with a renderpass clear > +* operation have their respective layers cleared on the first > +* subpass that uses them, and only in that subpass. We keep track > +* of this using a bitfield to indicate which layers of an attachment > +* have not been cleared yet when multiview is active. > +*/ > + uint32_t pending_clear_views; > }; (...) > + state->attachments[i].pending_clear_views = ~0; I was expecting pending_clear_views to have bits set only for the views that are being used -- i.e. it would be initialized with the combination (with the '|' operator) of all the view_masks of the subpasses. While setting all the bits to one works correctly, being more precise here could aid future debugging. > +for_each_bit(layer_idx, pending_clear_mask) { > + uint32_t layer = > + iview->planes[0].isl.base_array_layer + layer_idx; > + > + anv_image_clear_color(cmd_buffer, image, > + VK_IMAGE_ASPECT_COLOR_BIT, > + att_state->aux_usage, > + iview->planes[0].isl.format, > + iview->planes[0].isl.swizzle, > + iview->planes[0].isl.base_level, > + layer, 1, > + render_area, > + > vk_to_isl_color(att_state->clear_value.color)); > + > + att_state->pending_clear_views &= ~(1 << layer_idx); > +} Consider resetting the pending_clear_views bits all at once after the for_each_bit loop with something like att_state->pending_clear_views &= ~pending_clear_mask; (That also applies to the other patch). > @@ -3525,7 +3589,24 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer > *cmd_buffer, > } >} > > - att_state->pending_clear_aspects = 0; > + /* If multiview is enabled, then we are only done clearing when we no > + * longer have pending layers to clear, or when we have processed the > + * last subpass that uses this attachment. 
> + */ > + if (!is_multiview) { > + att_state->pending_clear_aspects = 0; > + } else if (att_state->pending_clear_views == 0) { > + att_state->pending_clear_aspects = 0; > + } else { > + uint32_t last_subpass_idx = > +cmd_state->pass->attachments[a].last_subpass_idx; > + const struct anv_subpass *last_subpass = > +&cmd_state->pass->subpasses[last_subpass_idx]; > + if (last_subpass == cmd_state->subpass) { > +att_state->pending_clear_aspects = 0; > + } > + } > + Consider extracting the "last subpass for this attachment" condition into a local variable or a function, and make a single if with the combinations of the conditions. Thanks, Caio ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] i965: android: pass the correct header path to brw_oa.py
$(intermediates) is somehow different from $(dir $@). We were also passing the xml files twice :/ Fixes: 2d2b15fbcab ("i965: fix autotools/android build") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105634 Signed-off-by: Lionel Landwerlin Tested-by: Mark Janes --- src/mesa/drivers/dri/i965/Android.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/Android.mk b/src/mesa/drivers/dri/i965/Android.mk index 8c4a613bcf3..9880fa53330 100644 --- a/src/mesa/drivers/dri/i965/Android.mk +++ b/src/mesa/drivers/dri/i965/Android.mk @@ -315,7 +315,7 @@ i965_oa_xml_FILES := $(addprefix $(LOCAL_PATH)/, \ $(intermediates)/brw_oa_metrics.c: $(LOCAL_PATH)/brw_oa.py $(i965_oa_xml_FILES) @echo "target Generated: $(PRIVATE_MODULE) <= $(notdir $(@))" @mkdir -p $(dir $@) - $(hide) $(MESA_PYTHON2) $< --code=$@ $(i965_oa_xml_FILES) --header=$@ $(i965_oa_xml_FILES) + $(hide) $(MESA_PYTHON2) $< --code=$@ --header=$(dir $@)/brw_oa_metrics.h $(i965_oa_xml_FILES) $(intermediates)/brw_oa_metrics.h: $(intermediates)/brw_oa_metrics.c -- 2.16.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3] i965: android: pass the correct header name to the python script
Thanks Mark, There is one thing I cannot explain about Emil's patch: how come it is: --code=out/target/product/androidia_64/gen/SHARED_LIBRARIES/i965_dri_intermediates/brw_oa_metrics.c Yet: --header=out/target/common/obj/PACKAGING/boot-jars-package-check_intermediates/brw_oa_metrics.h Those paths should be the same, no? Unless what we want is: --header=$(dir $@)/brw_oa_metrics.h On 20/03/18 21:39, Mark Janes wrote: I tested this, and still got an error: FAILED: out/target/product/androidia_64/gen/SHARED_LIBRARIES/i965_dri_intermediates/brw_oa_metrics.c /bin/bash -c "python vendor/intel/external/android_ia/mesa/src/mesa/drivers/dri/i965/brw_oa.py --code=out/target/product/androidia_64/gen/SHARED_LIBRARIES/i965_dri_intermediates/brw_oa_metrics.c --header=out/target/common/obj/PACKAGING/boot-jars-package-check_intermediates/brw_oa_metrics.h vendor/intel/external/android_ia/mesa/src/mesa/drivers/dri/i965/brw_oa_hsw.xml vendor/intel/external/android_ia/mesa/src/mesa/drivers/dri/i965/brw_oa_bdw.xml vendor/intel/external/android_ia/mesa/src/mesa/drivers/dri/i965/brw_oa_chv.xml vendor/intel/external/android_ia/mesa/src/mesa/drivers/dri/i965/brw_oa_sklgt2.xml vendor/intel/external/android_ia/mesa/src/mesa/drivers/dri/i965/brw_oa_sklgt3.xml vendor/intel/external/android_ia/mesa/src/mesa/drivers/dri/i965/brw_oa_sklgt4.xml vendor/intel/external/android_ia/mesa/src/mesa/drivers/dri/i965/brw_oa_bxt.xml vendor/intel/external/android_ia/mesa/src/mesa/drivers/dri/i965/brw_oa_kblgt2.xml vendor/intel/external/android_ia/mesa/src/mesa/drivers/dri/i965/brw_oa_kblgt3.xml vendor/intel/external/android_ia/mesa/src/mesa/drivers/dri/i965/brw_oa_glk.xml vendor/intel/external/android_ia/mesa/src/mesa/drivers/dri/i965/brw_oa_cflgt2.xml vendor/intel/external/android_ia/mesa/src/mesa/drivers/dri/i965/brw_oa_cflgt3.xml" Traceback (most recent call last): File "vendor/intel/external/android_ia/mesa/src/mesa/drivers/dri/i965/brw_oa.py", line 734, in main() File 
"vendor/intel/external/android_ia/mesa/src/mesa/drivers/dri/i965/brw_oa.py", line 561, in main header_file = open(args.header, 'w') IOError: [Errno 2] No such file or directory: 'out/target/common/obj/PACKAGING/boot-jars-package-check_intermediates/brw_oa_metrics.h' Emil Velikov writes: v2: Pass the actual filename instead of $(word ...) magic v3: Drop duplicate $(i965_oa_xml_FILES) Fixes: 2d2b15fbcab ("i965: fix autotools/android build Cc: Lionel Landwerlin Cc: Clayton Craft Signed-off-by: Emil Velikov --- src/mesa/drivers/dri/i965/Android.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/Android.mk b/src/mesa/drivers/dri/i965/Android.mk index 8c4a613bcf3..a1738a8afa2 100644 --- a/src/mesa/drivers/dri/i965/Android.mk +++ b/src/mesa/drivers/dri/i965/Android.mk @@ -315,7 +315,7 @@ i965_oa_xml_FILES := $(addprefix $(LOCAL_PATH)/, \ $(intermediates)/brw_oa_metrics.c: $(LOCAL_PATH)/brw_oa.py $(i965_oa_xml_FILES) @echo "target Generated: $(PRIVATE_MODULE) <= $(notdir $(@))" @mkdir -p $(dir $@) - $(hide) $(MESA_PYTHON2) $< --code=$@ $(i965_oa_xml_FILES) --header=$@ $(i965_oa_xml_FILES) + $(hide) $(MESA_PYTHON2) $< --code=$@ --header=$(intermediates)/brw_oa_metrics.h $(i965_oa_xml_FILES) $(intermediates)/brw_oa_metrics.h: $(intermediates)/brw_oa_metrics.c -- 2.16.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3] i965: android: pass the correct header name to the python script
I tested this, and still got an error: FAILED: out/target/product/androidia_64/gen/SHARED_LIBRARIES/i965_dri_intermediates/brw_oa_metrics.c /bin/bash -c "python vendor/intel/external/android_ia/mesa/src/mesa/drivers/dri/i965/brw_oa.py --code=out/target/product/androidia_64/gen/SHARED_LIBRARIES/i965_dri_intermediates/brw_oa_metrics.c --header=out/target/common/obj/PACKAGING/boot-jars-package-check_intermediates/brw_oa_metrics.h vendor/intel/external/android_ia/mesa/src/mesa/drivers/dri/i965/brw_oa_hsw.xml vendor/intel/external/android_ia/mesa/src/mesa/drivers/dri/i965/brw_oa_bdw.xml vendor/intel/external/android_ia/mesa/src/mesa/drivers/dri/i965/brw_oa_chv.xml vendor/intel/external/android_ia/mesa/src/mesa/drivers/dri/i965/brw_oa_sklgt2.xml vendor/intel/external/android_ia/mesa/src/mesa/drivers/dri/i965/brw_oa_sklgt3.xml vendor/intel/external/android_ia/mesa/src/mesa/drivers/dri/i965/brw_oa_sklgt4.xml vendor/intel/external/android_ia/mesa/src/mesa/drivers/dri/i965/brw_oa_bxt.xml vendor/intel/external/android_ia/mesa/src/mesa/drivers/dri/i965/brw_oa_kblgt2.xml vendor/intel/external/android_ia/mesa/src/mesa/drivers/dri/i965/brw_oa_kblgt3.xml vendor/intel/external/android_ia/mesa/src/mesa/drivers/dri/i965/brw_oa_glk.xml vendor/intel/external/android_ia/mesa/src/mesa/drivers/dri/i965/brw_oa_cflgt2.xml vendor/intel/external/android_ia/mesa/src/mesa/drivers/dri/i965/brw_oa_cflgt3.xml" Traceback (most recent call last): File "vendor/intel/external/android_ia/mesa/src/mesa/drivers/dri/i965/brw_oa.py", line 734, in main() File "vendor/intel/external/android_ia/mesa/src/mesa/drivers/dri/i965/brw_oa.py", line 561, in main header_file = open(args.header, 'w') IOError: [Errno 2] No such file or directory: 'out/target/common/obj/PACKAGING/boot-jars-package-check_intermediates/brw_oa_metrics.h' Emil Velikov writes: > v2: Pass the actual filename instead of $(word ...) 
magic > v3: Drop duplicate $(i965_oa_xml_FILES) > > Fixes: 2d2b15fbcab ("i965: fix autotools/android build > Cc: Lionel Landwerlin > Cc: Clayton Craft > Signed-off-by: Emil Velikov > --- > src/mesa/drivers/dri/i965/Android.mk | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/mesa/drivers/dri/i965/Android.mk > b/src/mesa/drivers/dri/i965/Android.mk > index 8c4a613bcf3..a1738a8afa2 100644 > --- a/src/mesa/drivers/dri/i965/Android.mk > +++ b/src/mesa/drivers/dri/i965/Android.mk > @@ -315,7 +315,7 @@ i965_oa_xml_FILES := $(addprefix $(LOCAL_PATH)/, \ > $(intermediates)/brw_oa_metrics.c: $(LOCAL_PATH)/brw_oa.py > $(i965_oa_xml_FILES) > @echo "target Generated: $(PRIVATE_MODULE) <= $(notdir $(@))" > @mkdir -p $(dir $@) > - $(hide) $(MESA_PYTHON2) $< --code=$@ $(i965_oa_xml_FILES) --header=$@ > $(i965_oa_xml_FILES) > + $(hide) $(MESA_PYTHON2) $< --code=$@ > --header=$(intermediates)/brw_oa_metrics.h $(i965_oa_xml_FILES) > > $(intermediates)/brw_oa_metrics.h: $(intermediates)/brw_oa_metrics.c > > -- > 2.16.0 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2] radv: fix scanning output_usage_mask with structs
To fix a regression in: dEQP-VK.spirv_assembly.instruction.graphics.variable_init.output.struct v2: handle indirect array accesses (Dave) Fixes: f3275ca01c ("ac/nir: only enable used channels when exporting parameters") Signed-off-by: Samuel Pitoiset --- src/amd/vulkan/radv_shader_info.c | 65 --- 1 file changed, 61 insertions(+), 4 deletions(-) diff --git a/src/amd/vulkan/radv_shader_info.c b/src/amd/vulkan/radv_shader_info.c index 9c18791524..9934f779bd 100644 --- a/src/amd/vulkan/radv_shader_info.c +++ b/src/amd/vulkan/radv_shader_info.c @@ -47,6 +47,52 @@ static void mark_tess_output(struct radv_shader_info *info, info->tcs.outputs_written |= (mask << param); } +static void get_deref_offset(nir_deref_var *deref, unsigned *const_out, +bool *use_indirect_deref) +{ + nir_deref *tail = &deref->deref; + unsigned const_offset = 0; + + if (deref->var->data.compact) { + assert(tail->child->deref_type == nir_deref_type_array); + assert(glsl_type_is_scalar(glsl_without_array(deref->var->type))); + + nir_deref_array *deref_array = nir_deref_as_array(tail->child); + /* We always lower indirect dereferences for "compact" array vars. 
*/ + assert(deref_array->deref_array_type == nir_deref_array_type_direct); + + *const_out = deref_array->base_offset; + return; + } + + while (tail->child != NULL) { + const struct glsl_type *parent_type = tail->type; + tail = tail->child; + + if (tail->deref_type == nir_deref_type_array) { + nir_deref_array *deref_array = nir_deref_as_array(tail); + unsigned size = glsl_count_attribute_slots(tail->type, false); + + const_offset += size * deref_array->base_offset; + if (deref_array->deref_array_type == nir_deref_array_type_direct) + continue; + + assert(deref_array->deref_array_type == nir_deref_array_type_indirect); + *use_indirect_deref = true; + } else if (tail->deref_type == nir_deref_type_struct) { + nir_deref_struct *deref_struct = nir_deref_as_struct(tail); + + for (unsigned i = 0; i < deref_struct->index; i++) { + const struct glsl_type *ft = glsl_get_struct_field(parent_type, i); + const_offset += glsl_count_attribute_slots(ft, false); + } + } else + unreachable("unsupported deref type"); + } + + *const_out = const_offset; +} + static void gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr, struct radv_shader_info *info) @@ -176,13 +222,24 @@ gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr, if (var->data.mode == nir_var_shader_out) { unsigned idx = var->data.location; unsigned comp = var->data.location_frac; + bool use_indirect_deref = false; + unsigned const_offset = 0; + unsigned writemask; + + get_deref_offset(dvar, &const_offset, &use_indirect_deref); + + writemask = instr->const_index[0] << comp; + if (use_indirect_deref) { + /* Make sure to enable all channels for +* indirect array accesses. 
+*/ + writemask = 0xf; + } if (nir->info.stage == MESA_SHADER_VERTEX) { - info->vs.output_usage_mask[idx] |= - instr->const_index[0] << comp; + info->vs.output_usage_mask[idx + const_offset] |= writemask; } else if (nir->info.stage == MESA_SHADER_TESS_EVAL) { - info->tes.output_usage_mask[idx] |= - instr->const_index[0] << comp; + info->tes.output_usage_mask[idx + const_offset] |= writemask; } else if (nir->info.stage == MESA_SHADER_TESS_CTRL) { unsigned param = shader_io_get_unique_index(idx); const struct glsl_type *type = var->type; -- 2.16.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] i965: fix android build
This is the equivalent of commit 5770e1d89e0eb49eb3c9547e8657d636b6e7e5d7 for android. Signed-off-by: Lionel Landwerlin Fixes: 2d2b15fbcab ("i965: fix autotools/android build") --- src/mesa/drivers/dri/i965/Android.mk | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/Android.mk b/src/mesa/drivers/dri/i965/Android.mk index 8c4a613bcf3..ac2f2346ed1 100644 --- a/src/mesa/drivers/dri/i965/Android.mk +++ b/src/mesa/drivers/dri/i965/Android.mk @@ -315,7 +315,10 @@ i965_oa_xml_FILES := $(addprefix $(LOCAL_PATH)/, \ $(intermediates)/brw_oa_metrics.c: $(LOCAL_PATH)/brw_oa.py $(i965_oa_xml_FILES) @echo "target Generated: $(PRIVATE_MODULE) <= $(notdir $(@))" @mkdir -p $(dir $@) - $(hide) $(MESA_PYTHON2) $< --code=$@ $(i965_oa_xml_FILES) --header=$@ $(i965_oa_xml_FILES) + $(hide) $(MESA_PYTHON2) $< \ + --code=$@ $(i965_oa_xml_FILES) \ + --header=$@ \ + $(i965_oa_xml_FILES:%=$(srcdir)/%) $(intermediates)/brw_oa_metrics.h: $(intermediates)/brw_oa_metrics.c -- 2.16.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3] i965/miptree: Use cpu tiling/detiling when mapping
On Wed, Mar 14, 2018 at 05:18:58PM +, Chris Wilson wrote: > Quoting Nanley Chery (2018-03-14 17:14:15) > > On Mon, Mar 12, 2018 at 10:52:55AM -0700, Scott D Phillips wrote: > > > Rename the (un)map_gtt functions to (un)map_map (map by > > > returning a map) and add new functions (un)map_tiled_memcpy that > > > return a shadow buffer populated with the intel_tiled_memcpy > > > functions. > > > > > > Tiling/detiling with the cpu will be the only way to handle Yf/Ys > > > tiling, when support is added for those formats. > > > > > > v2: Compute extents properly in the x|y-rounded-down case (Chris Wilson) > > > > > > v3: Add units to parameter names of tile_extents (Nanley Chery) > > > Use _mesa_align_malloc for the shadow copy (Nanley) > > > Continue using gtt maps on gen4 (Nanley) > > > --- > > > src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 94 > > > --- > > > 1 file changed, 86 insertions(+), 8 deletions(-) > > > > > > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > > > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > > > index c6213b21629..fba17bf5b7b 100644 > > > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > > > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > > > @@ -31,6 +31,7 @@ > > > #include "intel_image.h" > > > #include "intel_mipmap_tree.h" > > > #include "intel_tex.h" > > > +#include "intel_tiled_memcpy.h" > > > #include "intel_blit.h" > > > #include "intel_fbo.h" > > > > > > @@ -3046,10 +3047,10 @@ intel_miptree_unmap_raw(struct intel_mipmap_tree > > > *mt) > > > } > > > > > > static void > > > -intel_miptree_map_gtt(struct brw_context *brw, > > > - struct intel_mipmap_tree *mt, > > > - struct intel_miptree_map *map, > > > - unsigned int level, unsigned int slice) > > > +intel_miptree_map_map(struct brw_context *brw, > > > + struct intel_mipmap_tree *mt, > > > + struct intel_miptree_map *map, > > > + unsigned int level, unsigned int slice) > > > { > > > unsigned int bw, bh; > > > void *base; > > > @@ -3093,11 +3094,81 @@ 
intel_miptree_map_gtt(struct brw_context *brw, > > > } > > > > > > static void > > > -intel_miptree_unmap_gtt(struct intel_mipmap_tree *mt) > > > +intel_miptree_unmap_map(struct intel_mipmap_tree *mt) > > > { > > > intel_miptree_unmap_raw(mt); > > > } > > > > > > +/* Compute extent parameters for use with tiled_memcpy functions. > > > + * xs are in units of bytes and ys are in units of strides. */ > > > +static inline void > > > +tile_extents(struct intel_mipmap_tree *mt, struct intel_miptree_map *map, > > > + unsigned int level, unsigned int slice, unsigned int *x1_B, > > > + unsigned int *x2_B, unsigned int *y1_el, unsigned int > > > *y2_el) > > > +{ > > > + unsigned int block_width, block_height; > > > + unsigned int x0_el, y0_el; > > > + > > > + _mesa_get_format_block_size(mt->format, &block_width, &block_height); > > > + > > > + assert(map->x % block_width == 0); > > > + assert(map->y % block_height == 0); > > > + > > > + intel_miptree_get_image_offset(mt, level, slice, &x0_el, &y0_el); > > > + *x1_B = (map->x / block_width + x0_el) * mt->cpp; > > > + *y1_el = map->y / block_height + y0_el; > > > + *x2_B = (DIV_ROUND_UP(map->x + map->w, block_width) + x0_el) * > > > mt->cpp; > > > + *y2_el = DIV_ROUND_UP(map->y + map->h, block_height) + y0_el; > > > +} > > > + > > > +static void > > > +intel_miptree_map_tiled_memcpy(struct brw_context *brw, > > > + struct intel_mipmap_tree *mt, > > > + struct intel_miptree_map *map, > > > + unsigned int level, unsigned int slice) > > > +{ > > > + unsigned int x1, x2, y1, y2; > > > + tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2); > > > + map->stride = _mesa_format_row_stride(mt->format, map->w); > > > + map->buffer = map->ptr = _mesa_align_malloc(map->stride * (y2 - y1), > > > 16); > > > + > > > + assert(map->ptr); > > > + > > > + if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) { > > > > It looks like we'll generate extra copies using this function, but only > > in a few corner cases. 
I think the following places should be using the > > INVALIDATE flag, but aren't: > > * _mesa_store_cleartexsubimage > > * generate_mipmap_uncompressed > > > > > + char *src = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW); > > > + src += mt->offset; > > > + > > > > It seems possible that the buffer object had a WC memory type during > > rendering. In that case, we need an sfence here right? > > > > This stuff is pretty new to me, so perhaps others would like to chime > > in. > > From talking to Ben on IRC and through my reading of the following section in the HW docs: Memory Types and Applicability to GFX, it seems that WC from the GFX perspective is basically UC and doesn't use WC buffers: Write Combining
Re: [Mesa-dev] [PATCH] i965/tiled_memcpy: realign rgba8_copy_aligned_dst stack in 32-bit builds
Quoting Scott D Phillips (2018-03-20 20:39:25) > When building intel_tiled_memcpy for i686, the stack will only be > 4-byte aligned. This isn't sufficient for SSE temporaries which > require 16-byte alignment. Use the force_align_arg_pointer > function attribute in that case to ensure sufficient alignment. > --- > src/mesa/drivers/dri/i965/intel_tiled_memcpy.c | 8 +++- > 1 file changed, 7 insertions(+), 1 deletion(-) > > diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c > b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c > index 69306828d72..bd8bafbd2d7 100644 > --- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c > +++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c > @@ -42,6 +42,12 @@ > #include > #endif > > +#if defined(__GNUC__) && defined(__i386__) && (defined(__SSSE3__) || > defined(__SSE2__)) > +#define REALIGN __attribute__((force_align_arg_pointer)) > +#else > +#define REALIGN > +#endif It would be a harmless no-op on x86-64 (or essential ;) > + > #define FILE_DEBUG_FLAG DEBUG_TEXTURE > > #define ALIGN_DOWN(a, b) ROUND_DOWN_TO(a, b) > @@ -156,7 +162,7 @@ rgba8_copy_16_aligned_src(void *dst, const void *src) > /** > * Copy RGBA to BGRA - swap R and B, with the destination 16-byte aligned. > */ > -static inline void * > +static REALIGN inline void * > rgba8_copy_aligned_dst(void *dst, const void *src, size_t bytes) > { > assert(bytes == 0 || !(((uintptr_t)dst) & 0xf)); Hmm, if aligned_dst is spilling to stack, so would be aligned_src. As these are supposed to be inlined (and constant folded), do you not want to realign the callers instead? Perhaps with an explicit FLATTEN. -Chris ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] i965/tiled_memcpy: realign rgba8_copy_aligned_dst stack in 32-bit builds
When building intel_tiled_memcpy for i686, the stack will only be 4-byte aligned. This isn't sufficient for SSE temporaries which require 16-byte alignment. Use the force_align_arg_pointer function attribute in that case to ensure sufficient alignment. --- src/mesa/drivers/dri/i965/intel_tiled_memcpy.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c index 69306828d72..bd8bafbd2d7 100644 --- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c +++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c @@ -42,6 +42,12 @@ #include <emmintrin.h> #endif +#if defined(__GNUC__) && defined(__i386__) && (defined(__SSSE3__) || defined(__SSE2__)) +#define REALIGN __attribute__((force_align_arg_pointer)) +#else +#define REALIGN +#endif + #define FILE_DEBUG_FLAG DEBUG_TEXTURE #define ALIGN_DOWN(a, b) ROUND_DOWN_TO(a, b) @@ -156,7 +162,7 @@ rgba8_copy_16_aligned_src(void *dst, const void *src) /** * Copy RGBA to BGRA - swap R and B, with the destination 16-byte aligned. */ -static inline void * +static REALIGN inline void * rgba8_copy_aligned_dst(void *dst, const void *src, size_t bytes) { assert(bytes == 0 || !(((uintptr_t)dst) & 0xf)); -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3] i965: android: pass the correct header name to the python script
On 20/03/18 19:34, Emil Velikov wrote: v2: Pass the actual filename instead of $(word ...) magic v3: Drop duplicate $(i965_oa_xml_FILES) Fixes: 2d2b15fbcab ("i965: fix autotools/android build Cc: Lionel Landwerlin Cc: Clayton Craft Signed-off-by: Emil Velikov --- src/mesa/drivers/dri/i965/Android.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/Android.mk b/src/mesa/drivers/dri/i965/Android.mk index 8c4a613bcf3..a1738a8afa2 100644 --- a/src/mesa/drivers/dri/i965/Android.mk +++ b/src/mesa/drivers/dri/i965/Android.mk @@ -315,7 +315,7 @@ i965_oa_xml_FILES := $(addprefix $(LOCAL_PATH)/, \ $(intermediates)/brw_oa_metrics.c: $(LOCAL_PATH)/brw_oa.py $(i965_oa_xml_FILES) @echo "target Generated: $(PRIVATE_MODULE) <= $(notdir $(@))" @mkdir -p $(dir $@) - $(hide) $(MESA_PYTHON2) $< --code=$@ $(i965_oa_xml_FILES) --header=$@ $(i965_oa_xml_FILES) + $(hide) $(MESA_PYTHON2) $< --code=$@ --header=$(intermediates)/brw_oa_metrics.h $(i965_oa_xml_FILES) Don't we need the srcdir trick here too? It seems to be the problem on android too. $(intermediates)/brw_oa_metrics.h: $(intermediates)/brw_oa_metrics.c ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: fix out of tree autotools build
Hi, Looks good and helps here a lot! Thanks! Reviewed-by: Mathias Fröhlich Mathias On Tuesday, 20 March 2018 19:59:00 CET Lionel Landwerlin wrote: > Fixes: 2d2b15fbcab ("i965: fix autotools/android build") > Signed-off-by: Lionel Landwerlin > --- > src/mesa/drivers/dri/i965/Makefile.am | 5 - > 1 file changed, 4 insertions(+), 1 deletion(-) > > diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/ i965/Makefile.am > index fe106b4257c..889d4c68a2b 100644 > --- a/src/mesa/drivers/dri/i965/Makefile.am > +++ b/src/mesa/drivers/dri/i965/Makefile.am > @@ -115,6 +115,9 @@ EXTRA_DIST = \ > meson.build > > brw_oa_metrics.c: brw_oa.py $(i965_oa_xml_FILES) > - $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/brw_oa.py --code=$(builddir)/ brw_oa_metrics.c --header=$(builddir)/brw_oa_metrics.h $(i965_oa_xml_FILES) > + $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/brw_oa.py \ > + --code=$(builddir)/brw_oa_metrics.c \ > + --header=$(builddir)/brw_oa_metrics.h \ > + $(i965_oa_xml_FILES:%=$(srcdir)/%) > > brw_oa_metrics.h: brw_oa_metrics.c > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: fix out of tree autotools build
On 20 March 2018 at 18:59, Lionel Landwerlin wrote: > Fixes: 2d2b15fbcab ("i965: fix autotools/android build") > Signed-off-by: Lionel Landwerlin > --- > src/mesa/drivers/dri/i965/Makefile.am | 5 - > 1 file changed, 4 insertions(+), 1 deletion(-) > > diff --git a/src/mesa/drivers/dri/i965/Makefile.am > b/src/mesa/drivers/dri/i965/Makefile.am > index fe106b4257c..889d4c68a2b 100644 > --- a/src/mesa/drivers/dri/i965/Makefile.am > +++ b/src/mesa/drivers/dri/i965/Makefile.am > @@ -115,6 +115,9 @@ EXTRA_DIST = \ > meson.build > > brw_oa_metrics.c: brw_oa.py $(i965_oa_xml_FILES) > - $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/brw_oa.py > --code=$(builddir)/brw_oa_metrics.c --header=$(builddir)/brw_oa_metrics.h > $(i965_oa_xml_FILES) > + $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/brw_oa.py \ > + --code=$(builddir)/brw_oa_metrics.c \ > + --header=$(builddir)/brw_oa_metrics.h \ > + $(i965_oa_xml_FILES:%=$(srcdir)/%) > Err right - srcdir is optional in the dependency line, although during invocation it must be present. Reviewed-by: Emil Velikov -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v3] i965: android: pass the correct header name to the python script
v2: Pass the actual filename instead of $(word ...) magic v3: Drop duplicate $(i965_oa_xml_FILES) Fixes: 2d2b15fbcab ("i965: fix autotools/android build") Cc: Lionel Landwerlin Cc: Clayton Craft Signed-off-by: Emil Velikov --- src/mesa/drivers/dri/i965/Android.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/Android.mk b/src/mesa/drivers/dri/i965/Android.mk index 8c4a613bcf3..a1738a8afa2 100644 --- a/src/mesa/drivers/dri/i965/Android.mk +++ b/src/mesa/drivers/dri/i965/Android.mk @@ -315,7 +315,7 @@ i965_oa_xml_FILES := $(addprefix $(LOCAL_PATH)/, \ $(intermediates)/brw_oa_metrics.c: $(LOCAL_PATH)/brw_oa.py $(i965_oa_xml_FILES) @echo "target Generated: $(PRIVATE_MODULE) <= $(notdir $(@))" @mkdir -p $(dir $@) - $(hide) $(MESA_PYTHON2) $< --code=$@ $(i965_oa_xml_FILES) --header=$@ $(i965_oa_xml_FILES) + $(hide) $(MESA_PYTHON2) $< --code=$@ --header=$(intermediates)/brw_oa_metrics.h $(i965_oa_xml_FILES) $(intermediates)/brw_oa_metrics.h: $(intermediates)/brw_oa_metrics.c -- 2.16.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2] docs: add 17.3.{8, 9} in the release calendar
On 19 March 2018 at 15:28, Juan A. Suarez Romero wrote: > Mesa 18.0 series has not been released yet, so let's extend 17.3 lifetime. > Any moment now ;-) > v2: add 17.3.9 in the calendar (Andres Gomez) > > CC: Andres Gomez > CC: Emil Velikov > --- Reviewed-by: Emil Velikov -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] i965: fix out of tree autotools build
Fixes: 2d2b15fbcab ("i965: fix autotools/android build") Signed-off-by: Lionel Landwerlin --- src/mesa/drivers/dri/i965/Makefile.am | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am index fe106b4257c..889d4c68a2b 100644 --- a/src/mesa/drivers/dri/i965/Makefile.am +++ b/src/mesa/drivers/dri/i965/Makefile.am @@ -115,6 +115,9 @@ EXTRA_DIST = \ meson.build brw_oa_metrics.c: brw_oa.py $(i965_oa_xml_FILES) - $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/brw_oa.py --code=$(builddir)/brw_oa_metrics.c --header=$(builddir)/brw_oa_metrics.h $(i965_oa_xml_FILES) + $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/brw_oa.py \ + --code=$(builddir)/brw_oa_metrics.c \ + --header=$(builddir)/brw_oa_metrics.h \ + $(i965_oa_xml_FILES:%=$(srcdir)/%) brw_oa_metrics.h: brw_oa_metrics.c -- 2.16.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Releasing 18.0
On 15 March 2018 at 17:01, Marek Olšák wrote: > Hi, > > I'd like to have these in 18.0. People can fetch them here: >git://people.freedesktop.org/~mareko/mesa for-18.0 > > They are already in our AMD internal 18.0 branch, so they get quite a lot of > testing. > > pick 0554172 radeonsi: align command buffer starting address to fix some > Raven hangs > pick e0d3ebc st/mesa: expose 0 shader binary formats for compat profiles for > Qt > pick 5d1a012 radeonsi: fix vertex buffer address computation with full > 64-bit addresses > pick 79ad993 radeonsi: prevent a negative buffer offset in > si_upload_descriptors > pick 0dd180a radeonsi: add a workaround for GFX9 hang with init_config > alignment > pick b4c156b st/dri: fix OpenGL-OpenCL interop for GL_TEXTURE_BUFFER > pick 4edf31d configure.ac: blacklist libdrm 2.4.90 > # All these are critical fixes. > All of those will be in 18.0 > pick a3ac60e ac: move address space definitions to common code > pick 572d46d ac: Use the renumbered const address space for LLVM 7. > # Both are convenient for AMD devs testing 18.0 with LLVM master. The latter > depends on the former. > Temporarily I've left these out. As devs reach consensus we'll handle them accordingly. Thanks Emil P.S. The 18.0 should be out shortly - there were some regressions which needed to be addressed. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH kmscube] cube-tex: make use of modifiers
On 20 March 2018 at 18:02, Christian Gmeiner wrote: > Fixes rendering issues with mode rgba on etnaviv. I have applied > the same change for nv12 variants but they are not supported on > etnaviv. > > Signed-off-by: Christian Gmeiner > --- > cube-tex.c | 32 > 1 file changed, 24 insertions(+), 8 deletions(-) > > diff --git a/cube-tex.c b/cube-tex.c > index 9e38ae8..dba19ff 100644 > --- a/cube-tex.c > +++ b/cube-tex.c > @@ -213,7 +213,7 @@ static const char *fragment_shader_source_2img = > > static const uint32_t texw = 512, texh = 512; > > -static int get_fd_rgba(uint32_t *pstride) > +static int get_fd_rgba(uint32_t *pstride, uint64_t *modifier) > { > struct gbm_bo *bo; > void *map_data = NULL; > @@ -234,6 +234,7 @@ static int get_fd_rgba(uint32_t *pstride) > gbm_bo_unmap(bo, map_data); > > fd = gbm_bo_get_fd(bo); > + *modifier = gbm_bo_get_modifier(bo); > Based on the existing gbm_bo_get_modifier handling there should be a guard here: #ifdef HAVE_GBM_MODIFIERS *modifier = ... #else *modifier = DRM_FORMAT_MOD_INVALID; #endif > /* we have the fd now, no longer need the bo: */ > gbm_bo_destroy(bo); > @@ -243,7 +244,7 @@ static int get_fd_rgba(uint32_t *pstride) > return fd; > } > > -static int get_fd_y(uint32_t *pstride) > +static int get_fd_y(uint32_t *pstride, uint64_t *modifier) > { > struct gbm_bo *bo; > void *map_data = NULL; > @@ -264,6 +265,7 @@ static int get_fd_y(uint32_t *pstride) > gbm_bo_unmap(bo, map_data); > > fd = gbm_bo_get_fd(bo); > + *modifier = gbm_bo_get_modifier(bo); > Ditto > /* we have the fd now, no longer need the bo: */ > gbm_bo_destroy(bo); > @@ -273,7 +275,7 @@ static int get_fd_y(uint32_t *pstride) > return fd; > } > > -static int get_fd_uv(uint32_t *pstride) > +static int get_fd_uv(uint32_t *pstride, uint64_t *modifier) > { > struct gbm_bo *bo; > void *map_data = NULL; > @@ -294,6 +296,7 @@ static int get_fd_uv(uint32_t *pstride) > gbm_bo_unmap(bo, map_data); > > fd = gbm_bo_get_fd(bo); > + *modifier = gbm_bo_get_modifier(bo); > And again? 
> /* we have the fd now, no longer need the bo: */ > gbm_bo_destroy(bo); > @@ -306,7 +309,8 @@ static int get_fd_uv(uint32_t *pstride) > static int init_tex_rgba(void) > { > uint32_t stride; > - int fd = get_fd_rgba(&stride); > + uint64_t modifier; > + int fd = get_fd_rgba(&stride, &modifier); > const EGLint attr[] = { > EGL_WIDTH, texw, > EGL_HEIGHT, texh, > @@ -314,6 +318,8 @@ static int init_tex_rgba(void) > EGL_DMA_BUF_PLANE0_FD_EXT, fd, > EGL_DMA_BUF_PLANE0_OFFSET_EXT, 0, > EGL_DMA_BUF_PLANE0_PITCH_EXT, stride, > + EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT, modifier & 0xffffffff, > + EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT, modifier >> 32, These attributes should only be added if the modifier is valid. The same goes for the analogous hunks through the rest of the patch. -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH kmscube] cube-tex: make use of modifiers
On Tuesday, 2018-03-20 19:02:00 +0100, Christian Gmeiner wrote: > Fixes rendering issues with mode rgba on etnaviv. I have applied > the same change for nv12 variants but they are not supported on > etnaviv. > > Signed-off-by: Christian Gmeiner LGTM! Reviewed-by: Eric Engestrom > --- > cube-tex.c | 32 > 1 file changed, 24 insertions(+), 8 deletions(-) > > diff --git a/cube-tex.c b/cube-tex.c > index 9e38ae8..dba19ff 100644 > --- a/cube-tex.c > +++ b/cube-tex.c > @@ -213,7 +213,7 @@ static const char *fragment_shader_source_2img = > > static const uint32_t texw = 512, texh = 512; > > -static int get_fd_rgba(uint32_t *pstride) > +static int get_fd_rgba(uint32_t *pstride, uint64_t *modifier) > { > struct gbm_bo *bo; > void *map_data = NULL; > @@ -234,6 +234,7 @@ static int get_fd_rgba(uint32_t *pstride) > gbm_bo_unmap(bo, map_data); > > fd = gbm_bo_get_fd(bo); > + *modifier = gbm_bo_get_modifier(bo); > > /* we have the fd now, no longer need the bo: */ > gbm_bo_destroy(bo); > @@ -243,7 +244,7 @@ static int get_fd_rgba(uint32_t *pstride) > return fd; > } > > -static int get_fd_y(uint32_t *pstride) > +static int get_fd_y(uint32_t *pstride, uint64_t *modifier) > { > struct gbm_bo *bo; > void *map_data = NULL; > @@ -264,6 +265,7 @@ static int get_fd_y(uint32_t *pstride) > gbm_bo_unmap(bo, map_data); > > fd = gbm_bo_get_fd(bo); > + *modifier = gbm_bo_get_modifier(bo); > > /* we have the fd now, no longer need the bo: */ > gbm_bo_destroy(bo); > @@ -273,7 +275,7 @@ static int get_fd_y(uint32_t *pstride) > return fd; > } > > -static int get_fd_uv(uint32_t *pstride) > +static int get_fd_uv(uint32_t *pstride, uint64_t *modifier) > { > struct gbm_bo *bo; > void *map_data = NULL; > @@ -294,6 +296,7 @@ static int get_fd_uv(uint32_t *pstride) > gbm_bo_unmap(bo, map_data); > > fd = gbm_bo_get_fd(bo); > + *modifier = gbm_bo_get_modifier(bo); > > /* we have the fd now, no longer need the bo: */ > gbm_bo_destroy(bo); > @@ -306,7 +309,8 @@ static int get_fd_uv(uint32_t *pstride) > 
static int init_tex_rgba(void) > { > uint32_t stride; > - int fd = get_fd_rgba(&stride); > + uint64_t modifier; > + int fd = get_fd_rgba(&stride, &modifier); > const EGLint attr[] = { > EGL_WIDTH, texw, > EGL_HEIGHT, texh, > @@ -314,6 +318,8 @@ static int init_tex_rgba(void) > EGL_DMA_BUF_PLANE0_FD_EXT, fd, > EGL_DMA_BUF_PLANE0_OFFSET_EXT, 0, > EGL_DMA_BUF_PLANE0_PITCH_EXT, stride, > + EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT, modifier & 0xffffffff, > + EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT, modifier >> 32, > EGL_NONE > }; > EGLImage img; > @@ -339,8 +345,9 @@ static int init_tex_rgba(void) > static int init_tex_nv12_2img(void) > { > uint32_t stride_y, stride_uv; > - int fd_y = get_fd_y(&stride_y); > - int fd_uv = get_fd_uv(&stride_uv); > + uint64_t modifier_y, modifier_uv; > + int fd_y = get_fd_y(&stride_y, &modifier_y); > + int fd_uv = get_fd_uv(&stride_uv, &modifier_uv); > const EGLint attr_y[] = { > EGL_WIDTH, texw, > EGL_HEIGHT, texh, > @@ -348,6 +355,8 @@ static int init_tex_nv12_2img(void) > EGL_DMA_BUF_PLANE0_FD_EXT, fd_y, > EGL_DMA_BUF_PLANE0_OFFSET_EXT, 0, > EGL_DMA_BUF_PLANE0_PITCH_EXT, stride_y, > + EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT, modifier_y & 0xffffffff, > + EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT, modifier_y >> 32, > EGL_NONE > }; > const EGLint attr_uv[] = { > @@ -357,6 +366,8 @@ static int init_tex_nv12_2img(void) > EGL_DMA_BUF_PLANE0_FD_EXT, fd_uv, > EGL_DMA_BUF_PLANE0_OFFSET_EXT, 0, > EGL_DMA_BUF_PLANE0_PITCH_EXT, stride_uv, > + EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT, modifier_uv & 0xffffffff, > + EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT, modifier_uv >> 32, > EGL_NONE > }; > EGLImage img_y, img_uv; > @@ -397,8 +408,9 @@ static int init_tex_nv12_2img(void) > static int init_tex_nv12_1img(void) > { > uint32_t stride_y, stride_uv; > - int fd_y = get_fd_y(&stride_y); > - int fd_uv = get_fd_uv(&stride_uv); > + uint64_t modifier_y, modifier_uv; > + int fd_y = get_fd_y(&stride_y, &modifier_y); > + int fd_uv = get_fd_uv(&stride_uv, &modifier_uv); > const EGLint attr[] = { > EGL_WIDTH, texw, > 
EGL_HEIGHT, texh, > @@ -406,9 +418,13 @@ static int init_tex_nv12_1img(void) > EGL_DMA_BUF_PLANE0_FD_EXT, fd_y, > EGL_DMA_BUF_PLANE0_OFFSET_EXT, 0, > EGL_DMA_BUF_PLANE0_PITCH_EXT, stride_y, > + EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT, modifier_y & 0xffffffff, > + EGL_DMA_B
[Mesa-dev] [Bug 105621] Build failure on GNOME Continuous
https://bugs.freedesktop.org/show_bug.cgi?id=105621 Emil Velikov changed: What|Removed |Added Resolution|--- |FIXED Status|NEW |RESOLVED --- Comment #4 from Emil Velikov --- Should be fixed with commit 28780c5028a60234e11e084777553ad70591f87d Author: Emil Velikov Date: Tue Mar 20 11:39:57 2018 + st/mesa: add compiler/nir/ prefix for nir includes -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] nir: Initial implementation of a nir_instr_worklist
Thomas Helland writes: > Make a simple worklist by basically just wrapping u_vector. > This is intended to be used in nir_opt_dce to reduce the number of calls > to ralloc, as we are currently spamming ralloc quite badly. It should > also give better cache locality and much lower memory usage. > --- > src/compiler/nir/nir_worklist.h | 69 > + > 1 file changed, 69 insertions(+) > > diff --git a/src/compiler/nir/nir_worklist.h b/src/compiler/nir/nir_worklist.h > index 39521a386c..5071c7aec1 100644 > --- a/src/compiler/nir/nir_worklist.h > +++ b/src/compiler/nir/nir_worklist.h > @@ -30,6 +30,8 @@ > #define _NIR_WORKLIST_ > > #include "nir.h" > +#include "util/set.h" > +#include "util/u_vector.h" > > #ifdef __cplusplus > extern "C" { > @@ -83,6 +85,73 @@ nir_block *nir_block_worklist_peek_tail(const > nir_block_worklist *w); > > nir_block *nir_block_worklist_pop_tail(nir_block_worklist *w); > > + > + > + Drop two of these extra newlines? Also, the "typedef struct nir_instr_worklist_node nir_instr_worklist_node;" in the second patch should probably be in this one. > +/* > + * This worklist implementation, in contrast to the block worklist, does not > + * have unique entries, meaning a nir_instr can be inserted more than once > + * into the worklist. It uses u_vector to keep the overhead and memory > + * footprint at a minimum. > + * Trailing whitespace here. Other than that, this looks great. I particularly like that you've explained the choice of the implementation details in the comments here. With these little nits fixed, both patches are: Reviewed-by: Eric Anholt signature.asc Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2] i965: android: pass the correct header name to the python script
v2: Pass the actual filename instead of $(word ...) magic Fixes: 2d2b15fbcab ("i965: fix autotools/android build") Cc: Lionel Landwerlin Cc: Clayton Craft Signed-off-by: Emil Velikov --- src/mesa/drivers/dri/i965/Android.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/Android.mk b/src/mesa/drivers/dri/i965/Android.mk index 8c4a613bcf3..a5fa504bb11 100644 --- a/src/mesa/drivers/dri/i965/Android.mk +++ b/src/mesa/drivers/dri/i965/Android.mk @@ -315,7 +315,7 @@ i965_oa_xml_FILES := $(addprefix $(LOCAL_PATH)/, \ $(intermediates)/brw_oa_metrics.c: $(LOCAL_PATH)/brw_oa.py $(i965_oa_xml_FILES) @echo "target Generated: $(PRIVATE_MODULE) <= $(notdir $(@))" @mkdir -p $(dir $@) - $(hide) $(MESA_PYTHON2) $< --code=$@ $(i965_oa_xml_FILES) --header=$@ $(i965_oa_xml_FILES) + $(hide) $(MESA_PYTHON2) $< --code=$@ $(i965_oa_xml_FILES) --header=$(intermediates)/brw_oa_metrics.h $(i965_oa_xml_FILES) $(intermediates)/brw_oa_metrics.h: $(intermediates)/brw_oa_metrics.c -- 2.16.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] i965: android: pass the correct header name to the python script
Fixes: 2d2b15fbcab ("i965: fix autotools/android build") Cc: Lionel Landwerlin Cc: Clayton Craft Signed-off-by: Emil Velikov --- src/mesa/drivers/dri/i965/Android.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/Android.mk b/src/mesa/drivers/dri/i965/Android.mk index 8c4a613bcf3..386ab1b8e9b 100644 --- a/src/mesa/drivers/dri/i965/Android.mk +++ b/src/mesa/drivers/dri/i965/Android.mk @@ -315,7 +315,7 @@ i965_oa_xml_FILES := $(addprefix $(LOCAL_PATH)/, \ $(intermediates)/brw_oa_metrics.c: $(LOCAL_PATH)/brw_oa.py $(i965_oa_xml_FILES) @echo "target Generated: $(PRIVATE_MODULE) <= $(notdir $(@))" @mkdir -p $(dir $@) - $(hide) $(MESA_PYTHON2) $< --code=$@ $(i965_oa_xml_FILES) --header=$@ $(i965_oa_xml_FILES) + $(hide) $(MESA_PYTHON2) $< --code=$@ $(i965_oa_xml_FILES) --header=$(word 2, $^) $(i965_oa_xml_FILES) $(intermediates)/brw_oa_metrics.h: $(intermediates)/brw_oa_metrics.c -- 2.16.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 105621] Build failure on GNOME Continuous
https://bugs.freedesktop.org/show_bug.cgi?id=105621 Mark Janes changed: What|Removed |Added CC||mark.a.ja...@intel.com --- Comment #3 from Mark Janes --- *** Bug 105628 has been marked as a duplicate of this bug. *** -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] aubinator_error_decode: Compare only the class_name of the ring.
Quoting Lionel Landwerlin (2018-03-20 16:39:18) > On 20/03/18 16:30, Chris Wilson wrote: > > Quoting Rafael Antognolli (2018-03-20 16:13:08) > >> ring_name is "<class> + <instance>" (e.g. rcs0). So we need to > >> first compare the class name only, then get the instance id. > >> > >> Without this, INSTDONE is not being decoded. > >> > >> Signed-off-by: Rafael Antognolli > >> Cc: Chris Wilson > >> --- > >> src/intel/tools/aubinator_error_decode.c | 2 +- > >> 1 file changed, 1 insertion(+), 1 deletion(-) > >> > >> diff --git a/src/intel/tools/aubinator_error_decode.c > >> b/src/intel/tools/aubinator_error_decode.c > >> index 017be5bbc2b..db880d74a9e 100644 > >> --- a/src/intel/tools/aubinator_error_decode.c > >> +++ b/src/intel/tools/aubinator_error_decode.c > >> @@ -120,7 +120,7 @@ static int ring_name_to_class(const char *ring_name, > >> [VECS] = "vecs", > >> }; > >> for (size_t i = 0; i < ARRAY_SIZE(class_names); i++) { > >> - if (strcmp(ring_name, class_names[i])) > >> + if (strncmp(ring_name, class_names[i], strlen(class_names[i]))) > > Gah, I remember noticing this and completely forgot to send a patch. > > > > Reviewed-by: Chris Wilson > > > > Thanks, > > -Chris > > > > Just an idea, but maybe sending the addresses of the registers from the > kernel would help? I'm open to suggestions. And yeah, just a plain dump like registers: - 0x1200: { name: UGLY_BSPEC_NAME_IF_WE_CAN_BE_BOTHERED, value: 0xdeadbeef} - 0x1204: 0x0c0ffee will be on my wishlist. Maybe just call it error.yaml and phase out the old error in a decade. -Chris ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 5/8] intel: devinfo: add helper functions to fill fusing masks values
On 20/03/18 00:08, Kenneth Graunke wrote: On Wednesday, March 14, 2018 10:19:11 AM PDT Lionel Landwerlin wrote: There are a couple of ways we can get the fusing information from the kernel : - Through DRM_I915_GETPARAM with the SLICE_MASK/SUBSLICE_MASK parameters - Through the new DRM_IOCTL_I915_QUERY by requesting the DRM_I915_QUERY_TOPOLOGY_INFO The second method is more accurate and also gives us the EUs fusing masks. It's also a requirement for CNL as this platform has asymetric subslices and the first method SUBSLICE_MASK value is assumed uniform across slices. Signed-off-by: Lionel Landwerlin --- src/intel/dev/gen_device_info.c | 129 src/intel/dev/gen_device_info.h | 11 2 files changed, 140 insertions(+) diff --git a/src/intel/dev/gen_device_info.c b/src/intel/dev/gen_device_info.c index c1bdc997f2c..a8c9f7738b2 100644 --- a/src/intel/dev/gen_device_info.c +++ b/src/intel/dev/gen_device_info.c @@ -28,8 +28,11 @@ #include #include "gen_device_info.h" #include "compiler/shader_enums.h" +#include "util/bitscan.h" #include "util/macros.h" +#include + /** * Get the PCI ID for the device name. * @@ -913,6 +916,132 @@ fill_masks(struct gen_device_info *devinfo) } } +static void +reset_masks(struct gen_device_info *devinfo) +{ + devinfo->subslice_slice_stride = + devinfo->eu_subslice_stride = + devinfo->eu_slice_stride = 0; + + devinfo->num_slices = + devinfo->num_eu_per_subslice = 0; We tend to avoid chained assignments like this, can we just do: devinfo->subslice_slice_stride = 0; devinfo->eu_subslice_stride = 0; devinfo->eu_slice_stride = 0; devinfo->num_slices = 0; devinfo->num_eu_per_subslice = 0; With it already being on multiple lines, it's no more code, and easier to read IMHO. Sure. 
+ memset(devinfo->num_subslices, 0, sizeof(devinfo->num_subslices)); + + memset(&devinfo->slice_masks, 0, sizeof(devinfo->slice_masks)); + memset(devinfo->subslice_masks, 0, sizeof(devinfo->subslice_masks)); + memset(devinfo->eu_masks, 0, sizeof(devinfo->eu_masks)); +} + +void +gen_device_info_update_from_masks(struct gen_device_info *devinfo, + uint32_t slice_mask, + uint32_t subslice_mask, + uint32_t n_eus) I wonder if it would be better to simply populate a drm_i915_query_topology_info structure from the masks, if we don't have the topology query uABI. Then, we could just have one codepath for filling out devinfo, with a "whoops, no kernel support" shim. I'm not sure whether it would be more or less code, but it might drop some of the duplicated complexity? Yeah, make sense. +{ + reset_masks(devinfo); + + assert((slice_mask & 0xff) == slice_mask); + + devinfo->slice_masks = slice_mask; + devinfo->num_slices = __builtin_popcount(devinfo->slice_masks); _mesa_bitcount() here and elsewhere. Only issue with this is that it pulls in the headers from what I consider to be the GL headers : src/mesa/main/imports.h + + uint32_t max_slices = util_last_bit(slice_mask); + uint32_t max_subslices = util_last_bit(subslice_mask); + devinfo->subslice_slice_stride = DIV_ROUND_UP(max_subslices, 8); + uint32_t n_subslices = 0; + for (int s = 0; s < util_last_bit(slice_mask); s++) { for (int s = 0; s < max_slices; s++) { Thanks! + if ((slice_mask & (1UL << s)) == 0) 1u << s Perhaps you like my topology-struct idea and will make a v3...if not... 
Will do :) Reviewed-by: Kenneth Graunke + continue; + + for (int b = 0; b < devinfo->subslice_slice_stride; b++) { + int subslice_offset = s * devinfo->subslice_slice_stride + b; + + devinfo->subslice_masks[subslice_offset] = +(subslice_mask >> (b * 8)) & 0xff; + devinfo->num_subslices[s] += +__builtin_popcount(devinfo->subslice_masks[subslice_offset]); + } + + n_subslices += devinfo->num_subslices[s]; + } + + /* We expect the total number of EUs to be uniformly distributed throughout +* the subslices. +*/ + assert((n_eus % n_subslices) == 0); + devinfo->num_eu_per_subslice = n_eus / n_subslices; + + devinfo->eu_subslice_stride = DIV_ROUND_UP(devinfo->num_eu_per_subslice, 8); + devinfo->eu_slice_stride = devinfo->eu_subslice_stride * max_subslices; + + for (int s = 0; s < max_slices; s++) { + if ((slice_mask & (1UL << s)) == 0) + continue; + + for (int ss = 0; ss < max_subslices; ss++) { + if ((subslice_mask & (1UL << ss)) == 0) +continue; + + for (int b = 0; b < devinfo->eu_subslice_stride; b++) { +int eus_offset = s * devinfo->eu_slice_stride + + ss * devinfo->eu_subslice_stride + b; + +devinfo->eu_masks[eus_offset] = + (((1UL << devinfo->num_eu_per_subslice) - 1) >> (b * 8)) & 0xff; +
[Mesa-dev] [PATCH kmscube] cube-tex: make use of modifiers
Fixes rendering issues with mode rgba on etnaviv. I have applied the same change for nv12 variants but they are not supported on etnaviv. Signed-off-by: Christian Gmeiner --- cube-tex.c | 32 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/cube-tex.c b/cube-tex.c index 9e38ae8..dba19ff 100644 --- a/cube-tex.c +++ b/cube-tex.c @@ -213,7 +213,7 @@ static const char *fragment_shader_source_2img = static const uint32_t texw = 512, texh = 512; -static int get_fd_rgba(uint32_t *pstride) +static int get_fd_rgba(uint32_t *pstride, uint64_t *modifier) { struct gbm_bo *bo; void *map_data = NULL; @@ -234,6 +234,7 @@ static int get_fd_rgba(uint32_t *pstride) gbm_bo_unmap(bo, map_data); fd = gbm_bo_get_fd(bo); + *modifier = gbm_bo_get_modifier(bo); /* we have the fd now, no longer need the bo: */ gbm_bo_destroy(bo); @@ -243,7 +244,7 @@ static int get_fd_rgba(uint32_t *pstride) return fd; } -static int get_fd_y(uint32_t *pstride) +static int get_fd_y(uint32_t *pstride, uint64_t *modifier) { struct gbm_bo *bo; void *map_data = NULL; @@ -264,6 +265,7 @@ static int get_fd_y(uint32_t *pstride) gbm_bo_unmap(bo, map_data); fd = gbm_bo_get_fd(bo); + *modifier = gbm_bo_get_modifier(bo); /* we have the fd now, no longer need the bo: */ gbm_bo_destroy(bo); @@ -273,7 +275,7 @@ static int get_fd_y(uint32_t *pstride) return fd; } -static int get_fd_uv(uint32_t *pstride) +static int get_fd_uv(uint32_t *pstride, uint64_t *modifier) { struct gbm_bo *bo; void *map_data = NULL; @@ -294,6 +296,7 @@ static int get_fd_uv(uint32_t *pstride) gbm_bo_unmap(bo, map_data); fd = gbm_bo_get_fd(bo); + *modifier = gbm_bo_get_modifier(bo); /* we have the fd now, no longer need the bo: */ gbm_bo_destroy(bo); @@ -306,7 +309,8 @@ static int get_fd_uv(uint32_t *pstride) static int init_tex_rgba(void) { uint32_t stride; - int fd = get_fd_rgba(&stride); + uint64_t modifier; + int fd = get_fd_rgba(&stride, &modifier); const EGLint attr[] = { EGL_WIDTH, texw, EGL_HEIGHT, texh, @@ -314,6 +318,8 @@ static 
int init_tex_rgba(void) EGL_DMA_BUF_PLANE0_FD_EXT, fd, EGL_DMA_BUF_PLANE0_OFFSET_EXT, 0, EGL_DMA_BUF_PLANE0_PITCH_EXT, stride, + EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT, modifier & 0xffffffff, + EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT, modifier >> 32, EGL_NONE }; EGLImage img; @@ -339,8 +345,9 @@ static int init_tex_rgba(void) static int init_tex_nv12_2img(void) { uint32_t stride_y, stride_uv; - int fd_y = get_fd_y(&stride_y); - int fd_uv = get_fd_uv(&stride_uv); + uint64_t modifier_y, modifier_uv; + int fd_y = get_fd_y(&stride_y, &modifier_y); + int fd_uv = get_fd_uv(&stride_uv, &modifier_uv); const EGLint attr_y[] = { EGL_WIDTH, texw, EGL_HEIGHT, texh, @@ -348,6 +355,8 @@ static int init_tex_nv12_2img(void) EGL_DMA_BUF_PLANE0_FD_EXT, fd_y, EGL_DMA_BUF_PLANE0_OFFSET_EXT, 0, EGL_DMA_BUF_PLANE0_PITCH_EXT, stride_y, + EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT, modifier_y & 0xffffffff, + EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT, modifier_y >> 32, EGL_NONE }; const EGLint attr_uv[] = { @@ -357,6 +366,8 @@ static int init_tex_nv12_2img(void) EGL_DMA_BUF_PLANE0_FD_EXT, fd_uv, EGL_DMA_BUF_PLANE0_OFFSET_EXT, 0, EGL_DMA_BUF_PLANE0_PITCH_EXT, stride_uv, + EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT, modifier_uv & 0xffffffff, + EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT, modifier_uv >> 32, EGL_NONE }; EGLImage img_y, img_uv; @@ -397,8 +408,9 @@ static int init_tex_nv12_2img(void) static int init_tex_nv12_1img(void) { uint32_t stride_y, stride_uv; - int fd_y = get_fd_y(&stride_y); - int fd_uv = get_fd_uv(&stride_uv); + uint64_t modifier_y, modifier_uv; + int fd_y = get_fd_y(&stride_y, &modifier_y); + int fd_uv = get_fd_uv(&stride_uv, &modifier_uv); const EGLint attr[] = { EGL_WIDTH, texw, EGL_HEIGHT, texh, @@ -406,9 +418,13 @@ static int init_tex_nv12_1img(void) EGL_DMA_BUF_PLANE0_FD_EXT, fd_y, EGL_DMA_BUF_PLANE0_OFFSET_EXT, 0, EGL_DMA_BUF_PLANE0_PITCH_EXT, stride_y, + EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT, modifier_y & 0xffffffff, + EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT, modifier_y >> 32, EGL_DMA_BUF_PLANE1_FD_EXT, fd_uv, EGL_DMA_BUF_PLANE1_OFFSET_EXT, 
0, EGL_DMA_BUF_PLANE1_PITCH_EXT, stride_uv, +
Re: [Mesa-dev] [PATCH] radeonsi: correct si_vgt_param_key on big endian machines
Am Dienstag, den 20.03.2018, 15:33 +0100 schrieb Nicolai Hähnle: > Nice, did you actually get it to work entirely on a big endian > machine? > > Bit fields aren't super portable, [...] Indeed, the order of the bits in a bit field is compiler implementation dependent. To make sure that changing the compiler doesn't change the behaviour of the code I'd suggest that instead of using a bit field the index should be created by explicitly shifting the bits into the right positions. Best, Gert > However, I > think we should use the PIPE_ARCH_LITTLE_ENDIAN define from > u_endian.h > > Cheers, > Nicolai > > On 20.03.2018 15:21, Bas Vermeulen wrote: > > Using mesa OpenCL failed on a big endian PowerPC machine because > > si_vgt_param_key is using bitfields and a 32 bit int for an > > index into an array. > > > > Fix si_vgt_param_key to work correctly on both little endian > > and big endian machines. > > > > Signed-off-by: Bas Vermeulen > > --- > > src/gallium/drivers/radeonsi/si_pipe.h | 13 + > > 1 file changed, 13 insertions(+) > > > > diff --git a/src/gallium/drivers/radeonsi/si_pipe.h > > b/src/gallium/drivers/radeonsi/si_pipe.h > > index 2053dcb9fc..32dbdf6e2c 100644 > > --- a/src/gallium/drivers/radeonsi/si_pipe.h > > +++ b/src/gallium/drivers/radeonsi/si_pipe.h > > @@ -385,6 +385,7 @@ struct si_shader_ctx_state { > >*/ > > union si_vgt_param_key { > > struct { > > +#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) > > unsigned prim:4; > > unsigned uses_instancing:1; > > unsigned > > multi_instances_smaller_than_primgroup:1; > > @@ -395,6 +396,18 @@ union si_vgt_param_key { > > unsigned tess_uses_prim_id:1; > > unsigned uses_gs:1; > > unsigned _pad:32 - SI_NUM_VGT_PARAM_KEY_BITS; > > +#else /* __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ */ > > + unsigned _pad:32 - SI_NUM_VGT_PARAM_KEY_BITS; > > + unsigned uses_gs:1; > > + unsigned tess_uses_prim_id:1; > > + unsigned uses_tess:1; > > + unsigned line_stipple_enabled:1; > > + unsigned count_from_stream_output:1; > > + unsigned 
primitive_restart:1; > > + unsigned multi_instances_smaller_than_primgroup:1; > > + unsigned uses_instancing:1; > > + unsigned prim:4; > > +#endif > > } u; > > uint32_t index; > > }; > > > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: annotate brw_oa.py's --header and --code as required
On 20 March 2018 at 17:06, Lionel Landwerlin wrote: > On 20/03/18 16:29, Emil Velikov wrote: >> >> From: Emil Velikov >> >> As of earlier commit, the --header was made a hard requirement when >> using --code. >> >> Hence - annotate both as required and drop a few no longer needed >> checks. >> >> Fixes: 035cc7a12dc0 ("i965: perf: reduce i965 binary size") >> Cc: Lionel Landwerlin >> Signed-off-by: Emil Velikov >> --- >> Tad easier to read with git show -w >> --- >> src/mesa/drivers/dri/i965/brw_oa.py | 37 >> +++-- >> 1 file changed, 15 insertions(+), 22 deletions(-) >> >> diff --git a/src/mesa/drivers/dri/i965/brw_oa.py >> b/src/mesa/drivers/dri/i965/brw_oa.py >> index 63db28bba97..4719b4c01c8 100644 >> --- a/src/mesa/drivers/dri/i965/brw_oa.py >> +++ b/src/mesa/drivers/dri/i965/brw_oa.py >> @@ -32,19 +32,16 @@ c_file = None >> _c_indent = 0 >> def c(*args): >> -if c_file: >> -code = ' '.join(map(str,args)) >> -for line in code.splitlines(): >> -text = ''.rjust(_c_indent) + line >> -c_file.write(text.rstrip() + "\n") >> +code = ' '.join(map(str,args)) >> +for line in code.splitlines(): >> +text = ''.rjust(_c_indent) + line >> +c_file.write(text.rstrip() + "\n") >> # indented, but no trailing newline... 
>> def c_line_start(code): >> -if c_file: >> -c_file.write(''.rjust(_c_indent) + code) >> +c_file.write(''.rjust(_c_indent) + code) >> def c_raw(code): >> -if c_file: >> -c_file.write(code) >> +c_file.write(code) >> def c_indent(n): >> global _c_indent >> @@ -57,11 +54,10 @@ header_file = None >> _h_indent = 0 >> def h(*args): >> -if header_file: >> -code = ' '.join(map(str,args)) >> -for line in code.splitlines(): >> -text = ''.rjust(_h_indent) + line >> -header_file.write(text.rstrip() + "\n") >> +code = ' '.join(map(str,args)) >> +for line in code.splitlines(): >> +text = ''.rjust(_h_indent) + line >> +header_file.write(text.rstrip() + "\n") >> def h_indent(n): >> global _c_indent >> @@ -556,17 +552,14 @@ def main(): >> global header_file >> parser = argparse.ArgumentParser() >> -parser.add_argument("--header", help="Header file to write") >> -parser.add_argument("--code", help="C file to write") >> +parser.add_argument("--header", help="Header file to write", >> required=True) >> +parser.add_argument("--code", help="C file to write", required=True) >> parser.add_argument("xml_files", nargs='+', help="List of xml >> metrics files to process") >> args = parser.parse_args() >> -if args.header: >> -header_file = open(args.header, 'w') >> - >> -if args.code: >> -c_file = open(args.code, 'w') >> +header_file = open(args.header, 'w') >> +c_file = open(args.code, 'w') >> gens = [] >> for xml_file in args.xml_files: >> @@ -617,7 +610,7 @@ def main(): >> """)) >> -c("#include \"" + os.path.basename(args.header) + "\"") >> +c("#include \"" + os.path.basename(header_file) + "\"") > > > basename() on a file object doesn't work. > With that fixed : > > Reviewed-by: Lionel Landwerlin > Right. Fixed and pushed. Thank you Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: annotate brw_oa.py's --header and --code as required
On 20/03/18 16:29, Emil Velikov wrote: From: Emil Velikov As of earlier commit, the --header was made a hard requirement when using --code. Hence - annotate both as required and drop a few no longer needed checks. Fixes: 035cc7a12dc0 ("i965: perf: reduce i965 binary size") Cc: Lionel Landwerlin Signed-off-by: Emil Velikov --- Tad easier to read with git show -w --- src/mesa/drivers/dri/i965/brw_oa.py | 37 +++-- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_oa.py b/src/mesa/drivers/dri/i965/brw_oa.py index 63db28bba97..4719b4c01c8 100644 --- a/src/mesa/drivers/dri/i965/brw_oa.py +++ b/src/mesa/drivers/dri/i965/brw_oa.py @@ -32,19 +32,16 @@ c_file = None _c_indent = 0 def c(*args): -if c_file: -code = ' '.join(map(str,args)) -for line in code.splitlines(): -text = ''.rjust(_c_indent) + line -c_file.write(text.rstrip() + "\n") +code = ' '.join(map(str,args)) +for line in code.splitlines(): +text = ''.rjust(_c_indent) + line +c_file.write(text.rstrip() + "\n") # indented, but no trailing newline... 
def c_line_start(code): -if c_file: -c_file.write(''.rjust(_c_indent) + code) +c_file.write(''.rjust(_c_indent) + code) def c_raw(code): -if c_file: -c_file.write(code) +c_file.write(code) def c_indent(n): global _c_indent @@ -57,11 +54,10 @@ header_file = None _h_indent = 0 def h(*args): -if header_file: -code = ' '.join(map(str,args)) -for line in code.splitlines(): -text = ''.rjust(_h_indent) + line -header_file.write(text.rstrip() + "\n") +code = ' '.join(map(str,args)) +for line in code.splitlines(): +text = ''.rjust(_h_indent) + line +header_file.write(text.rstrip() + "\n") def h_indent(n): global _c_indent @@ -556,17 +552,14 @@ def main(): global header_file parser = argparse.ArgumentParser() -parser.add_argument("--header", help="Header file to write") -parser.add_argument("--code", help="C file to write") +parser.add_argument("--header", help="Header file to write", required=True) +parser.add_argument("--code", help="C file to write", required=True) parser.add_argument("xml_files", nargs='+', help="List of xml metrics files to process") args = parser.parse_args() -if args.header: -header_file = open(args.header, 'w') - -if args.code: -c_file = open(args.code, 'w') +header_file = open(args.header, 'w') +c_file = open(args.code, 'w') gens = [] for xml_file in args.xml_files: @@ -617,7 +610,7 @@ def main(): """)) -c("#include \"" + os.path.basename(args.header) + "\"") +c("#include \"" + os.path.basename(header_file) + "\"") basename() on a file object doesn't work. With that fixed : Reviewed-by: Lionel Landwerlin Thanks for this cleanup! c(textwrap.dedent("""\ #include "brw_context.h" ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: annotate brw_oa.py's --header and --code as required
Quoting Emil Velikov (2018-03-20 09:29:00) [snip] > gens = [] > for xml_file in args.xml_files: > @@ -617,7 +610,7 @@ def main(): > > """)) > > -c("#include \"" + os.path.basename(args.header) + "\"") > +c("#include \"" + os.path.basename(header_file) + "\"") You're calling os.path.basename on a file object, which isn't valid. This should still be args.header. > > c(textwrap.dedent("""\ > #include "brw_context.h" > -- > 2.16.0 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev signature.asc Description: signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: fix autotools/android build
On 20/03/18 16:23, Emil Velikov wrote: On 20 March 2018 at 14:59, Lionel Landwerlin wrote: Autotools/android builds generate the header & code files in 2 steps, but the code generation requires the name of the header file to include it. This change generates both files in one command. Fixes: 035cc7a12dc ("i965: perf: reduce i965 binary size") Signed-off-by: Lionel Landwerlin --- Hmm I did not see that the reworked script requires the header, since the required=True is missing. Will send a patch for that in a moment. # .c and .h files in one go so we don't hit problems with parallel # make and multiple invocations of the same script trying to write # to the same files. Please drop the comment block. With that the patch is Reviewed-by: Emil Velikov -Emil Thanks, pushed. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] dri3: Fix typo in version check
On 20 March 2018 at 16:24, Dylan Baker wrote: > Quoting Daniel Stone (2018-03-20 09:17:21) >> The have-new-DRI3 codepaths would never actually properly trigger, since >> there was a typo in configure.ac which broke the version check. This >> went unnoticed but for an error in config.log if you looked closely >> enough. > > Reviewed-by: Dylan Baker Thanks Dylan, and thanks Dave for reviewing the previous one! Both pushed now. Cheers, Daniel ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] aubinator_error_decode: Compare only the class_name of the ring.
On 20/03/18 16:30, Chris Wilson wrote: Quoting Rafael Antognolli (2018-03-20 16:13:08) ring_name is "<class> + <instance>" (e.g. rcs0). So we need to first compare the class name only, then get the instance id. Without this, INSTDONE is not being decoded. Signed-off-by: Rafael Antognolli Cc: Chris Wilson --- src/intel/tools/aubinator_error_decode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/intel/tools/aubinator_error_decode.c b/src/intel/tools/aubinator_error_decode.c index 017be5bbc2b..db880d74a9e 100644 --- a/src/intel/tools/aubinator_error_decode.c +++ b/src/intel/tools/aubinator_error_decode.c @@ -120,7 +120,7 @@ static int ring_name_to_class(const char *ring_name, [VECS] = "vecs", }; for (size_t i = 0; i < ARRAY_SIZE(class_names); i++) { - if (strcmp(ring_name, class_names[i])) + if (strncmp(ring_name, class_names[i], strlen(class_names[i]))) Gah, I remember noticing this and completely forgot to send a patch. Reviewed-by: Chris Wilson Thanks, -Chris Just an idea, but maybe sending the addresses of the registers from the kernel would help? ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] i965: annotate brw_oa.py's --header and --code as required
From: Emil Velikov As of earlier commit, the --header was made a hard requirement when using --code. Hence - annotate both as required and drop a few no longer needed checks. Fixes: 035cc7a12dc0 ("i965: perf: reduce i965 binary size") Cc: Lionel Landwerlin Signed-off-by: Emil Velikov --- Tad easier to read with git show -w --- src/mesa/drivers/dri/i965/brw_oa.py | 37 +++-- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_oa.py b/src/mesa/drivers/dri/i965/brw_oa.py index 63db28bba97..4719b4c01c8 100644 --- a/src/mesa/drivers/dri/i965/brw_oa.py +++ b/src/mesa/drivers/dri/i965/brw_oa.py @@ -32,19 +32,16 @@ c_file = None _c_indent = 0 def c(*args): -if c_file: -code = ' '.join(map(str,args)) -for line in code.splitlines(): -text = ''.rjust(_c_indent) + line -c_file.write(text.rstrip() + "\n") +code = ' '.join(map(str,args)) +for line in code.splitlines(): +text = ''.rjust(_c_indent) + line +c_file.write(text.rstrip() + "\n") # indented, but no trailing newline... 
def c_line_start(code): -if c_file: -c_file.write(''.rjust(_c_indent) + code) +c_file.write(''.rjust(_c_indent) + code) def c_raw(code): -if c_file: -c_file.write(code) +c_file.write(code) def c_indent(n): global _c_indent @@ -57,11 +54,10 @@ header_file = None _h_indent = 0 def h(*args): -if header_file: -code = ' '.join(map(str,args)) -for line in code.splitlines(): -text = ''.rjust(_h_indent) + line -header_file.write(text.rstrip() + "\n") +code = ' '.join(map(str,args)) +for line in code.splitlines(): +text = ''.rjust(_h_indent) + line +header_file.write(text.rstrip() + "\n") def h_indent(n): global _c_indent @@ -556,17 +552,14 @@ def main(): global header_file parser = argparse.ArgumentParser() -parser.add_argument("--header", help="Header file to write") -parser.add_argument("--code", help="C file to write") +parser.add_argument("--header", help="Header file to write", required=True) +parser.add_argument("--code", help="C file to write", required=True) parser.add_argument("xml_files", nargs='+', help="List of xml metrics files to process") args = parser.parse_args() -if args.header: -header_file = open(args.header, 'w') - -if args.code: -c_file = open(args.code, 'w') +header_file = open(args.header, 'w') +c_file = open(args.code, 'w') gens = [] for xml_file in args.xml_files: @@ -617,7 +610,7 @@ def main(): """)) -c("#include \"" + os.path.basename(args.header) + "\"") +c("#include \"" + os.path.basename(header_file) + "\"") c(textwrap.dedent("""\ #include "brw_context.h" -- 2.16.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] aubinator_error_decode: Compare only the class_name of the ring.
Quoting Rafael Antognolli (2018-03-20 16:13:08) > ring_name is " + " (e.g. rcs0). So we need to > first compare the class name only, then get the instance id. > > Without this, INSTDONE is not being decoded. > > Signed-off-by: Rafael Antognolli > Cc: Chris Wilson > --- > src/intel/tools/aubinator_error_decode.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/intel/tools/aubinator_error_decode.c > b/src/intel/tools/aubinator_error_decode.c > index 017be5bbc2b..db880d74a9e 100644 > --- a/src/intel/tools/aubinator_error_decode.c > +++ b/src/intel/tools/aubinator_error_decode.c > @@ -120,7 +120,7 @@ static int ring_name_to_class(const char *ring_name, >[VECS] = "vecs", > }; > for (size_t i = 0; i < ARRAY_SIZE(class_names); i++) { > - if (strcmp(ring_name, class_names[i])) > + if (strncmp(ring_name, class_names[i], strlen(class_names[i]))) Gah, I remember noticing this and completely forgot to send a patch. Reviewed-by: Chris Wilson Thanks, -Chris ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v6 1/2] gallium/winsys/kms: Fix possible leak in map/unmap.
On Wed, Mar 21, 2018 at 12:58 AM, Emil Velikov wrote: > On 20 March 2018 at 14:24, Tomasz Figa wrote: >> On Tue, Mar 20, 2018 at 10:44 PM, Emil Velikov >> wrote: >>> On 20 March 2018 at 04:40, Tomasz Figa wrote: On Tue, Mar 20, 2018 at 2:55 AM, Emil Velikov wrote: > Hi Lepton, > > On 19 March 2018 at 17:33, Lepton Wu wrote: >> If user calls map twice for kms_sw_displaytarget, the first mapped >> buffer could get leaked. Instead of calling mmap every time, just >> reuse previous mapping. Since user could map same displaytarget with >> different flags, we have to keep two different pointers, one for rw >> mapping and one for ro mapping. Also introduce reference count for >> mapped buffer so we can unmap them at right time. >> >> Reviewed-by: Emil Velikov >> Reviewed-by: Tomasz Figa >> Signed-off-by: Lepton Wu > > Nit: normally it's a good idea to have brief revision log when sending > new version: > v2: > - split from larger patch (Emil) > v3: > - remove munmap w/a from dt_destory(Emil) > ... > >> @@ -170,6 +172,14 @@ kms_sw_displaytarget_destroy(struct sw_winsys *ws, >> if (kms_sw_dt->ref_count > 0) >>return; >> >> + if (kms_sw_dt->map_count > 0) { >> + DEBUG_PRINT("KMS-DEBUG: fix leaked map buffer %u\n", >> kms_sw_dt->handle); >> + munmap(kms_sw_dt->mapped, kms_sw_dt->size); >> + kms_sw_dt->mapped = NULL; >> + munmap(kms_sw_dt->ro_mapped, kms_sw_dt->size); >> + kms_sw_dt->ro_mapped = NULL; >> + } >> + > I could swear this workaround was missing in earlier revisions. I > don't see anything in Tomasz' reply that suggesting we should bring it > back? > AFAICT the added refcounting makes no difference - the driver isn't > cleaning up after itself. > > Am I missing something? I think this is actually consistent with what other winsys implementations do. They free the map (or shadow malloc/shm buffer) in _destroy() callback, so we should probably do the same. >>> Looking at the SW winsys - none of them seem to unmap at destroy time. 
>>> Perhaps you meant that the HW ones do? >> >> dri: >> https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/winsys/sw/dri/dri_sw_winsys.c#n128 >> >> gdi: >> https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/winsys/sw/gdi/gdi_sw_winsys.c#n116 >> >> hgl: >> https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/winsys/sw/hgl/hgl_sw_winsys.c#n152 >> >> xlib: >> https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/winsys/sw/xlib/xlib_sw_winsys.c#n260 >> https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/winsys/sw/xlib/xlib_sw_winsys.c#n271 >> >> They don't do real mapping - they all work on locally allocated memory. >> However, after destroy, no resources are leaked and the pointers >> returned from _map() are not valid anymore. >> > As mentioned before - zero objections against changing that, but keep > it separate patch. > Pretty please? SGTM. Best regards, Tomasz ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] dri3: Fix typo in version check
Quoting Daniel Stone (2018-03-20 09:17:21) > The have-new-DRI3 codepaths would never actually properly trigger, since > there was a typo in configure.ac which broke the version check. This > went unnoticed but for an error in config.log if you looked closely > enough. > > Signed-off-by: Daniel Stone > Reported-by: Lukas F. Hartmann > Fixes: 7aeef2d4efdc ("dri3: allow building against older xcb (v3)") > Cc: Dave Airlie > --- > configure.ac | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/configure.ac b/configure.ac > index d1c8bb82dae..5074275211e 100644 > --- a/configure.ac > +++ b/configure.ac > @@ -1852,7 +1852,7 @@ if test x"$enable_dri3" = xyes; then > > dri3_modules="x11-xcb xcb >= $XCB_REQUIRED xcb-dri3 xcb-xfixes > xcb-present xcb-sync xshmfence >= $XSHMFENCE_REQUIRED" > PKG_CHECK_MODULES([XCB_DRI3], [$dri3_modules]) > -dri3_modifier_modules="xcb-dri3 >= $XCBDRI3_MODIFIERS_REQUIRED > xcb-present >= $XCBPRESENT_MODIFIERS_REQUIRES" > +dri3_modifier_modules="xcb-dri3 >= $XCBDRI3_MODIFIERS_REQUIRED > xcb-present >= $XCBPRESENT_MODIFIERS_REQUIRED" > PKG_CHECK_MODULES([XCB_DRI3_MODIFIERS], [$dri3_modifier_modules], > [have_dri3_modifiers=yes], [have_dri3_modifiers=no]) > > if test "x$have_dri3_modifiers" == xyes; then > -- > 2.16.2 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev Reviewed-by: Dylan Baker signature.asc Description: signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: fix autotools/android build
On 20 March 2018 at 14:59, Lionel Landwerlin wrote: > Autotools/android builds generate the header & code files in 2 steps, > but the code generation requires the name of the header file to > include it. > > This change generates both files in one command. > > Fixes: 035cc7a12dc ("i965: perf: reduce i965 binary size") > Signed-off-by: Lionel Landwerlin > --- Hmm I did not see that the reworked script requires the header, since the required=True is missing. Will send a patch for that in a moment. > # .c and .h files in one go so we don't hit problems with parallel > # make and multiple invocations of the same script trying to write > # to the same files. Please drop the comment block. With that the patch is Reviewed-by: Emil Velikov -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] meson: Don't build svga on ARM/AArch64
On 20 March 2018 at 16:16, Dylan Baker wrote: > Quoting Daniel Stone (2018-03-20 01:54:25) >> VMware has no (published) support for Arm-architecture guests. Pushed now with review and the new suggested title - thanks both for review! Cheers, Daniel ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] dri3: Fix typo in version check
The have-new-DRI3 codepaths would never actually properly trigger, since there was a typo in configure.ac which broke the version check. This went unnoticed but for an error in config.log if you looked closely enough. Signed-off-by: Daniel Stone Reported-by: Lukas F. Hartmann Fixes: 7aeef2d4efdc ("dri3: allow building against older xcb (v3)") Cc: Dave Airlie --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index d1c8bb82dae..5074275211e 100644 --- a/configure.ac +++ b/configure.ac @@ -1852,7 +1852,7 @@ if test x"$enable_dri3" = xyes; then dri3_modules="x11-xcb xcb >= $XCB_REQUIRED xcb-dri3 xcb-xfixes xcb-present xcb-sync xshmfence >= $XSHMFENCE_REQUIRED" PKG_CHECK_MODULES([XCB_DRI3], [$dri3_modules]) -dri3_modifier_modules="xcb-dri3 >= $XCBDRI3_MODIFIERS_REQUIRED xcb-present >= $XCBPRESENT_MODIFIERS_REQUIRES" +dri3_modifier_modules="xcb-dri3 >= $XCBDRI3_MODIFIERS_REQUIRED xcb-present >= $XCBPRESENT_MODIFIERS_REQUIRED" PKG_CHECK_MODULES([XCB_DRI3_MODIFIERS], [$dri3_modifier_modules], [have_dri3_modifiers=yes], [have_dri3_modifiers=no]) if test "x$have_dri3_modifiers" == xyes; then -- 2.16.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] st/mesa: add compiler/nir/ prefix for nir includes
On 20 March 2018 at 16:15, Brian Paul wrote: > On 03/20/2018 05:41 AM, Emil Velikov wrote: >> >> From: Emil Velikov >> >> Stay consistent with the rest of the codebase, effectively fixing the >> autotools build. >> >> Bugzilla: >> https://urldefense.proofpoint.com/v2/url?u=https-3A__bugs.freedesktop.org_show-5Fbug.cgi-3Fid-3D105621&d=DwIGaQ&c=uilaK90D4TOVoH58JNXRgQ&r=Ie7_encNUsqxbSRbqbNgofw0ITcfE8JKfaUjIQhncGA&m=Vk4HmwKubcUs_fwVK_L6w5Tf9LvLGGY2l5z3nKDmFGQ&s=2ocWWSR1k_iuISMLUsdeE3-0YFlSQA0PPBef_eKW4Lw&e= >> Fixes: ffa4bbe4665 ("st/nir/radeonsi: move nir_lower_uniforms_to_ubo() >> to the state tracker") >> Cc: Timothy Arceri >> Signed-off-by: Emil Velikov >> --- >> src/mesa/state_tracker/st_nir_lower_uniforms_to_ubo.c | 4 ++-- >> 1 file changed, 2 insertions(+), 2 deletions(-) >> >> diff --git a/src/mesa/state_tracker/st_nir_lower_uniforms_to_ubo.c >> b/src/mesa/state_tracker/st_nir_lower_uniforms_to_ubo.c >> index e2a477ecc74..b2f8f833bbb 100644 >> --- a/src/mesa/state_tracker/st_nir_lower_uniforms_to_ubo.c >> +++ b/src/mesa/state_tracker/st_nir_lower_uniforms_to_ubo.c >> @@ -29,8 +29,8 @@ >>* point by 1. >>*/ >> -#include "nir.h" >> -#include "nir_builder.h" >> +#include "compiler/nir/nir.h" >> +#include "compiler/nir/nir_builder.h" >> #include "st_nir.h" >> #include "program/prog_parameter.h" >> > > > Reviewed-by: Brian Paul > > Go ahead and push your patch. I didn't know there was a bugzilla report for > it. > Done - thank you! -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] meson: Don't build svga on ARM/AArch64
Quoting Daniel Stone (2018-03-20 01:54:25) > VMware has no (published) support for Arm-architecture guests. > > Signed-off-by: Daniel Stone > Cc: Dylan Baker > --- > meson.build | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/meson.build b/meson.build > index 88e90fe8119..24cad58c61e 100644 > --- a/meson.build > +++ b/meson.build > @@ -151,7 +151,7 @@ if _drivers == 'auto' > if ['x86', 'x86_64'].contains(host_machine.cpu_family()) >_drivers = 'r300,r600,radeonsi,nouveau,virgl,svga,swrast' > elif ['arm', 'aarch64'].contains(host_machine.cpu_family()) > - _drivers = > 'pl111,vc4,vc5,freedreno,etnaviv,imx,nouveau,tegra,virgl,svga,swrast' > + _drivers = > 'pl111,vc4,vc5,freedreno,etnaviv,imx,nouveau,tegra,virgl,swrast' > else >error('Unknown architecture. Please pass -Dgallium-drivers to set > driver options. Patches gladly accepted to fix this.') > endif > -- > 2.16.2 > for the series: Reviewed-by: Dylan Baker signature.asc Description: signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] st/mesa: add compiler/nir/ prefix for nir includes
On 03/20/2018 05:41 AM, Emil Velikov wrote: From: Emil Velikov Stay consistent with the rest of the codebase, effectively fixing the autotools build. Bugzilla: https://urldefense.proofpoint.com/v2/url?u=https-3A__bugs.freedesktop.org_show-5Fbug.cgi-3Fid-3D105621&d=DwIGaQ&c=uilaK90D4TOVoH58JNXRgQ&r=Ie7_encNUsqxbSRbqbNgofw0ITcfE8JKfaUjIQhncGA&m=Vk4HmwKubcUs_fwVK_L6w5Tf9LvLGGY2l5z3nKDmFGQ&s=2ocWWSR1k_iuISMLUsdeE3-0YFlSQA0PPBef_eKW4Lw&e= Fixes: ffa4bbe4665 ("st/nir/radeonsi: move nir_lower_uniforms_to_ubo() to the state tracker") Cc: Timothy Arceri Signed-off-by: Emil Velikov --- src/mesa/state_tracker/st_nir_lower_uniforms_to_ubo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mesa/state_tracker/st_nir_lower_uniforms_to_ubo.c b/src/mesa/state_tracker/st_nir_lower_uniforms_to_ubo.c index e2a477ecc74..b2f8f833bbb 100644 --- a/src/mesa/state_tracker/st_nir_lower_uniforms_to_ubo.c +++ b/src/mesa/state_tracker/st_nir_lower_uniforms_to_ubo.c @@ -29,8 +29,8 @@ * point by 1. */ -#include "nir.h" -#include "nir_builder.h" +#include "compiler/nir/nir.h" +#include "compiler/nir/nir_builder.h" #include "st_nir.h" #include "program/prog_parameter.h" Reviewed-by: Brian Paul Go ahead and push your patch. I didn't know there was a bugzilla report for it. -Brian ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] aubinator_error_decode: Compare only the class_name of the ring.
ring_name is "<class name> + <instance id>" (e.g. rcs0). So we need to first compare the class name only, then get the instance id. Without this, INSTDONE is not being decoded. Signed-off-by: Rafael Antognolli Cc: Chris Wilson --- src/intel/tools/aubinator_error_decode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/intel/tools/aubinator_error_decode.c b/src/intel/tools/aubinator_error_decode.c index 017be5bbc2b..db880d74a9e 100644 --- a/src/intel/tools/aubinator_error_decode.c +++ b/src/intel/tools/aubinator_error_decode.c @@ -120,7 +120,7 @@ static int ring_name_to_class(const char *ring_name, [VECS] = "vecs", }; for (size_t i = 0; i < ARRAY_SIZE(class_names); i++) { - if (strcmp(ring_name, class_names[i])) + if (strncmp(ring_name, class_names[i], strlen(class_names[i]))) continue; *class = i; -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] st/mesa: fix nir.h, nir_builder.h #includes to fix build
On 20 March 2018 at 15:51, Brian Paul wrote: > Use compiler/nir/nir.h as we do in other places in the state tracker. > I'm not sure why this just started to fail. The #includes have been > there for a while. I've sent an identical patch (+fixes/cc tags) some hours ago [1]. Feel free to push either one: Reviewed-by: Emil Velikov -Emil [1] https://patchwork.freedesktop.org/patch/211459/ ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] radeonsi: correct si_vgt_param_key on big endian machines
Using mesa OpenCL failed on a big endian PowerPC machine because si_vgt_param_key is using bitfields and a 32 bit int for an index into an array. Fix si_vgt_param_key to work correctly on both little endian and big endian machines. Signed-off-by: Bas Vermeulen --- src/gallium/drivers/radeonsi/si_pipe.h | 13 + 1 file changed, 13 insertions(+) diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 2053dcb9fc..32dbdf6e2c 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -385,6 +385,7 @@ struct si_shader_ctx_state { */ union si_vgt_param_key { struct { +#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) unsigned prim:4; unsigned uses_instancing:1; unsigned multi_instances_smaller_than_primgroup:1; @@ -395,6 +396,18 @@ union si_vgt_param_key { unsigned tess_uses_prim_id:1; unsigned uses_gs:1; unsigned _pad:32 - SI_NUM_VGT_PARAM_KEY_BITS; +#else /* __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ */ + unsigned _pad:32 - SI_NUM_VGT_PARAM_KEY_BITS; + unsigned uses_gs:1; + unsigned tess_uses_prim_id:1; + unsigned uses_tess:1; + unsigned line_stipple_enabled:1; + unsigned count_from_stream_output:1; + unsigned primitive_restart:1; + unsigned multi_instances_smaller_than_primgroup:1; + unsigned uses_instancing:1; + unsigned prim:4; +#endif } u; uint32_t index; }; -- 2.14.1 -- This message has been scanned for viruses and dangerous content by MailScanner, and is believed to be clean. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] radeonsi: correct si_vgt_param_key on big endian machines
I'm able to call clinfo without things crashing. Without this fix, clinfo results in a signal 11 because key.index is byte swapped. With it, I get the information I would expect. I'm working to test the OpenCL currently. I'll update the patch to use PIPE_ARCH_LITTLE_ENDIAN instead of my own #if. Bas Vermeulen On Tue, Mar 20, 2018 at 3:33 PM, Nicolai Hähnle wrote: > Nice, did you actually get it to work entirely on a big endian machine? > > Bit fields aren't super portable, but this looks good enough. However, I > think we should use the PIPE_ARCH_LITTLE_ENDIAN define from u_endian.h > > Cheers, > Nicolai > > > On 20.03.2018 15:21, Bas Vermeulen wrote: > >> Using mesa OpenCL failed on a big endian PowerPC machine because >> si_vgt_param_key is using bitfields and a 32 bit int for an >> index into an array. >> >> Fix si_vgt_param_key to work correctly on both little endian >> and big endian machines. >> >> Signed-off-by: Bas Vermeulen >> --- >> src/gallium/drivers/radeonsi/si_pipe.h | 13 + >> 1 file changed, 13 insertions(+) >> >> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h >> b/src/gallium/drivers/radeonsi/si_pipe.h >> index 2053dcb9fc..32dbdf6e2c 100644 >> --- a/src/gallium/drivers/radeonsi/si_pipe.h >> +++ b/src/gallium/drivers/radeonsi/si_pipe.h >> @@ -385,6 +385,7 @@ struct si_shader_ctx_state { >>*/ >> union si_vgt_param_key { >> struct { >> +#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) >> unsigned prim:4; >> unsigned uses_instancing:1; >> unsigned multi_instances_smaller_than_primgroup:1; >> @@ -395,6 +396,18 @@ union si_vgt_param_key { >> unsigned tess_uses_prim_id:1; >> unsigned uses_gs:1; >> unsigned _pad:32 - SI_NUM_VGT_PARAM_KEY_BITS; >> +#else /* __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ */ >> + unsigned _pad:32 - SI_NUM_VGT_PARAM_KEY_BITS; >> + unsigned uses_gs:1; >> + unsigned tess_uses_prim_id:1; >> + unsigned uses_tess:1; >> + unsigned line_stipple_enabled:1; >> + unsigned count_from_stream_output:1; >> + unsigned primitive_restart:1; >> + 
unsigned multi_instances_smaller_than_primgroup:1; >> + unsigned uses_instancing:1; >> + unsigned prim:4; >> +#endif >> } u; >> uint32_t index; >> }; >> >> > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v6 1/2] gallium/winsys/kms: Fix possible leak in map/unmap.
On 20 March 2018 at 14:24, Tomasz Figa wrote: > On Tue, Mar 20, 2018 at 10:44 PM, Emil Velikov > wrote: >> On 20 March 2018 at 04:40, Tomasz Figa wrote: >>> On Tue, Mar 20, 2018 at 2:55 AM, Emil Velikov >>> wrote: Hi Lepton, On 19 March 2018 at 17:33, Lepton Wu wrote: > If user calls map twice for kms_sw_displaytarget, the first mapped > buffer could get leaked. Instead of calling mmap every time, just > reuse previous mapping. Since user could map same displaytarget with > different flags, we have to keep two different pointers, one for rw > mapping and one for ro mapping. Also introduce reference count for > mapped buffer so we can unmap them at right time. > > Reviewed-by: Emil Velikov > Reviewed-by: Tomasz Figa > Signed-off-by: Lepton Wu Nit: normally it's a good idea to have brief revision log when sending new version: v2: - split from larger patch (Emil) v3: - remove munmap w/a from dt_destory(Emil) ... > @@ -170,6 +172,14 @@ kms_sw_displaytarget_destroy(struct sw_winsys *ws, > if (kms_sw_dt->ref_count > 0) >return; > > + if (kms_sw_dt->map_count > 0) { > + DEBUG_PRINT("KMS-DEBUG: fix leaked map buffer %u\n", > kms_sw_dt->handle); > + munmap(kms_sw_dt->mapped, kms_sw_dt->size); > + kms_sw_dt->mapped = NULL; > + munmap(kms_sw_dt->ro_mapped, kms_sw_dt->size); > + kms_sw_dt->ro_mapped = NULL; > + } > + I could swear this workaround was missing in earlier revisions. I don't see anything in Tomasz' reply that suggesting we should bring it back? AFAICT the added refcounting makes no difference - the driver isn't cleaning up after itself. Am I missing something? >>> >>> I think this is actually consistent with what other winsys >>> implementations do. They free the map (or shadow malloc/shm buffer) in >>> _destroy() callback, so we should probably do the same. >>> >> Looking at the SW winsys - none of them seem to unmap at destroy time. >> Perhaps you meant that the HW ones do? 
> > dri: > https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/winsys/sw/dri/dri_sw_winsys.c#n128 > > gdi: > https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/winsys/sw/gdi/gdi_sw_winsys.c#n116 > > hgl: > https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/winsys/sw/hgl/hgl_sw_winsys.c#n152 > > xlib: > https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/winsys/sw/xlib/xlib_sw_winsys.c#n260 > https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/winsys/sw/xlib/xlib_sw_winsys.c#n271 > > They don't do real mapping - they all work on locally allocated memory. > However, after destroy, no resources are leaked and the pointers > returned from _map() are not valid anymore. > As mentioned before - zero objections against changing that, but keep it separate patch. Pretty please? -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] st/mesa: fix nir.h, nir_builder.h #includes to fix build
Use compiler/nir/nir.h as we do in other places in the state tracker. I'm not sure why this just started to fail. The #includes have been there for a while. --- src/mesa/state_tracker/st_nir_lower_uniforms_to_ubo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mesa/state_tracker/st_nir_lower_uniforms_to_ubo.c b/src/mesa/state_tracker/st_nir_lower_uniforms_to_ubo.c index e2a477e..b2f8f83 100644 --- a/src/mesa/state_tracker/st_nir_lower_uniforms_to_ubo.c +++ b/src/mesa/state_tracker/st_nir_lower_uniforms_to_ubo.c @@ -29,8 +29,8 @@ * point by 1. */ -#include "nir.h" -#include "nir_builder.h" +#include "compiler/nir/nir.h" +#include "compiler/nir/nir_builder.h" #include "st_nir.h" #include "program/prog_parameter.h" -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: fix autotools/android build
Reviewed-by: Tapani Pälli On 20.03.2018 16:59, Lionel Landwerlin wrote: Autotools/android builds generate the header & code files in 2 steps, but the code generation requires the name of the header file to include it. This change generates both files in one command. Fixes: 035cc7a12dc ("i965: perf: reduce i965 binary size") Signed-off-by: Lionel Landwerlin --- src/mesa/drivers/dri/i965/Android.mk | 9 +++-- src/mesa/drivers/dri/i965/Makefile.am | 6 +++--- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/mesa/drivers/dri/i965/Android.mk b/src/mesa/drivers/dri/i965/Android.mk index a3d010a5894..8c4a613bcf3 100644 --- a/src/mesa/drivers/dri/i965/Android.mk +++ b/src/mesa/drivers/dri/i965/Android.mk @@ -312,15 +312,12 @@ LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, \ i965_oa_xml_FILES := $(addprefix $(LOCAL_PATH)/, \ $(i965_oa_xml_FILES)) -$(intermediates)/brw_oa_metrics.h: $(LOCAL_PATH)/brw_oa.py $(i965_oa_xml_FILES) - @echo "target Generated: $(PRIVATE_MODULE) <= $(notdir $(@))" - @mkdir -p $(dir $@) - $(hide) $(MESA_PYTHON2) $< --header=$@ $(i965_oa_xml_FILES) - $(intermediates)/brw_oa_metrics.c: $(LOCAL_PATH)/brw_oa.py $(i965_oa_xml_FILES) @echo "target Generated: $(PRIVATE_MODULE) <= $(notdir $(@))" @mkdir -p $(dir $@) - $(hide) $(MESA_PYTHON2) $< --code=$@ $(i965_oa_xml_FILES) + $(hide) $(MESA_PYTHON2) $< --code=$@ $(i965_oa_xml_FILES) --header=$@ $(i965_oa_xml_FILES) + +$(intermediates)/brw_oa_metrics.h: $(intermediates)/brw_oa_metrics.c include $(MESA_COMMON_MK) include $(BUILD_SHARED_LIBRARY) diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am index 8c8ecc6d76b..f561d680f2c 100644 --- a/src/mesa/drivers/dri/i965/Makefile.am +++ b/src/mesa/drivers/dri/i965/Makefile.am @@ -118,7 +118,7 @@ EXTRA_DIST = \ # .c and .h files in one go so we don't hit problems with parallel # make and multiple invocations of the same script trying to write # to the same files. 
-brw_oa_metrics.h: brw_oa.py $(i965_oa_xml_FILES) - $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/brw_oa.py --header=$(builddir)/brw_oa_metrics.h $(i965_oa_xml_FILES) brw_oa_metrics.c: brw_oa.py $(i965_oa_xml_FILES) - $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/brw_oa.py --code=$(builddir)/brw_oa_metrics.c $(i965_oa_xml_FILES) + $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/brw_oa.py --code=$(builddir)/brw_oa_metrics.c --header=$(builddir)/brw_oa_metrics.h $(i965_oa_xml_FILES) + +brw_oa_metrics.h: brw_oa_metrics.c ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/9] mesa: Update vao internal state when setting the _DrawVAO.
On 03/15/2018 11:48 PM, mathias.froehl...@gmx.net wrote: From: Mathias Fröhlich We do care if the vao set as Array._DrawVAO is ready to draw not so much that the current Array.VAO in terms of the OpenGL api is fully up to date for drawing. Can you rephrase that? Also no driver looks at any VAO's NewArrays value from within the Driver.UpdateState callback. So it should be safe to move this update into the _mesa_set_draw_vao method. Signed-off-by: Mathias Fröhlich --- src/mesa/main/arrayobj.c | 6 ++ src/mesa/main/state.c| 12 +++- src/mesa/vbo/vbo_exec_draw.c | 3 --- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/mesa/main/arrayobj.c b/src/mesa/main/arrayobj.c index 7cb9833719..0d2f7a918a 100644 --- a/src/mesa/main/arrayobj.c +++ b/src/mesa/main/arrayobj.c @@ -461,6 +461,12 @@ _mesa_update_vao_derived_arrays(struct gl_context *ctx, { /* Make sure we do not run into problems with shared objects */ assert(!vao->SharedAndImmutable || vao->NewArrays == 0); + + /* +* Stay tuned, the next series scans for duplicate bindings in this +* function. So that drivers can easily know the minimum unique set +* of bindings. 
+*/ } diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c index 6dd7a7ec07..e523bccd0c 100644 --- a/src/mesa/main/state.c +++ b/src/mesa/main/state.c @@ -360,9 +360,6 @@ _mesa_update_state_locked( struct gl_context *ctx ) update_program(ctx); } - if (new_state & _NEW_ARRAY) - _mesa_update_vao_derived_arrays(ctx, ctx->Array.VAO); - out: new_prog_state |= update_program_constants(ctx); @@ -377,7 +374,6 @@ _mesa_update_state_locked( struct gl_context *ctx ) */ ctx->Driver.UpdateState(ctx); ctx->NewState = 0; - ctx->Array.VAO->NewArrays = 0x0; } @@ -496,8 +492,14 @@ _mesa_set_draw_vao(struct gl_context *ctx, struct gl_vertex_array_object *vao, struct gl_vertex_array_object **ptr = &ctx->Array._DrawVAO; if (*ptr != vao) { _mesa_reference_vao_(ctx, ptr, vao); + ctx->NewDriverState |= ctx->DriverFlags.NewArray; - } else if (vao->NewArrays) { + } + + if (vao->NewArrays) { + _mesa_update_vao_derived_arrays(ctx, vao); + vao->NewArrays = 0; + ctx->NewDriverState |= ctx->DriverFlags.NewArray; } diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c index 1ed9d5eac0..3490dbe44d 100644 --- a/src/mesa/vbo/vbo_exec_draw.c +++ b/src/mesa/vbo/vbo_exec_draw.c @@ -231,9 +231,6 @@ vbo_exec_bind_arrays(struct gl_context *ctx) assert(!_mesa_is_bufferobj(exec->vtx.bufferobj) || (vao_enabled & ~vao->VertexAttribBufferMask) == 0); - _mesa_update_vao_derived_arrays(ctx, vao); - vao->NewArrays = 0; - _mesa_set_draw_vao(ctx, vao, _vbo_get_vao_filter(mode)); /* The exec VAO is not immutable, so we need to set manually */ ctx->NewDriverState |= ctx->DriverFlags.NewArray; ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 9/9] vbo: Remove now duplicate _DrawVAO notification.
On 03/15/2018 11:48 PM, mathias.froehl...@gmx.net wrote: From: Mathias Fröhlich The DriverFlags.NewArray bit is set into NewDriverState already on "The DriverFlags.NewArray bit is already set to NewDriverState in" _mesa_set_draw_vao since we have actually just above changed the VAOs content. So this can be removed. The _vbo_update_inputs is called by the vbo...recalculate_inputs being set through the same mechanism as described above. Signed-off-by: Mathias Fröhlich --- src/mesa/vbo/vbo_exec_draw.c | 5 - 1 file changed, 5 deletions(-) diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c index 3490dbe44d..026b7be129 100644 --- a/src/mesa/vbo/vbo_exec_draw.c +++ b/src/mesa/vbo/vbo_exec_draw.c @@ -232,11 +232,6 @@ vbo_exec_bind_arrays(struct gl_context *ctx) (vao_enabled & ~vao->VertexAttribBufferMask) == 0); _mesa_set_draw_vao(ctx, vao, _vbo_get_vao_filter(mode)); - /* The exec VAO is not immutable, so we need to set manually */ - ctx->NewDriverState |= ctx->DriverFlags.NewArray; - - /* Finally update the inputs array */ - _vbo_update_inputs(ctx, &vbo->draw_arrays); } ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev