Re: [Mesa-dev] [PATCH mesa] i965/blorp: fix indentation level
On Wed, Jul 06, 2016 at 10:02:42PM +0100, Eric Engestrom wrote: > Signed-off-by: Eric Engestrom> --- > src/mesa/drivers/dri/i965/gen7_blorp.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) Thanks for fixing this! Reviewed-by: Topi Pohjolainen Do you need me to push this for you? > > diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.c > b/src/mesa/drivers/dri/i965/gen7_blorp.c > index 7201549..0afd76b 100644 > --- a/src/mesa/drivers/dri/i965/gen7_blorp.c > +++ b/src/mesa/drivers/dri/i965/gen7_blorp.c > @@ -797,7 +797,7 @@ gen7_blorp_exec(struct brw_context *brw, > if (params->wm_prog_data) >gen7_blorp_emit_binding_table_pointers_ps(brw, wm_bind_bo_offset); > > - gen7_blorp_emit_constant_ps_disable(brw); > + gen7_blorp_emit_constant_ps_disable(brw); > > if (params->src.mt) { >const uint32_t sampler_offset = > -- > 2.9.0 > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] osmesa: Export OSMesaCreateContextAttribs.
On Wednesday, July 06, 2016 07:53:06 Brian Paul wrote: > On 07/05/2016 09:08 PM, mathias.froehl...@gmx.net wrote: > > From: Mathias Fröhlich> > > > Hi, > > > > Since the function is exported like any other > > public api function and put in the header > > as if you could link against it export it also > > from shared objects. > > > > Please review! > > Does this need to be tagged for the stable branches? That's probably a good idea. According to the release notes this will be for 11.2 and 12.0. > Reviewed-by: Brian Paul Thanks! Mathias___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mesa: print number of samples in renderbuffer_storage error msg
On Wed, 2016-07-06 at 18:02 -0600, Brian Paul wrote: > --- > src/mesa/main/fbobject.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c > index bf47c1c..2c01526 100644 > --- a/src/mesa/main/fbobject.c > +++ b/src/mesa/main/fbobject.c > @@ -2128,7 +2128,7 @@ renderbuffer_storage(struct gl_context *ctx, > struct gl_renderbuffer *rb, > } > > if (sample_count_error != GL_NO_ERROR) { > - _mesa_error(ctx, sample_count_error, "%s(samples)", func); > + _mesa_error(ctx, sample_count_error, "%s(samples=%d)", > func, samples); > return; > } > } Reviewed-by: Timothy Arceri___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 16/17] i965: enable ARB_enhanced_layouts for gen6+
--- src/mesa/drivers/dri/i965/intel_extensions.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index 27dfb0c..9989611 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -294,6 +294,7 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.ARB_conditional_render_inverted = true; ctx->Extensions.ARB_cull_distance = true; ctx->Extensions.ARB_draw_buffers_blend = true; + ctx->Extensions.ARB_enhanced_layouts = true; ctx->Extensions.ARB_ES3_compatibility = true; ctx->Extensions.ARB_fragment_layer_viewport = true; ctx->Extensions.ARB_sample_shading = true; -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 17/17] docs: mark ARB_enhanced_layouts as DONE for i965
--- docs/GL3.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/GL3.txt b/docs/GL3.txt index ce34869..0e89a12 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -193,11 +193,11 @@ GL 4.4, GLSL 4.40: GL_MAX_VERTEX_ATTRIB_STRIDE DONE (all drivers) GL_ARB_buffer_storage DONE (i965, nv50, nvc0, r600, radeonsi) GL_ARB_clear_texture DONE (i965, nv50, nvc0) - GL_ARB_enhanced_layouts in progress (Timothy) + GL_ARB_enhanced_layouts DONE (i965) - compile-time constant expressions DONE - explicit byte offsets for blocksDONE - forced alignment within blocks DONE - - specified vec4-slot component numbers in progress + - specified vec4-slot component numbers DONE (i965) - specified transform/feedback layout DONE - input/output block locationsDONE GL_ARB_multi_bind DONE (all drivers) -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 15/17] i965/vec4: add packing support for tes load outputs
--- src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp | 17 + src/mesa/drivers/dri/i965/brw_vec4_tcs.h | 1 + 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp index 0a33a39..27b8988 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp @@ -201,6 +201,7 @@ vec4_tcs_visitor::emit_input_urb_read(const dst_reg , void vec4_tcs_visitor::emit_output_urb_read(const dst_reg , unsigned base_offset, + unsigned first_component, const src_reg _offset) { vec4_instruction *inst; @@ -216,6 +217,12 @@ vec4_tcs_visitor::emit_output_urb_read(const dst_reg , read->offset = base_offset; read->mlen = 1; read->base_mrf = -1; + + if (first_component) { + src_reg src = src_reg(dst); + src.swizzle = BRW_SWZ_COMP_RIGHT(first_component); + emit(MOV(dst, src)); + } } void @@ -295,14 +302,15 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) case GL_QUADS: { /* DWords 3-2 (reversed); use offset 0 and WZYX swizzle. */ dst_reg tmp(this, glsl_type::vec4_type); -emit_output_urb_read(tmp, 0, src_reg()); +emit_output_urb_read(tmp, 0, 0, src_reg()); emit(MOV(writemask(dst, WRITEMASK_XY), swizzle(src_reg(tmp), BRW_SWIZZLE_WZYX))); break; } case GL_TRIANGLES: /* DWord 4; use offset 1 but normal swizzle/writemask. */ -emit_output_urb_read(writemask(dst, WRITEMASK_X), 1, src_reg()); +emit_output_urb_read(writemask(dst, WRITEMASK_X), 1, 0, + src_reg()); break; case GL_ISOLINES: /* All channels are undefined. 
*/ @@ -334,10 +342,11 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) } dst_reg tmp(this, glsl_type::vec4_type); - emit_output_urb_read(tmp, 1, src_reg()); + emit_output_urb_read(tmp, 1, 0, src_reg()); emit(MOV(dst, swizzle(src_reg(tmp), swiz))); } else { - emit_output_urb_read(dst, imm_offset, indirect_offset); + emit_output_urb_read(dst, imm_offset, nir_intrinsic_component(instr), + indirect_offset); } break; } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.h b/src/mesa/drivers/dri/i965/brw_vec4_tcs.h index d408e56..030eb5e 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.h +++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.h @@ -64,6 +64,7 @@ protected: const src_reg _offset); void emit_output_urb_read(const dst_reg , unsigned base_offset, + unsigned first_component, const src_reg _offset); void emit_urb_write(const src_reg , unsigned writemask, -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 12/17] i965/vec4: support packing tcs inputs
--- src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp | 8 ++-- src/mesa/drivers/dri/i965/brw_vec4_tcs.h | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp index f61c612..8c76e81 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp @@ -166,6 +166,7 @@ void vec4_tcs_visitor::emit_input_urb_read(const dst_reg , const src_reg _index, unsigned base_offset, + unsigned first_component, const src_reg _offset) { vec4_instruction *inst; @@ -191,7 +192,9 @@ vec4_tcs_visitor::emit_input_urb_read(const dst_reg , if (inst->offset == 0 && indirect_offset.file == BAD_FILE) { emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_))); } else { - emit(MOV(dst, src_reg(temp))); + src_reg src = src_reg(temp); + src.swizzle = BRW_SWZ_COMP_RIGHT(first_component); + emit(MOV(dst, src)); } } @@ -267,7 +270,8 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D); dst.writemask = brw_writemask_for_size(instr->num_components); - emit_input_urb_read(dst, vertex_index, imm_offset, indirect_offset); + emit_input_urb_read(dst, vertex_index, imm_offset, + nir_intrinsic_component(instr), indirect_offset); break; } case nir_intrinsic_load_input: diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.h b/src/mesa/drivers/dri/i965/brw_vec4_tcs.h index 329cd7d..d408e56 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.h +++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.h @@ -60,6 +60,7 @@ protected: void emit_input_urb_read(const dst_reg , const src_reg _index, unsigned base_offset, +unsigned first_component, const src_reg _offset); void emit_output_urb_read(const dst_reg , unsigned base_offset, -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 13/17] i965/vec4: add support for packing tcs outputs
--- src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp | 7 +++ 1 file changed, 7 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp index 8c76e81..0a33a39 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp @@ -406,6 +406,13 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) } } + unsigned first_component = nir_intrinsic_component(instr); + if (first_component) { + assert(swiz == BRW_SWIZZLE_XYZW); + swiz = BRW_SWZ_COMP_LEFT(first_component); + mask = mask << first_component; + } + emit_urb_write(swizzle(value, swiz), mask, imm_offset, indirect_offset); break; -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 10/17] i965: add helper for creating packing writemask
For example where n=3 first_component=1 this will give us 0xE (WRITEMASK_YZW). --- src/mesa/drivers/dri/i965/brw_reg.h | 6 ++ 1 file changed, 6 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h index 8e6fcd5..792015b 100644 --- a/src/mesa/drivers/dri/i965/brw_reg.h +++ b/src/mesa/drivers/dri/i965/brw_reg.h @@ -972,6 +972,12 @@ brw_writemask_for_size(unsigned n) return (1 << n) - 1; } +static inline unsigned +brw_writemask_for_component_packing(unsigned n, unsigned first_component) +{ + return (((1 << n) - 1) << first_component); +} + static inline struct brw_reg negate(struct brw_reg reg) { -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 11/17] i965/vec4: add component packing for gs
--- src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp index 9ebfb27..4b15302 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp @@ -72,6 +72,8 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) src = src_reg(ATTR, BRW_VARYING_SLOT_COUNT * vertex->u32[0] + instr->const_index[0] + offset->u32[0], type); + src.swizzle = BRW_SWZ_COMP_RIGHT(nir_intrinsic_component(instr)); + /* gl_PointSize is passed in the .w component of the VUE header */ if (instr->const_index[0] == VARYING_SLOT_PSIZ) src.swizzle = BRW_SWIZZLE_; -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 14/17] i965/vec4: add support for packing tes inputs
--- src/mesa/drivers/dri/i965/brw_vec4_tes.cpp | 14 ++ 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp index 6639c86..22293da 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp @@ -177,7 +177,9 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) case nir_intrinsic_load_input: case nir_intrinsic_load_per_vertex_input: { src_reg indirect_offset = get_indirect_offset(instr); + dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D); unsigned imm_offset = instr->const_index[0]; + unsigned fist_component = nir_intrinsic_component(instr); src_reg header = input_read_header; if (indirect_offset.file != BAD_FILE) { @@ -190,8 +192,10 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) */ const unsigned max_push_slots = 24; if (imm_offset < max_push_slots) { -emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D), - src_reg(ATTR, imm_offset, glsl_type::ivec4_type))); +src_reg src = src_reg(ATTR, imm_offset, glsl_type::ivec4_type); +src.swizzle = BRW_SWZ_COMP_RIGHT(fist_component); + +emit(MOV(dst, src)); prog_data->urb_read_length = MAX2(prog_data->urb_read_length, DIV_ROUND_UP(imm_offset + 1, 2)); @@ -205,12 +209,14 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) read->offset = imm_offset; read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET; + src_reg src = src_reg(temp); + src.swizzle = BRW_SWZ_COMP_RIGHT(fist_component); + /* Copy to target. We might end up with some funky writemasks landing * in here, but we really don't want them in the above pseudo-ops. 
*/ - dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D); dst.writemask = brw_writemask_for_size(instr->num_components); - emit(MOV(dst, src_reg(temp))); + emit(MOV(dst, src)); break; } default: -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 08/17] i965/vec4: add support for packing inputs
--- src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index f3b4528..cbe7468 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -397,6 +397,8 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) src = src_reg(ATTR, instr->const_index[0] + const_offset->u32[0], glsl_type::uvec4_type); + /* Swizzle source based on component layout qualifier */ + src.swizzle = BRW_SWZ_COMP_RIGHT(nir_intrinsic_component(instr)); dest = get_nir_dest(instr->dest, src.type); dest.writemask = brw_writemask_for_size(instr->num_components); -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 09/17] i965/vec4: add support for packing vs/gs/tes outputs
Here we create a new output_generic_reg array with the ability to store the dst_reg for each component of user defined varyings. This is needed as the previous code only stored the dst_reg based on the varying location which meant packed varyings would overwrite each other. --- src/mesa/drivers/dri/i965/brw_vec4.h | 3 +++ src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 9 ++- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 37 +++--- 3 files changed, 45 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 76dea04..d8fc471 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -114,6 +114,8 @@ public: * for the ir->location's used. */ dst_reg output_reg[BRW_VARYING_SLOT_COUNT]; + dst_reg output_generic_reg[MAX_VARYINGS_INCL_PATCH][4]; + unsigned output_generic_num_components[MAX_VARYINGS_INCL_PATCH][4]; const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT]; int uniforms; @@ -268,6 +270,7 @@ public: void emit_ndc_computation(); void emit_psiz_and_flags(dst_reg reg); vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying); + void emit_generic_urb_slot(dst_reg reg, int varying, int component); virtual void emit_urb_slot(dst_reg reg, int varying); void emit_shader_time_begin(); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index cbe7468..94e361d 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -416,7 +416,14 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F, instr->num_components); - output_reg[varying] = dst_reg(src); + if (varying >= VARYING_SLOT_VAR0) { + unsigned c = nir_intrinsic_component(instr); + unsigned v = varying - VARYING_SLOT_VAR0; + output_generic_reg[v][c] = dst_reg(src); + output_generic_num_components[v][c] = instr->num_components; + } else { + 
output_reg[varying] = dst_reg(src); + } break; } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index b392919..fbe04e3 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1278,13 +1278,35 @@ vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying) assert(varying < VARYING_SLOT_MAX); assert(output_reg[varying].type == reg.type); current_annotation = output_reg_annotation[varying]; - if (output_reg[varying].file != BAD_FILE) + if (output_reg[varying].file != BAD_FILE) { return emit(MOV(reg, src_reg(output_reg[varying]))); - else + } else return NULL; } void +vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying, int component) +{ + assert(varying < VARYING_SLOT_MAX); + assert(varying >= VARYING_SLOT_VAR0); + varying = varying - VARYING_SLOT_VAR0; + + unsigned num_comps = output_generic_num_components[varying][component]; + if (num_comps == 0) + return; + + assert(output_generic_reg[varying][component].type == reg.type); + current_annotation = output_reg_annotation[varying]; + if (output_generic_reg[varying][component].file != BAD_FILE) { + src_reg src = src_reg(output_generic_reg[varying][component]); + src.swizzle = BRW_SWZ_COMP_LEFT(component); + reg.writemask = + brw_writemask_for_component_packing(num_comps, component); + emit(MOV(reg, src)); + } +} + +void vec4_visitor::emit_urb_slot(dst_reg reg, int varying) { reg.type = BRW_REGISTER_TYPE_F; @@ -1323,7 +1345,13 @@ vec4_visitor::emit_urb_slot(dst_reg reg, int varying) /* No need to write to this slot */ break; default: - emit_generic_urb_slot(reg, varying); + if (varying >= VARYING_SLOT_VAR0) { + for (int i = 0; i < 4; i++) { +emit_generic_urb_slot(reg, varying, i); + } + } else { + emit_generic_urb_slot(reg, varying); + } break; } } @@ -1771,6 +1799,9 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler, this->current_annotation = NULL; 
memset(this->output_reg_annotation, 0, sizeof(this->output_reg_annotation)); + memset(this->output_generic_num_components, 0, + sizeof(this->output_generic_num_components)); + this->virtual_grf_start = NULL; this->virtual_grf_end = NULL; this->live_intervals = NULL; -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 07/17] i965: add helpers for creating component layout swizzle
This will be used to swizzle components to the beginning or end of the vector based on the component layout qualifier and whether we are doing a load or store. --- src/mesa/drivers/dri/i965/brw_reg.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h index 38cf8e3..8e6fcd5 100644 --- a/src/mesa/drivers/dri/i965/brw_reg.h +++ b/src/mesa/drivers/dri/i965/brw_reg.h @@ -88,6 +88,9 @@ struct brw_device_info; #define BRW_SWIZZLE_ZWZW BRW_SWIZZLE4(2,3,2,3) #define BRW_SWIZZLE_WZYX BRW_SWIZZLE4(3,2,1,0) +#define BRW_SWZ_COMP_RIGHT(comp) (BRW_SWIZZLE_XYZW >> ((comp)*2)) +#define BRW_SWZ_COMP_LEFT(comp) (BRW_SWIZZLE_XYZW << ((comp)*2)) + static inline bool brw_is_single_value_swizzle(unsigned swiz) { -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 04/17] i965: add support for packing arrays
Here we add a new helper function calc_type_size_offset() to help calculate the size of a varying once packing is taken into account. --- src/compiler/nir/nir_lower_io.c | 55 +++-- 1 file changed, 48 insertions(+), 7 deletions(-) diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c index c25790a..b966348 100644 --- a/src/compiler/nir/nir_lower_io.c +++ b/src/compiler/nir/nir_lower_io.c @@ -41,6 +41,36 @@ struct lower_io_state { nir_variable_mode modes; }; +/** + * Calculates the offset for a type by allowing for other components that are + * packed into the same location. + */ +static unsigned +calc_type_size_offset(unsigned num_packed_components, + const struct glsl_type *type, + int (*type_size)(const struct glsl_type *)) +{ + unsigned base_size; + const struct glsl_type *wa = glsl_without_array(type); + int comp_diff = num_packed_components - glsl_get_vector_elements(wa); + + /* If there is no difference in component sizes or the type_size function +* being used treats everything as a vec4 return. +*/ + if (comp_diff <= 0 || + type_size(glsl_float_type()) == type_size(glsl_double_type())) + return 0; + + if (glsl_get_base_type(wa) == GLSL_TYPE_DOUBLE) { + base_size = type_size(glsl_dvec_type(comp_diff)); + } else { + base_size = type_size(glsl_vec_type(comp_diff)); + } + + return glsl_type_is_array(type) ? 
base_size * glsl_get_aoa_size(type) : + base_size; +} + void nir_assign_var_locations(struct exec_list *var_list, unsigned *size, unsigned base_offset, @@ -74,13 +104,17 @@ nir_assign_var_locations(struct exec_list *var_list, unsigned *size, if (locations[idx][var->data.index] == -1) { var->data.driver_location = location; locations[idx][var->data.index] = location; -location += type_size(var->type); +location += type_size(var->type) + + calc_type_size_offset(var->data.num_packed_components, + var->type, type_size); } else { var->data.driver_location = locations[idx][var->data.index]; } } else { var->data.driver_location = location; - location += type_size(var->type); + location += type_size(var->type) + +calc_type_size_offset(var->data.num_packed_components, var->type, + type_size); } } @@ -113,7 +147,8 @@ is_per_vertex_output(struct lower_io_state *state, nir_variable *var) static nir_ssa_def * get_io_offset(nir_builder *b, nir_deref_var *deref, nir_ssa_def **vertex_index, - int (*type_size)(const struct glsl_type *)) + int (*type_size)(const struct glsl_type *), + unsigned num_packed_components) { nir_deref *tail = >deref; @@ -141,7 +176,9 @@ get_io_offset(nir_builder *b, nir_deref_var *deref, if (tail->deref_type == nir_deref_type_array) { nir_deref_array *deref_array = nir_deref_as_array(tail); - unsigned size = type_size(tail->type); + unsigned size = type_size(tail->type) + +calc_type_size_offset(num_packed_components, tail->type, + type_size); offset = nir_iadd(b, offset, nir_imm_int(b, size * deref_array->base_offset)); @@ -289,7 +326,9 @@ nir_lower_io_block(nir_block *block, offset = get_io_offset(b, intrin->variables[0], per_vertex ? _index : NULL, -state->type_size); +state->type_size, +intrin->variables[0]->var-> + data.num_packed_components); nir_intrinsic_instr *load = nir_intrinsic_instr_create(state->mem_ctx, @@ -339,7 +378,9 @@ nir_lower_io_block(nir_block *block, offset = get_io_offset(b, intrin->variables[0], per_vertex ? 
_index : NULL, -state->type_size); +state->type_size, +intrin->variables[0]->var-> + data.num_packed_components); nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx, @@ -381,7 +422,7 @@ nir_lower_io_block(nir_block *block, nir_ssa_def *offset; offset = get_io_offset(b, intrin->variables[0], -NULL, state->type_size); +NULL, state->type_size, 0); nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(state->mem_ctx, -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org
[Mesa-dev] [PATCH 03/17] nir: add glsl_dvec_type() helper
--- src/compiler/nir_types.cpp | 6 ++ src/compiler/nir_types.h | 1 + 2 files changed, 7 insertions(+) diff --git a/src/compiler/nir_types.cpp b/src/compiler/nir_types.cpp index 2d46ed2..f694a84 100644 --- a/src/compiler/nir_types.cpp +++ b/src/compiler/nir_types.cpp @@ -263,6 +263,12 @@ glsl_vec_type(unsigned n) } const glsl_type * +glsl_dvec_type(unsigned n) +{ + return glsl_type::dvec(n); +} + +const glsl_type * glsl_vec4_type(void) { return glsl_type::vec4_type; diff --git a/src/compiler/nir_types.h b/src/compiler/nir_types.h index c505ac0..6b4f646 100644 --- a/src/compiler/nir_types.h +++ b/src/compiler/nir_types.h @@ -119,6 +119,7 @@ const struct glsl_type *glsl_void_type(void); const struct glsl_type *glsl_float_type(void); const struct glsl_type *glsl_double_type(void); const struct glsl_type *glsl_vec_type(unsigned n); +const struct glsl_type *glsl_dvec_type(unsigned n); const struct glsl_type *glsl_vec4_type(void); const struct glsl_type *glsl_int_type(void); const struct glsl_type *glsl_uint_type(void); -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 06/17] nir/glsl: add double packing support to vs and fs
--- src/compiler/glsl/link_varyings.cpp | 16 +--- src/compiler/nir/nir_lower_io.c | 16 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/src/compiler/glsl/link_varyings.cpp b/src/compiler/glsl/link_varyings.cpp index 35f97a9..31faaa1 100644 --- a/src/compiler/glsl/link_varyings.cpp +++ b/src/compiler/glsl/link_varyings.cpp @@ -1995,10 +1995,11 @@ set_num_packed_components(struct gl_shader *shader, ir_variable_mode io_mode, var->type->without_array()->is_matrix()) continue; + unsigned dfrac = var->type->without_array()->is_double() ? 2 : 1; if (var->type->is_array()) { const glsl_type *type = get_varying_type(var, shader->Stage); unsigned array_components = type->without_array()->vector_elements + -var->data.location_frac; +var->data.location_frac / dfrac; assert(type->arrays_of_arrays_size() + idx <= ARRAY_SIZE(num_components)); for (unsigned i = idx; i < type->arrays_of_arrays_size(); i++) { @@ -2006,7 +2007,7 @@ set_num_packed_components(struct gl_shader *shader, ir_variable_mode io_mode, } } else { unsigned comps = var->type->vector_elements + -var->data.location_frac; +var->data.location_frac / dfrac; num_components[idx] = MAX2(comps, num_components[idx]); } } @@ -2034,7 +2035,16 @@ set_num_packed_components(struct gl_shader *shader, ir_variable_mode io_mode, c = MAX2(c, num_components[i]); } } else { - c = num_components[idx]; + /* Handle special case of packing dvec3 with a double. The only + * valid scenario is packing a double in the 4th component of the + * double vector. 
+ */ + if (var->type->is_double() && var->type->vector_elements == 3 && + num_components[idx+1] == 2) { +c = 4; + } else { +c = num_components[idx]; + } } var->data.num_packed_components = c; } diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c index b966348..5566c83 100644 --- a/src/compiler/nir/nir_lower_io.c +++ b/src/compiler/nir/nir_lower_io.c @@ -104,6 +104,22 @@ nir_assign_var_locations(struct exec_list *var_list, unsigned *size, if (locations[idx][var->data.index] == -1) { var->data.driver_location = location; locations[idx][var->data.index] = location; + +/* A dvec3 can be packed with a double we need special handling + * for this as we are packing across two locations. + */ +if (glsl_get_base_type(var->type) == GLSL_TYPE_DOUBLE && +glsl_get_vector_elements(var->type) == 3) { + /* Hack around type_size functions that expect vectors to be +* padded out to vec4. +*/ + unsigned dsize = type_size(glsl_double_type()); + unsigned offset = + dsize == type_size(glsl_float_type()) ? dsize : dsize * 2; + + locations[idx + 1][var->data.index] = location + offset; +} + location += type_size(var->type) + calc_type_size_offset(var->data.num_packed_components, var->type, type_size); -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 02/17] i965: enable component packing for vs and fs
--- src/mesa/drivers/dri/i965/brw_fs.cpp | 20 src/mesa/drivers/dri/i965/brw_fs.h | 5 +++-- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 29 - 3 files changed, 35 insertions(+), 19 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 2f473cc..9e7223e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1109,7 +1109,8 @@ fs_visitor::emit_general_interpolation(fs_reg *attr, const char *name, const glsl_type *type, glsl_interp_qualifier interpolation_mode, int *location, bool mod_centroid, - bool mod_sample) + bool mod_sample, + unsigned num_packed_components) { assert(stage == MESA_SHADER_FRAGMENT); brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data; @@ -1131,22 +1132,26 @@ fs_visitor::emit_general_interpolation(fs_reg *attr, const char *name, for (unsigned i = 0; i < length; i++) { emit_general_interpolation(attr, name, elem_type, interpolation_mode, -location, mod_centroid, mod_sample); +location, mod_centroid, mod_sample, +num_packed_components); } } else if (type->is_record()) { for (unsigned i = 0; i < type->length; i++) { const glsl_type *field_type = type->fields.structure[i].type; emit_general_interpolation(attr, name, field_type, interpolation_mode, -location, mod_centroid, mod_sample); +location, mod_centroid, mod_sample, +num_packed_components); } } else { assert(type->is_scalar() || type->is_vector()); + unsigned num_components = num_packed_components ? + num_packed_components : type->vector_elements; if (prog_data->urb_setup[*location] == -1) { /* If there's no incoming setup data for this slot, don't * emit interpolation for it. */ - *attr = offset(*attr, bld, type->vector_elements); + *attr = offset(*attr, bld, num_components); (*location)++; return; } @@ -1158,7 +1163,6 @@ fs_visitor::emit_general_interpolation(fs_reg *attr, const char *name, * handed us defined values in only the constant offset * field of the setup reg. 
*/ - unsigned vector_elements = type->vector_elements; /* Data starts at suboffet 3 in 32-bit units (12 bytes), so it is not * 64-bit aligned and the current implementation fails to read the @@ -1166,10 +1170,10 @@ fs_visitor::emit_general_interpolation(fs_reg *attr, const char *name, * read it as vector of floats with twice the number of components. */ if (attr->type == BRW_REGISTER_TYPE_DF) { -vector_elements *= 2; +num_components *= 2; attr->type = BRW_REGISTER_TYPE_F; } - for (unsigned int i = 0; i < vector_elements; i++) { + for (unsigned int i = 0; i < num_components; i++) { struct brw_reg interp = interp_reg(*location, i); interp = suboffset(interp, 3); interp.type = attr->type; @@ -1178,7 +1182,7 @@ fs_visitor::emit_general_interpolation(fs_reg *attr, const char *name, } } else { /* Smooth/noperspective interpolation case. */ - for (unsigned int i = 0; i < type->vector_elements; i++) { + for (unsigned int i = 0; i < num_components; i++) { struct brw_reg interp = interp_reg(*location, i); if (devinfo->needs_unlit_centroid_workaround && mod_centroid) { /* Get the pixel/sample mask into f0 so that we know diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 1f88f8f..0c72802 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -181,7 +181,7 @@ public: const glsl_type *type, glsl_interp_qualifier interpolation_mode, int *location, bool mod_centroid, - bool mod_sample); + bool mod_sample, unsigned num_components); fs_reg *emit_vs_system_value(int location); void emit_interpolation_setup_gen4(); void emit_interpolation_setup_gen6(); @@ -200,7 +200,8 @@ public: void emit_nir_code(); void nir_setup_inputs(); void nir_setup_single_output_varying(fs_reg *reg, const glsl_type *type, -
[Mesa-dev] [PATCH 05/17] i965: add component packing support for load_output intrinsics
--- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 38 +++- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index a08297e..cac0527 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -2530,6 +2530,7 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder , case nir_intrinsic_load_per_vertex_output: { fs_reg indirect_offset = get_indirect_offset(instr); unsigned imm_offset = instr->const_index[0]; + unsigned first_component = nir_intrinsic_component(instr); fs_inst *inst; if (indirect_offset.file == BAD_FILE) { @@ -2610,10 +2611,24 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder , } bld.LOAD_PAYLOAD(dst, srcs, num_components, 0); } else { -inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, patch_handle); +if (first_component != 0) { + unsigned read_components = + instr->num_components + first_component; + fs_reg tmp = bld.vgrf(dst.type, read_components); + inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp, + patch_handle); + inst->regs_written = read_components; + for (unsigned i = 0; i < instr->num_components; i++) { + bld.MOV(offset(dst, bld, i), + offset(tmp, bld, i + first_component)); + } +} else { + inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, + patch_handle); + inst->regs_written = instr->num_components; +} inst->offset = imm_offset; inst->mlen = 1; -inst->regs_written = instr->num_components; } } else { /* Indirect indexing - use per-slot offsets as well. 
*/ @@ -2623,11 +2638,24 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder , }; fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2); bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0); - - inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dst, payload); + if (first_component != 0) { +unsigned read_components = + instr->num_components + first_component; +fs_reg tmp = bld.vgrf(dst.type, read_components); +inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, tmp, +payload); +inst->regs_written = read_components; +for (unsigned i = 0; i < instr->num_components; i++) { + bld.MOV(offset(dst, bld, i), + offset(tmp, bld, i + first_component)); +} + } else { +inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dst, +payload); +inst->regs_written = instr->num_components; + } inst->offset = imm_offset; inst->mlen = 2; - inst->regs_written = instr->num_components; } break; } -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] Resend V4 ARB_enhanced_layouts packing support for i965 Gen6+
This is just a resend that is rebased and has previously reviewed patches pushed to master. V4: - add vec4 backend support and enable for Gen6+ V3: - Rewrite patch 9 (add support for packing arrays) to not add hacks to the type_size() functions. - Add packing support for the load_output intrinsics (patch 12) - Add glsl_dvec_type() helper (patch 8) V2: - validation fixes patches 1-2 - added support for packing doubles now that explicit location fixes have landed. - fix various issues with intel debug output with new COMPONENT const index. This adds component packing support for Gen6+. Series can be found in my component_packing_gen6+ branch: https://github.com/tarceri/Mesa_arrays_of_arrays.git ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 01/17] glsl/nir: add new num_packed_components field
This will be used to store the total number of components used at this location when packing via ARB_enhanced_layouts. --- src/compiler/glsl/glsl_to_nir.cpp | 1 + src/compiler/glsl/ir.h | 5 +++ src/compiler/glsl/link_varyings.cpp | 74 - src/compiler/glsl/linker.cpp| 2 + src/compiler/glsl/linker.h | 4 ++ src/compiler/nir/nir.h | 5 +++ 6 files changed, 89 insertions(+), 2 deletions(-) diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp index 20302e3..baba624 100644 --- a/src/compiler/glsl/glsl_to_nir.cpp +++ b/src/compiler/glsl/glsl_to_nir.cpp @@ -375,6 +375,7 @@ nir_visitor::visit(ir_variable *ir) var->data.explicit_binding = ir->data.explicit_binding; var->data.has_initializer = ir->data.has_initializer; var->data.location_frac = ir->data.location_frac; + var->data.num_packed_components = ir->data.num_packed_components; switch (ir->data.depth_layout) { case ir_depth_layout_none: diff --git a/src/compiler/glsl/ir.h b/src/compiler/glsl/ir.h index 1325e35..637b53c 100644 --- a/src/compiler/glsl/ir.h +++ b/src/compiler/glsl/ir.h @@ -770,6 +770,11 @@ public: unsigned location_frac:2; /** + * The total number of components packed into this location. + */ + unsigned num_packed_components:4; + + /** * Layout of the matrix. Uses glsl_matrix_layout values. 
*/ unsigned matrix_layout:2; diff --git a/src/compiler/glsl/link_varyings.cpp b/src/compiler/glsl/link_varyings.cpp index 76d0be1..35f97a9 100644 --- a/src/compiler/glsl/link_varyings.cpp +++ b/src/compiler/glsl/link_varyings.cpp @@ -1975,6 +1975,70 @@ reserved_varying_slot(struct gl_linked_shader *stage, return slots; } +void +set_num_packed_components(struct gl_shader *shader, ir_variable_mode io_mode, + unsigned base_offset) +{ + /* Find the max number of components used at this location */ + unsigned num_components[MAX_VARYINGS_INCL_PATCH] = { 0 }; + + foreach_in_list(ir_instruction, node, shader->ir) { + ir_variable *const var = node->as_variable(); + + if (var == NULL || var->data.mode != io_mode || + !var->data.explicit_location) + continue; + + int idx = var->data.location - base_offset; + if (idx < 0 || idx >= MAX_VARYINGS_INCL_PATCH || + var->type->without_array()->is_record() || + var->type->without_array()->is_matrix()) + continue; + + if (var->type->is_array()) { + const glsl_type *type = get_varying_type(var, shader->Stage); + unsigned array_components = type->without_array()->vector_elements + +var->data.location_frac; + assert(type->arrays_of_arrays_size() + idx <= +ARRAY_SIZE(num_components)); + for (unsigned i = idx; i < type->arrays_of_arrays_size(); i++) { +num_components[i] = MAX2(array_components, num_components[i]); + } + } else { + unsigned comps = var->type->vector_elements + +var->data.location_frac; + num_components[idx] = MAX2(comps, num_components[idx]); + } + } + + foreach_in_list(ir_instruction, node, shader->ir) { + ir_variable *const var = node->as_variable(); + + if (var == NULL || var->data.mode != io_mode || + !var->data.explicit_location) + continue; + + int idx = var->data.location - base_offset; + if (idx < 0 || idx >= MAX_VARYINGS_INCL_PATCH || + var->type->without_array()->is_record() || + var->type->without_array()->is_matrix()) + continue; + + /* For arrays we need to check all elements in order to find the max + * number 
of components used. + */ + unsigned c = 0; + if (var->type->is_array()) { + const glsl_type *type = get_varying_type(var, shader->Stage); + for (unsigned i = idx; i < type->arrays_of_arrays_size(); i++) { +c = MAX2(c, num_components[i]); + } + } else { + c = num_components[idx]; + } + var->data.num_packed_components = c; + } +} /** * Assign locations for all variables that are produced in one pipeline stage @@ -2091,11 +2155,17 @@ assign_varying_locations(struct gl_context *ctx, * 4. Mark input variables in the consumer that do not have locations as *not being inputs. This lets the optimizer eliminate them. */ - if (consumer) + if (consumer) { canonicalize_shader_io(consumer->ir, ir_var_shader_in); + set_num_packed_components(consumer, ir_var_shader_in, +VARYING_SLOT_VAR0); + } - if (producer) + if (producer) { canonicalize_shader_io(producer->ir, ir_var_shader_out); + set_num_packed_components(producer, ir_var_shader_out, +VARYING_SLOT_VAR0); + } if (consumer)
Re: [Mesa-dev] [PATCH 1/2] nir: Add optimization for (a || True == True)
Looks good to me Reviewed-by: Jason Ekstrand On Wed, Jul 6, 2016 at 5:12 PM, Eric Anholt wrote: > This was appearing in vc4 VS/CS in mupen64, due to vertex attrib lowering > producing some constants that were getting compared. > > total instructions in shared programs: 112276 -> 112198 (-0.07%) > instructions in affected programs: 2239 -> 2161 (-3.48%) > total estimated cycles in shared programs: 283102 -> 283038 (-0.02%) > estimated cycles in affected programs: 2365 -> 2301 (-2.71%) > --- > src/compiler/nir/nir_opt_algebraic.py | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/src/compiler/nir/nir_opt_algebraic.py > b/src/compiler/nir/nir_opt_algebraic.py > index fd228017c54e..7d04ef941b73 100644 > --- a/src/compiler/nir/nir_opt_algebraic.py > +++ b/src/compiler/nir/nir_opt_algebraic.py > @@ -197,6 +197,7 @@ optimizations = [ > (('iand', a, 0), 0), > (('ior', a, a), a), > (('ior', a, 0), a), > + (('ior', a, True), True), > (('fxor', a, a), 0.0), > (('ixor', a, a), 0), > (('ixor', a, 0), a), > -- > 2.8.1 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] nir: Optimize away IF statements with no body on either side.
On Wed, Jul 6, 2016 at 5:12 PM, Eric Anholtwrote: > Due to the rampant dead code elimination in coordinate shaders for vc4, we > often end up with IFs that do nothing on either side. In the > loops-enabled build, shader-db gives: > > total instructions in shared programs: 125192 -> 119693 (-4.39%) > instructions in affected programs: 30649 -> 25150 (-17.94%) > total uniforms in shared programs: 38436 -> 37632 (-2.09%) > uniforms in affected programs: 6168 -> 5364 (-13.04%) > --- > src/compiler/nir/nir_opt_dead_cf.c | 41 > ++ > 1 file changed, 37 insertions(+), 4 deletions(-) > > diff --git a/src/compiler/nir/nir_opt_dead_cf.c > b/src/compiler/nir/nir_opt_dead_cf.c > index 81c1b650da96..eb98dc9507fb 100644 > --- a/src/compiler/nir/nir_opt_dead_cf.c > +++ b/src/compiler/nir/nir_opt_dead_cf.c > @@ -60,6 +60,12 @@ > * } > * ... > * > + * We also delete IF statements with no instructions in either body: > + * > + * if (...) { > + * } else { > + * } > We already have a NIR pass that does exactly this called opt_peephole_select. From what I see with your pass, it doesn't properly handle phi nodes that may occur after the if statement which is exactly what the peephole_select pass is for. Maybe that pass should just be rolled into dead_cf? --Jason > + * > * Finally, we also handle removing useless loops, i.e. loops with no side > * effects and without any definitions that are used elsewhere. This case > is a > * little different from the first two in that the code is actually run > (it > @@ -134,6 +140,32 @@ opt_constant_if(nir_if *if_stmt, bool condition) > nir_cf_node_remove(_stmt->cf_node); > } > > +/* If the nir_if has no instructions on either side, then we can delete > the > + * IF, and therefore also its use of the condition variable. 
> + */ > +static bool > +opt_empty_if(nir_if *if_stmt) > +{ > + nir_cf_node *then_node = nir_if_first_then_node(if_stmt); > + nir_cf_node *else_node = nir_if_first_else_node(if_stmt); > + > + /* We can only have one block in each side, with no instructions in > them */ > + if (nir_if_last_then_node(if_stmt) != then_node) > + return false; > + if (nir_if_last_else_node(if_stmt) != else_node) > + return false; > + nir_block *then_block = nir_cf_node_as_block(then_node); > + nir_block *else_block = nir_cf_node_as_block(else_node); > + if (!exec_list_is_empty(_block->instr_list)) > + return false; > + if (!exec_list_is_empty(_block->instr_list)) > + return false; > + > + nir_cf_node_remove(_stmt->cf_node); > + > + return true; > +} > + > static bool > cf_node_has_side_effects(nir_cf_node *node) > { > @@ -224,11 +256,12 @@ dead_cf_block(nir_block *block) >nir_const_value *const_value = > nir_src_as_const_value(following_if->condition); > > - if (!const_value) > - return false; > + if (const_value) { > + opt_constant_if(following_if, const_value->u32[0] != 0); > + return true; > + } > > - opt_constant_if(following_if, const_value->u32[0] != 0); > - return true; > + return opt_empty_if(following_if); > } > > nir_loop *following_loop = nir_block_get_following_loop(block); > -- > 2.8.1 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH mesa] i965/blorp: fix indentation level
Signed-off-by: Eric Engestrom --- src/mesa/drivers/dri/i965/gen7_blorp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.c b/src/mesa/drivers/dri/i965/gen7_blorp.c index 7201549..0afd76b 100644 --- a/src/mesa/drivers/dri/i965/gen7_blorp.c +++ b/src/mesa/drivers/dri/i965/gen7_blorp.c @@ -797,7 +797,7 @@ gen7_blorp_exec(struct brw_context *brw, if (params->wm_prog_data) gen7_blorp_emit_binding_table_pointers_ps(brw, wm_bind_bo_offset); - gen7_blorp_emit_constant_ps_disable(brw); + gen7_blorp_emit_constant_ps_disable(brw); if (params->src.mt) { const uint32_t sampler_offset = -- 2.9.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] radeonsi: don't interleave R600_DEBUG-enabled shader dumps
On Wed, Jul 06, 2016 at 11:55:03PM +0200, Nicolai Hähnle wrote: > From: Nicolai Hähnle> > Only setting R600_DEBUG doesn't set any debug callback. Conversely, the debug > callback is only called when R600_DEBUG is set. I don't get any output from shader-db with this patch. -Tom > --- > src/gallium/drivers/radeonsi/si_state_shaders.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c > b/src/gallium/drivers/radeonsi/si_state_shaders.c > index abbe451..059ff70 100644 > --- a/src/gallium/drivers/radeonsi/si_state_shaders.c > +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c > @@ -1324,7 +1324,7 @@ static void *si_create_shader_selector(struct > pipe_context *ctx, > pipe_mutex_init(sel->mutex); > util_queue_fence_init(>ready); > > - if (sctx->b.debug.debug_message || > + if (r600_can_dump_shader(>b, sel->info.processor) || > !util_queue_is_initialized(>shader_compiler_queue)) > si_init_shader_selector_async(sel, -1); > else > -- > 2.7.4 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] nir: Add optimization for (a || True == True)
This was appearing in vc4 VS/CS in mupen64, due to vertex attrib lowering producing some constants that were getting compared. total instructions in shared programs: 112276 -> 112198 (-0.07%) instructions in affected programs: 2239 -> 2161 (-3.48%) total estimated cycles in shared programs: 283102 -> 283038 (-0.02%) estimated cycles in affected programs: 2365 -> 2301 (-2.71%) --- src/compiler/nir/nir_opt_algebraic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index fd228017c54e..7d04ef941b73 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -197,6 +197,7 @@ optimizations = [ (('iand', a, 0), 0), (('ior', a, a), a), (('ior', a, 0), a), + (('ior', a, True), True), (('fxor', a, a), 0.0), (('ixor', a, a), 0), (('ixor', a, 0), a), -- 2.8.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/2] nir: Optimize away IF statements with no body on either side.
Due to the rampant dead code elimination in coordinate shaders for vc4, we often end up with IFs that do nothing on either side. In the loops-enabled build, shader-db gives: total instructions in shared programs: 125192 -> 119693 (-4.39%) instructions in affected programs: 30649 -> 25150 (-17.94%) total uniforms in shared programs: 38436 -> 37632 (-2.09%) uniforms in affected programs: 6168 -> 5364 (-13.04%) --- src/compiler/nir/nir_opt_dead_cf.c | 41 ++ 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/src/compiler/nir/nir_opt_dead_cf.c b/src/compiler/nir/nir_opt_dead_cf.c index 81c1b650da96..eb98dc9507fb 100644 --- a/src/compiler/nir/nir_opt_dead_cf.c +++ b/src/compiler/nir/nir_opt_dead_cf.c @@ -60,6 +60,12 @@ * } * ... * + * We also delete IF statements with no instructions in either body: + * + * if (...) { + * } else { + * } + * * Finally, we also handle removing useless loops, i.e. loops with no side * effects and without any definitions that are used elsewhere. This case is a * little different from the first two in that the code is actually run (it @@ -134,6 +140,32 @@ opt_constant_if(nir_if *if_stmt, bool condition) nir_cf_node_remove(_stmt->cf_node); } +/* If the nir_if has no instructions on either side, then we can delete the + * IF, and therefore also its use of the condition variable. 
+ */ +static bool +opt_empty_if(nir_if *if_stmt) +{ + nir_cf_node *then_node = nir_if_first_then_node(if_stmt); + nir_cf_node *else_node = nir_if_first_else_node(if_stmt); + + /* We can only have one block in each side, with no instructions in them */ + if (nir_if_last_then_node(if_stmt) != then_node) + return false; + if (nir_if_last_else_node(if_stmt) != else_node) + return false; + nir_block *then_block = nir_cf_node_as_block(then_node); + nir_block *else_block = nir_cf_node_as_block(else_node); + if (!exec_list_is_empty(&then_block->instr_list)) + return false; + if (!exec_list_is_empty(&else_block->instr_list)) + return false; + + nir_cf_node_remove(&if_stmt->cf_node); + + return true; +} + static bool cf_node_has_side_effects(nir_cf_node *node) { @@ -224,11 +256,12 @@ dead_cf_block(nir_block *block) nir_const_value *const_value = nir_src_as_const_value(following_if->condition); - if (!const_value) - return false; + if (const_value) { + opt_constant_if(following_if, const_value->u32[0] != 0); + return true; + } - opt_constant_if(following_if, const_value->u32[0] != 0); - return true; + return opt_empty_if(following_if); } nir_loop *following_loop = nir_block_get_following_loop(block); -- 2.8.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] egl: Fix the bad surface attributes combination checking for pbuffers. (v2)
On Wed 06 Jul 2016, Chad Versace wrote: > On Mon 20 Jun 2016, Guillaume Charifi wrote: > > Fixes a regression induced by commit > > a0674ce5c41903ccd161e89abb149621bfbc40d2: > > When EGL_TEXTURE_FORMAT and EGL_TEXTURE_TARGET were both specified (and > > both != EGL_NO_TEXTURE), an error was instantly triggered, before the > > other one had even a chance to be checked, which is obviously not the > > intended behaviour. > > > > v2: Full commit hash, remove useless variables. > > > > Signed-off-by: Guillaume Charifi> > Reviewed-by: Frank Binns > > --- > > src/egl/main/eglsurface.c | 36 +++- > > 1 file changed, 15 insertions(+), 21 deletions(-) > > This patch looks good to me. And I checked that it didn't regress > dEQP-EGL.functional.negative_api.create_pbuffer_surface. > > But I want to run the patch through Intel's Jenkins before giving my > reviewed-and-tested-by. The Jenkins run should finish before morning > (I'm US West Coast). I verified that your patch regresses no tests in 'dEQP-EGL.*', at least on Intel. So, it's looking good. I'm still waiting for the full Jenkins run to complete, though. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] egl: Fix the bad surface attributes combination checking for pbuffers. (v2)
On Mon 20 Jun 2016, Guillaume Charifi wrote: > Fixes a regression induced by commit a0674ce5c41903ccd161e89abb149621bfbc40d2: > When EGL_TEXTURE_FORMAT and EGL_TEXTURE_TARGET were both specified (and > both != EGL_NO_TEXTURE), an error was instantly triggered, before the > other one had even a chance to be checked, which is obviously not the > intended behaviour. > > v2: Full commit hash, remove useless variables. > > Signed-off-by: Guillaume Charifi> Reviewed-by: Frank Binns > --- > src/egl/main/eglsurface.c | 36 +++- > 1 file changed, 15 insertions(+), 21 deletions(-) This patch looks good to me. And I checked that it didn't regress dEQP-EGL.functional.negative_api.create_pbuffer_surface. But I want to run the patch through Intel's Jenkins before giving my reviewed-and-tested-by. The Jenkins run should finish before morning (I'm US West Coast). ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/5] swr: [rasterizer] add support for llvm-3.9
Am 06.07.2016 um 23:51 schrieb Tim Rowley: > --- > .../drivers/swr/rasterizer/jitter/builder_misc.cpp | 38 > -- > .../jitter/scripts/gen_llvm_ir_macros.py | 5 --- > 2 files changed, 28 insertions(+), 15 deletions(-) > > diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp > b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp > index 671178f..b23a10d 100644 > --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp > +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp > @@ -700,20 +700,22 @@ Value *Builder::PSHUFB(Value* a, Value* b) > /// lower 8 values are used. > Value *Builder::PMOVSXBD(Value* a) > { > -Value* res; > +// llvm-3.9 removed the pmovsxbd intrinsic > +#if HAVE_LLVM < 0x309 > // use avx2 byte sign extend instruction if available > if(JM()->mArch.AVX2()) > { > -res = VPMOVSXBD(a); > +Function *pmovsxbd = > Intrinsic::getDeclaration(JM()->mpCurrentModule, > Intrinsic::x86_avx2_pmovsxbd); > +return CALL(pmovsxbd, std::initializer_list{a}); > } > else > +#endif > { > // VPMOVSXBD output type > Type* v8x32Ty = VectorType::get(mInt32Ty, 8); > // Extract 8 values from 128bit lane and sign extend > -res = S_EXT(VSHUFFLE(a, a, C({0, 1, 2, 3, 4, 5, 6, 7})), > v8x32Ty); > +return S_EXT(VSHUFFLE(a, a, C({0, 1, 2, 3, 4, 5, 6, 7})), > v8x32Ty); > } > -return res; > } > > // > @@ -722,20 +724,22 @@ Value *Builder::PMOVSXBD(Value* a) > /// @param a - 128bit SIMD lane(8x16bit) of 16bit integer values. 
> Value *Builder::PMOVSXWD(Value* a) > { > -Value* res; > +// llvm-3.9 removed the pmovsxwd intrinsic > +#if HAVE_LLVM < 0x309 > // use avx2 word sign extend if available > if(JM()->mArch.AVX2()) > { > -res = VPMOVSXWD(a); > +Function *pmovsxwd = > Intrinsic::getDeclaration(JM()->mpCurrentModule, > Intrinsic::x86_avx2_pmovsxwd); > +return CALL(pmovsxwd, std::initializer_list {a}); > } > else > +#endif > { > // VPMOVSXWD output type > Type* v8x32Ty = VectorType::get(mInt32Ty, 8); > // Extract 8 values from 128bit lane and sign extend > -res = S_EXT(VSHUFFLE(a, a, C({0, 1, 2, 3, 4, 5, 6, 7})), > v8x32Ty); > +return S_EXT(VSHUFFLE(a, a, C({0, 1, 2, 3, 4, 5, 6, 7})), > v8x32Ty); > } > -return res; > } > > // > @@ -875,9 +879,15 @@ Value *Builder::CVTPS2PH(Value* a, Value* rounding) > > Value *Builder::PMAXSD(Value* a, Value* b) > { > +// llvm-3.9 removed the pmax intrinsics > +#if HAVE_LLVM >= 0x309 > +Value* cmp = ICMP_UGT(a, b); > +return SELECT(VMASK(cmp), a, b); > +#else > if (JM()->mArch.AVX2()) > { > -return VPMAXSD(a, b); > +Function* pmaxsd = Intrinsic::getDeclaration(JM()->mpCurrentModule, > Intrinsic::x86_avx2_pmaxs_d); > +return CALL(pmaxsd, {a, b}); > } > else > { > @@ -900,13 +910,20 @@ Value *Builder::PMAXSD(Value* a, Value* b) > > return result; > } > +#endif > } > > Value *Builder::PMINSD(Value* a, Value* b) > { > +// llvm-3.9 removed the pmin intrinsics > +#if HAVE_LLVM >= 0x309 > +Value* cmp = ICMP_ULT(a, b); > +return SELECT(VMASK(cmp), a, b); > +#else Yep, had to deal with that in gallivm as well... That said, these were signed min/max here. I think you wanted to use ICMP_SLT/ICMP_SGT... 
Roland > if (JM()->mArch.AVX2()) > { > -return VPMINSD(a, b); > +Function* pminsd = Intrinsic::getDeclaration(JM()->mpCurrentModule, > Intrinsic::x86_avx2_pmins_d); > +return CALL(pminsd, {a, b}); > } > else > { > @@ -929,6 +946,7 @@ Value *Builder::PMINSD(Value* a, Value* b) > > return result; > } > +#endif > } > > void Builder::Gather4(const SWR_FORMAT format, Value* pSrcBase, Value* > byteOffsets, > diff --git > a/src/gallium/drivers/swr/rasterizer/jitter/scripts/gen_llvm_ir_macros.py > b/src/gallium/drivers/swr/rasterizer/jitter/scripts/gen_llvm_ir_macros.py > index 4963c5e..234889b 100644 > --- a/src/gallium/drivers/swr/rasterizer/jitter/scripts/gen_llvm_ir_macros.py > +++ b/src/gallium/drivers/swr/rasterizer/jitter/scripts/gen_llvm_ir_macros.py > @@ -91,8 +91,6 @@ intrinsics = [ > ["VRCPPS", "x86_avx_rcp_ps_256", ["a"]], > ["VMINPS", "x86_avx_min_ps_256", ["a", "b"]], > ["VMAXPS", "x86_avx_max_ps_256", ["a", "b"]], > -["VPMINSD", "x86_avx2_pmins_d", ["a", "b"]], > -["VPMAXSD", "x86_avx2_pmaxs_d", ["a", "b"]], > ["VROUND", "x86_avx_round_ps_256", ["a", "rounding"]], > ["VCMPPS", "x86_avx_cmp_ps_256", ["a", "b", "cmpop"]], > ["VBLENDVPS", "x86_avx_blendv_ps_256", ["a", "b",
[Mesa-dev] [Bug 96765] BindFragDataLocationIndexed on array fragment shader output.
https://bugs.freedesktop.org/show_bug.cgi?id=96765 --- Comment #8 from Ilia Mirkin --- https://patchwork.freedesktop.org/patch/97464/ This patch should also fix it for the [0] names. I chose to implement it as a fallback thing, so if you have foo and foo[0] set, foo will always get picked, no matter which was set first. If I stripped the [0]'s at bind time, one might run into the issue of an "out vec4 foo;" shader (which, as the spec says, can be added after the binding is done on the program), and someone doing a bind on both foo and foo[0], and it'd be awkward for the foo[0] to win out in that case. -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] mesa: print number of samples in renderbuffer_storage error msg
--- src/mesa/main/fbobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index bf47c1c..2c01526 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -2128,7 +2128,7 @@ renderbuffer_storage(struct gl_context *ctx, struct gl_renderbuffer *rb, } if (sample_count_error != GL_NO_ERROR) { - _mesa_error(ctx, sample_count_error, "%s(samples)", func); + _mesa_error(ctx, sample_count_error, "%s(samples=%d)", func, samples); return; } } -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] glsl: look for frag data bindings with [0] tacked onto the end for arrays
The GL spec is very unclear on this point. Apparently this is discussed without resolution in the closed Khronos bugtracker at https://cvs.khronos.org/bugzilla/show_bug.cgi?id=7829 . The recommendation is to allow dropping the [0] for looking up the bindings. The approach taken in this patch is to instead tack on [0]'s for each arrayness level of the output's type, and doing the lookup again. That way, for out vec4 foo[2][2][2] we will end up looking for bindings for foo, foo[0], foo[0][0], and foo[0][0][0], in that order of preference. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96765 Signed-off-by: Ilia Mirkin--- src/compiler/glsl/linker.cpp | 39 --- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp index d963f54..9d54c2f 100644 --- a/src/compiler/glsl/linker.cpp +++ b/src/compiler/glsl/linker.cpp @@ -2566,6 +2566,7 @@ find_available_slots(unsigned used_mask, unsigned needed_count) /** * Assign locations for either VS inputs or FS outputs * + * \param mem_ctx Temporary ralloc context used for linking * \param prog Shader program whose variables need locations assigned * \param constants Driver specific constant values for the program. * \param target_index Selector for the program target to receive location @@ -2577,7 +2578,8 @@ find_available_slots(unsigned used_mask, unsigned needed_count) * error is emitted to the shader link log and false is returned. 
*/ bool -assign_attribute_or_color_locations(gl_shader_program *prog, +assign_attribute_or_color_locations(void *mem_ctx, +gl_shader_program *prog, struct gl_constants *constants, unsigned target_index) { @@ -2680,16 +2682,31 @@ assign_attribute_or_color_locations(gl_shader_program *prog, } else if (target_index == MESA_SHADER_FRAGMENT) { unsigned binding; unsigned index; + const char *name = var->name; + const glsl_type *type = var->type; + + while (type) { +/* Check if there's a binding for the variable name */ +if (prog->FragDataBindings->get(binding, name)) { + assert(binding >= FRAG_RESULT_DATA0); + var->data.location = binding; + var->data.is_unmatched_generic_inout = 0; + + if (prog->FragDataIndexBindings->get(index, name)) { + var->data.index = index; + } + break; +} -if (prog->FragDataBindings->get(binding, var->name)) { - assert(binding >= FRAG_RESULT_DATA0); - var->data.location = binding; -var->data.is_unmatched_generic_inout = 0; +/* If not, but it's an array type, look for name[0] */ +if (type->is_array()) { + name = ralloc_asprintf(mem_ctx, "%s[0]", name); + type = type->fields.array; + continue; +} - if (prog->FragDataIndexBindings->get(index, var->name)) { - var->data.index = index; - } -} +break; + } } /* From GL4.5 core spec, section 15.2 (Shader Execution): @@ -4816,12 +4833,12 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) prev = i; } - if (!assign_attribute_or_color_locations(prog, >Const, + if (!assign_attribute_or_color_locations(mem_ctx, prog, >Const, MESA_SHADER_VERTEX)) { goto done; } - if (!assign_attribute_or_color_locations(prog, >Const, + if (!assign_attribute_or_color_locations(mem_ctx, prog, >Const, MESA_SHADER_FRAGMENT)) { goto done; } -- 2.7.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 89599] symbol 'x86_64_entry_start' is already defined when building with LLVM/clang
https://bugs.freedesktop.org/show_bug.cgi?id=89599 austinengl...@gmail.com changed: What|Removed |Added CC||austinengl...@gmail.com -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] radeonsi: don't interleave R600_DEBUG-enabled shader dumps
From: Nicolai Hähnle Only setting R600_DEBUG doesn't set any debug callback. Conversely, the debug callback is only called when R600_DEBUG is set. --- src/gallium/drivers/radeonsi/si_state_shaders.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index abbe451..059ff70 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -1324,7 +1324,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx, pipe_mutex_init(sel->mutex); util_queue_fence_init(>ready); - if (sctx->b.debug.debug_message || + if (r600_can_dump_shader(>b, sel->info.processor) || !util_queue_is_initialized(>shader_compiler_queue)) si_init_shader_selector_async(sel, -1); else -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/5] swr: [rasterizer core] make all api functions call GetContext
Small api cleanup. Make all api functions call GetContext instead of locally casting handle. Makes debugging easier by providing a single point to track context changes. --- src/gallium/drivers/swr/rasterizer/core/api.cpp | 28 - 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp index edde918..6f9c402 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp @@ -48,6 +48,11 @@ void SetupDefaultState(SWR_CONTEXT *pContext); +static INLINE SWR_CONTEXT* GetContext(HANDLE hContext) +{ +return (SWR_CONTEXT*)hContext; +} + // /// @brief Create SWR Context. /// @param pCreateInfo - pointer to creation info. @@ -140,7 +145,7 @@ HANDLE SwrCreateContext( void SwrDestroyContext(HANDLE hContext) { -SWR_CONTEXT *pContext = (SWR_CONTEXT*)hContext; +SWR_CONTEXT *pContext = GetContext(hContext); DestroyThreadPool(pContext, >threadPool); // free the fifos @@ -168,7 +173,7 @@ void SwrDestroyContext(HANDLE hContext) delete(pContext->pHotTileMgr); pContext->~SWR_CONTEXT(); -AlignedFree((SWR_CONTEXT*)hContext); +AlignedFree(GetContext(hContext)); } void CopyState(DRAW_STATE& dst, const DRAW_STATE& src) @@ -357,7 +362,7 @@ void SWR_API SwrSaveState( void* pOutputStateBlock, size_t memSize) { -SWR_CONTEXT *pContext = (SWR_CONTEXT*)hContext; +SWR_CONTEXT *pContext = GetContext(hContext); auto pSrc = GetDrawState(pContext); SWR_ASSERT(pOutputStateBlock && memSize >= sizeof(*pSrc)); @@ -369,7 +374,7 @@ void SWR_API SwrRestoreState( const void* pStateBlock, size_t memSize) { -SWR_CONTEXT *pContext = (SWR_CONTEXT*)hContext; +SWR_CONTEXT *pContext = GetContext(hContext); auto pDst = GetDrawState(pContext); SWR_ASSERT(pStateBlock && memSize >= sizeof(*pDst)); @@ -384,11 +389,6 @@ void SetupDefaultState(SWR_CONTEXT *pContext) pState->rastState.frontWinding = SWR_FRONTWINDING_CCW; } -static INLINE SWR_CONTEXT* GetContext(HANDLE 
hContext) -{ -return (SWR_CONTEXT*)hContext; -} - void SwrSync(HANDLE hContext, PFN_CALLBACK_FUNC pfnFunc, uint64_t userData, uint64_t userData2, uint64_t userData3) { RDTSC_START(APISync); @@ -1286,7 +1286,7 @@ void SwrInvalidateTiles( return; } -SWR_CONTEXT *pContext = (SWR_CONTEXT*)hContext; +SWR_CONTEXT *pContext = GetContext(hContext); DRAW_CONTEXT* pDC = GetDrawContext(pContext); pDC->FeWork.type = DISCARDINVALIDATETILES; @@ -1316,7 +1316,7 @@ void SwrDiscardRect( return; } -SWR_CONTEXT *pContext = (SWR_CONTEXT*)hContext; +SWR_CONTEXT *pContext = GetContext(hContext); DRAW_CONTEXT* pDC = GetDrawContext(pContext); // Queue a load to the hottile @@ -1350,7 +1350,7 @@ void SwrDispatch( } RDTSC_START(APIDispatch); -SWR_CONTEXT *pContext = (SWR_CONTEXT*)hContext; +SWR_CONTEXT *pContext = GetContext(hContext); DRAW_CONTEXT* pDC = GetDrawContext(pContext); pDC->isCompute = true; // This is a compute context. @@ -1384,7 +1384,7 @@ void SwrStoreTiles( RDTSC_START(APIStoreTiles); -SWR_CONTEXT *pContext = (SWR_CONTEXT*)hContext; +SWR_CONTEXT *pContext = GetContext(hContext); DRAW_CONTEXT* pDC = GetDrawContext(pContext); SetupMacroTileScissors(pDC); @@ -1414,7 +1414,7 @@ void SwrClearRenderTarget( RDTSC_START(APIClearRenderTarget); -SWR_CONTEXT *pContext = (SWR_CONTEXT*)hContext; +SWR_CONTEXT *pContext = GetContext(hContext); DRAW_CONTEXT* pDC = GetDrawContext(pContext); -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/5] swr: [rasterizer] add support for llvm-3.9
--- .../drivers/swr/rasterizer/jitter/builder_misc.cpp | 38 -- .../jitter/scripts/gen_llvm_ir_macros.py | 5 --- 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp index 671178f..b23a10d 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp @@ -700,20 +700,22 @@ Value *Builder::PSHUFB(Value* a, Value* b) /// lower 8 values are used. Value *Builder::PMOVSXBD(Value* a) { -Value* res; +// llvm-3.9 removed the pmovsxbd intrinsic +#if HAVE_LLVM < 0x309 // use avx2 byte sign extend instruction if available if(JM()->mArch.AVX2()) { -res = VPMOVSXBD(a); +Function *pmovsxbd = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::x86_avx2_pmovsxbd); +return CALL(pmovsxbd, std::initializer_list{a}); } else +#endif { // VPMOVSXBD output type Type* v8x32Ty = VectorType::get(mInt32Ty, 8); // Extract 8 values from 128bit lane and sign extend -res = S_EXT(VSHUFFLE(a, a, C({0, 1, 2, 3, 4, 5, 6, 7})), v8x32Ty); +return S_EXT(VSHUFFLE(a, a, C({0, 1, 2, 3, 4, 5, 6, 7})), v8x32Ty); } -return res; } // @@ -722,20 +724,22 @@ Value *Builder::PMOVSXBD(Value* a) /// @param a - 128bit SIMD lane(8x16bit) of 16bit integer values. 
Value *Builder::PMOVSXWD(Value* a) { -Value* res; +// llvm-3.9 removed the pmovsxwd intrinsic +#if HAVE_LLVM < 0x309 // use avx2 word sign extend if available if(JM()->mArch.AVX2()) { -res = VPMOVSXWD(a); +Function *pmovsxwd = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::x86_avx2_pmovsxwd); +return CALL(pmovsxwd, std::initializer_list {a}); } else +#endif { // VPMOVSXWD output type Type* v8x32Ty = VectorType::get(mInt32Ty, 8); // Extract 8 values from 128bit lane and sign extend -res = S_EXT(VSHUFFLE(a, a, C({0, 1, 2, 3, 4, 5, 6, 7})), v8x32Ty); +return S_EXT(VSHUFFLE(a, a, C({0, 1, 2, 3, 4, 5, 6, 7})), v8x32Ty); } -return res; } // @@ -875,9 +879,15 @@ Value *Builder::CVTPS2PH(Value* a, Value* rounding) Value *Builder::PMAXSD(Value* a, Value* b) { +// llvm-3.9 removed the pmax intrinsics +#if HAVE_LLVM >= 0x309 +Value* cmp = ICMP_UGT(a, b); +return SELECT(VMASK(cmp), a, b); +#else if (JM()->mArch.AVX2()) { -return VPMAXSD(a, b); +Function* pmaxsd = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::x86_avx2_pmaxs_d); +return CALL(pmaxsd, {a, b}); } else { @@ -900,13 +910,20 @@ Value *Builder::PMAXSD(Value* a, Value* b) return result; } +#endif } Value *Builder::PMINSD(Value* a, Value* b) { +// llvm-3.9 removed the pmin intrinsics +#if HAVE_LLVM >= 0x309 +Value* cmp = ICMP_ULT(a, b); +return SELECT(VMASK(cmp), a, b); +#else if (JM()->mArch.AVX2()) { -return VPMINSD(a, b); +Function* pminsd = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::x86_avx2_pmins_d); +return CALL(pminsd, {a, b}); } else { @@ -929,6 +946,7 @@ Value *Builder::PMINSD(Value* a, Value* b) return result; } +#endif } void Builder::Gather4(const SWR_FORMAT format, Value* pSrcBase, Value* byteOffsets, diff --git a/src/gallium/drivers/swr/rasterizer/jitter/scripts/gen_llvm_ir_macros.py b/src/gallium/drivers/swr/rasterizer/jitter/scripts/gen_llvm_ir_macros.py index 4963c5e..234889b 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/scripts/gen_llvm_ir_macros.py 
+++ b/src/gallium/drivers/swr/rasterizer/jitter/scripts/gen_llvm_ir_macros.py @@ -91,8 +91,6 @@ intrinsics = [ ["VRCPPS", "x86_avx_rcp_ps_256", ["a"]], ["VMINPS", "x86_avx_min_ps_256", ["a", "b"]], ["VMAXPS", "x86_avx_max_ps_256", ["a", "b"]], -["VPMINSD", "x86_avx2_pmins_d", ["a", "b"]], -["VPMAXSD", "x86_avx2_pmaxs_d", ["a", "b"]], ["VROUND", "x86_avx_round_ps_256", ["a", "rounding"]], ["VCMPPS", "x86_avx_cmp_ps_256", ["a", "b", "cmpop"]], ["VBLENDVPS", "x86_avx_blendv_ps_256", ["a", "b", "mask"]], @@ -100,8 +98,6 @@ intrinsics = [ ["VMASKLOADD", "x86_avx2_maskload_d_256", ["src", "mask"]], ["VMASKMOVPS", "x86_avx_maskload_ps_256", ["src", "mask"]], ["VPSHUFB", "x86_avx2_pshuf_b", ["a", "b"]], -["VPMOVSXBD", "x86_avx2_pmovsxbd", ["a"]], # sign extend packed 8bit components -["VPMOVSXWD", "x86_avx2_pmovsxwd", ["a"]], # sign extend packed 16bit components ["VPERMD", "x86_avx2_permd", ["a", "idx"]],
[Mesa-dev] [PATCH 5/5] swr: [rasterizer core] correct MSAA behavior for conservative rasterization
--- .../drivers/swr/rasterizer/core/multisample.h | 4 +++ .../drivers/swr/rasterizer/core/rasterizer.cpp | 35 +++--- .../drivers/swr/rasterizer/core/rasterizer.h | 3 ++ 3 files changed, 31 insertions(+), 11 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/multisample.h b/src/gallium/drivers/swr/rasterizer/core/multisample.h index a52e8ac..bba0d9a 100644 --- a/src/gallium/drivers/swr/rasterizer/core/multisample.h +++ b/src/gallium/drivers/swr/rasterizer/core/multisample.h @@ -29,6 +29,10 @@ #include "context.h" #include "format_traits.h" +// +/// @brief convenience typedef for testing for single sample case +typedef std::integral_constantSingleSampleT; + INLINE uint32_t GetNumSamples(SWR_MULTISAMPLE_COUNT sampleCount) { diff --git a/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp b/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp index c9b0285..1e7da2b 100644 --- a/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp @@ -516,7 +516,7 @@ void ComputeEdgeData(const POS& p0, const POS& p1, EDGE& edge) /// the UpdateEdgeMasks function. Offset evaluated edges from UL pixel /// corner to sample position, and test for coverage /// @tparam sampleCount: multisample count -template +template INLINE void UpdateEdgeMasks(const __m256d ()[3], const __m256d ()[7], int32_t , int32_t , int32_t ) { @@ -531,11 +531,11 @@ INLINE void UpdateEdgeMasks(const __m256d ()[3], const __m256d ( } // -/// @brief UpdateEdgeMasks partial specialization, -/// instantiated when MSAA is disabled. 
+/// @brief UpdateEdgeMasks specialization, instantiated +/// when only rasterizing a single coverage test point template <> -INLINE void UpdateEdgeMasks(const __m256d(&)[3], const __m256d ()[7], -int32_t , int32_t , int32_t ) +INLINE void UpdateEdgeMasks(const __m256d(&)[3], const __m256d ()[7], + int32_t , int32_t , int32_t ) { mask0 = _mm256_movemask_pd(vEdgeFix16[0]); mask1 = _mm256_movemask_pd(vEdgeFix16[1]); @@ -812,7 +812,12 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, int32_t x = AlignDown(intersect.left, (FIXED_POINT_SCALE * KNOB_TILE_X_DIM)); int32_t y = AlignDown(intersect.top, (FIXED_POINT_SCALE * KNOB_TILE_Y_DIM)); -if(RT::MT::sampleCount == SWR_MULTISAMPLE_1X) +// convenience typedef +typedef typename RT::NumRasterSamplesT NumRasterSamplesT; + +// single sample rasterization evaluates edges at pixel center, +// multisample evaluates edges UL pixel corner and steps to each sample position +if(std::is_same ::value) { // Add 0.5, in fixed point, to offset to pixel center x += (FIXED_POINT_SCALE / 2); @@ -887,7 +892,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, // | | // min(xSamples),max(ySamples) -- max(xSamples),max(ySamples) __m256d vEdgeTileBbox[3]; -if (RT::MT::sampleCount > SWR_MULTISAMPLE_1X) +if (NumRasterSamplesT::value > 1) { __m128i vTileSampleBBoxXh = RT::MT::TileSampleOffsetsX(); __m128i vTileSampleBBoxYh = RT::MT::TileSampleOffsetsY(); @@ -931,9 +936,9 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, // is the corner of the edge outside of the raster tile? 
(vEdge < 0) int mask0, mask1, mask2; -UpdateEdgeMasks(vEdgeTileBbox, vEdgeFix16, mask0, mask1, mask2); +UpdateEdgeMasks(vEdgeTileBbox, vEdgeFix16, mask0, mask1, mask2); -for (uint32_t sampleNum = 0; sampleNum < RT::MT::numSamples; sampleNum++) +for (uint32_t sampleNum = 0; sampleNum < NumRasterSamplesT::value; sampleNum++) { // trivial reject, at least one edge has all 4 corners of raster tile outside bool trivialReject = (!(mask0 && mask1 && mask2)) ? true : false; @@ -952,7 +957,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, else { __m256d vEdgeAtSample[RT::NumEdgesT::value]; -if(RT::MT::sampleCount == SWR_MULTISAMPLE_1X) +if(std::is_same ::value) { // should get optimized out for single sample case (global value numbering or copy propagation) for (uint32_t e = 0; e < RT::NumEdgesT::value; ++e) @@ -995,7 +1000,7 @@
[Mesa-dev] [PATCH 4/5] swr: [rasterizer core] conservative rast backend changes
--- src/gallium/drivers/swr/rasterizer/core/api.cpp| 6 +- .../drivers/swr/rasterizer/core/backend.cpp| 22 +- .../drivers/swr/rasterizer/core/conservativeRast.h | 111 - .../drivers/swr/rasterizer/core/frontend.cpp | 28 +- src/gallium/drivers/swr/rasterizer/core/frontend.h | 8 - .../drivers/swr/rasterizer/core/rasterizer.cpp | 511 ++--- .../drivers/swr/rasterizer/core/rasterizer.h | 71 ++- src/gallium/drivers/swr/rasterizer/core/state.h| 2 +- 8 files changed, 538 insertions(+), 221 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp index 6f9c402..6460a16 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp @@ -760,8 +760,8 @@ void SetupMacroTileScissors(DRAW_CONTEXT *pDC) // templated backend function tables extern PFN_BACKEND_FUNC gBackendNullPs[SWR_MULTISAMPLE_TYPE_MAX]; extern PFN_BACKEND_FUNC gBackendSingleSample[2][2][2]; -extern PFN_BACKEND_FUNC gBackendPixelRateTable[SWR_MULTISAMPLE_TYPE_MAX][SWR_MSAA_SAMPLE_PATTERN_MAX][SWR_INPUT_COVERAGE_MAX][2][2][2]; -extern PFN_BACKEND_FUNC gBackendSampleRateTable[SWR_MULTISAMPLE_TYPE_MAX][SWR_INPUT_COVERAGE_MAX][2][2]; +extern PFN_BACKEND_FUNC gBackendPixelRateTable[SWR_MULTISAMPLE_TYPE_MAX][SWR_MSAA_SAMPLE_PATTERN_MAX][2][2][2][2]; +extern PFN_BACKEND_FUNC gBackendSampleRateTable[SWR_MULTISAMPLE_TYPE_MAX][2][2][2]; void SetupPipeline(DRAW_CONTEXT *pDC) { DRAW_STATE* pState = pDC->pState; @@ -780,7 +780,7 @@ void SetupPipeline(DRAW_CONTEXT *pDC) const bool bMultisampleEnable = ((rastState.sampleCount > SWR_MULTISAMPLE_1X) || rastState.forcedSampleCount) ? 1 : 0; const uint32_t centroid = ((psState.barycentricsMask & SWR_BARYCENTRIC_CENTROID_MASK) > 0) ? 1 : 0; const uint32_t canEarlyZ = (psState.forceEarlyZ || (!psState.writesODepth && !psState.usesSourceDepth && !psState.usesUAV)) ? 
1 : 0; -const uint32_t inputCoverage = (psState.inputCoverage != SWR_INPUT_COVERAGE_NONE); +const uint32_t inputCoverage = (psState.inputCoverage != SWR_INPUT_COVERAGE_NONE) ? 1 : 0; SWR_BARYCENTRICS_MASK barycentricsMask = (SWR_BARYCENTRICS_MASK)psState.barycentricsMask; diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp index 8e1fa78..b492810 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp @@ -1154,12 +1154,13 @@ PFN_BACKEND_FUNC gBackendSingleSample[2] // input coverage = {}; PFN_BACKEND_FUNC gBackendPixelRateTable[SWR_MULTISAMPLE_TYPE_MAX] [SWR_MSAA_SAMPLE_PATTERN_MAX] - [SWR_INPUT_COVERAGE_MAX] + [2] // input coverage [2] // centroid [2] // forcedSampleCount [2] // canEarlyZ = {}; -PFN_BACKEND_FUNC gBackendSampleRateTable[SWR_MULTISAMPLE_TYPE_MAX][SWR_INPUT_COVERAGE_MAX] +PFN_BACKEND_FUNC gBackendSampleRateTable[SWR_MULTISAMPLE_TYPE_MAX] +[2] // input coverage [2] // centroid [2] // canEarlyZ = {}; @@ -1232,28 +1233,27 @@ struct BEChooser void InitBackendSingleFuncTable(PFN_BACKEND_FUNC ()[2][2][2]) { -for(uint32_t inputCoverage = SWR_INPUT_COVERAGE_NONE; inputCoverage < SWR_INPUT_COVERAGE_MAX; inputCoverage++) +for(uint32_t inputCoverage = 0; inputCoverage < 2; inputCoverage++) { for(uint32_t isCentroid = 0; isCentroid < 2; isCentroid++) { for(uint32_t canEarlyZ = 0; canEarlyZ < 2; canEarlyZ++) { table[inputCoverage][isCentroid][canEarlyZ] = -BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, (inputCoverage == SWR_INPUT_COVERAGE_NORMAL), +BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, (inputCoverage > 0), (isCentroid > 0), false, (canEarlyZ > 0), SWR_BACKEND_SINGLE_SAMPLE); } } } } -void InitBackendPixelFuncTable(PFN_BACKEND_FUNC ()[SWR_MULTISAMPLE_TYPE_MAX][SWR_MSAA_SAMPLE_PATTERN_MAX][SWR_INPUT_COVERAGE_MAX] -[2][2][2]) +void InitBackendPixelFuncTable(PFN_BACKEND_FUNC 
()[SWR_MULTISAMPLE_TYPE_MAX][SWR_MSAA_SAMPLE_PATTERN_MAX][2][2][2][2]) { for(uint32_t sampleCount = SWR_MULTISAMPLE_1X; sampleCount < SWR_MULTISAMPLE_TYPE_MAX; sampleCount++) { for(uint32_t
[Mesa-dev] [PATCH 0/5] update swr rasterizer
Highlights include llvm-3.9 support, conservative rasterization work, and small cleanups. Tim Rowley (5): swr: [rasterizer] add support for llvm-3.9 swr: [rasterizer core] make all api functions call GetContext swr: [rasterizer] buckets cleanup swr: [rasterizer core] conservative rast backend changes swr: [rasterizer core] correct MSAA behavior for conservative rasterization .../swr/rasterizer/common/rdtsc_buckets.cpp| 16 +- .../drivers/swr/rasterizer/common/rdtsc_buckets.h | 33 +- .../swr/rasterizer/common/rdtsc_buckets_shared.h | 2 + src/gallium/drivers/swr/rasterizer/core/api.cpp| 34 +- .../drivers/swr/rasterizer/core/backend.cpp| 22 +- .../drivers/swr/rasterizer/core/conservativeRast.h | 111 - .../drivers/swr/rasterizer/core/frontend.cpp | 28 +- src/gallium/drivers/swr/rasterizer/core/frontend.h | 8 - .../drivers/swr/rasterizer/core/multisample.h | 4 + .../drivers/swr/rasterizer/core/rasterizer.cpp | 524 ++--- .../drivers/swr/rasterizer/core/rasterizer.h | 74 ++- .../drivers/swr/rasterizer/core/rdtsc_core.h | 4 +- src/gallium/drivers/swr/rasterizer/core/state.h| 2 +- .../drivers/swr/rasterizer/jitter/builder_misc.cpp | 38 +- .../jitter/scripts/gen_llvm_ir_macros.py | 5 - 15 files changed, 643 insertions(+), 262 deletions(-) -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/5] swr: [rasterizer] buckets cleanup
--- .../swr/rasterizer/common/rdtsc_buckets.cpp| 16 ++- .../drivers/swr/rasterizer/common/rdtsc_buckets.h | 33 -- .../swr/rasterizer/common/rdtsc_buckets_shared.h | 2 ++ .../drivers/swr/rasterizer/core/rdtsc_core.h | 4 +-- 4 files changed, 43 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.cpp b/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.cpp index 412182f..288b071 100644 --- a/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.cpp +++ b/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.cpp @@ -40,6 +40,10 @@ THREAD UINT tlsThreadId = 0; +BucketManager::~BucketManager() +{ +} + void BucketManager::RegisterThread(const std::string& name) { // lazy evaluate threadviz knob @@ -51,7 +55,7 @@ void BucketManager::RegisterThread(const std::string& name) mThreadVizDir = str.str(); CreateDirectory(mThreadVizDir.c_str(), NULL); -mThreadViz = true; +mThreadViz = KNOB_BUCKETS_ENABLE_THREADVIZ; } BUCKET_THREAD newThread; @@ -207,12 +211,22 @@ void BucketManager::PrintReport(const std::string& filename) PrintThread(f, thread); fprintf(f, "\n"); } + mThreadMutex.unlock(); fclose(f); } } + +void BucketManager::StartCapture() +{ + +printf("Capture Starting\n"); + +mCapturing = true; +} + void BucketManager_StartBucket(BucketManager* pBucketMgr, uint32_t id) { pBucketMgr->StartBucket(id); diff --git a/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.h b/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.h index fe25e77..e50a8a5 100644 --- a/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.h +++ b/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.h @@ -36,6 +36,7 @@ #include "rdtsc_buckets_shared.h" + // unique thread id stored in thread local storage extern THREAD UINT tlsThreadId; @@ -48,6 +49,7 @@ class BucketManager { public: BucketManager() { } +~BucketManager(); // removes all registered thread data void ClearThreads() @@ -92,11 +94,9 @@ public: // print report void 
PrintReport(const std::string& filename); + // start capturing -INLINE void StartCapture() -{ -mCapturing = true; -} +void StartCapture(); // stop capturing INLINE void StopCapture() @@ -117,6 +117,9 @@ public: } } } + +mDoneCapturing = true; +printf("Capture Stopped\n"); } // start a bucket @@ -129,13 +132,15 @@ public: BUCKET_THREAD& bt = mThreads[tlsThreadId]; +uint64_t tsc = __rdtsc(); + // if threadviz is enabled, only need to dump start info to threads viz file if (mThreadViz) { SWR_ASSERT(bt.vizFile != nullptr); if (mBuckets[id].enableThreadViz) { -VIZ_START_DATA data{ VIZ_START, id, __rdtsc() }; +VIZ_START_DATA data{ VIZ_START, id, tsc }; Serialize(bt.vizFile, data); } } @@ -148,12 +153,13 @@ public: BUCKET = bt.pCurrent->children[id]; child.pParent = bt.pCurrent; child.id = id; -child.start = __rdtsc(); +child.start = tsc; // update thread's currently executing bucket bt.pCurrent = } + bt.level++; } @@ -163,14 +169,19 @@ public: SWR_ASSERT(tlsThreadId < mThreads.size()); BUCKET_THREAD = mThreads[tlsThreadId]; -if (bt.level == 0) return; +if (bt.level == 0) +{ +return; +} + +uint64_t tsc = __rdtsc(); if (mThreadViz) { SWR_ASSERT(bt.vizFile != nullptr); if (mBuckets[id].enableThreadViz) { -VIZ_STOP_DATA data{ VIZ_STOP, __rdtsc() }; +VIZ_STOP_DATA data{ VIZ_STOP, tsc }; Serialize(bt.vizFile, data); } } @@ -179,7 +190,7 @@ public: if (bt.pCurrent->start == 0) return; SWR_ASSERT(bt.pCurrent->id == id, "Mismatched buckets detected"); -bt.pCurrent->elapsed += (__rdtsc() - bt.pCurrent->start); +bt.pCurrent->elapsed += (tsc - bt.pCurrent->start); bt.pCurrent->count++; // pop to parent @@ -224,11 +235,15 @@ private: // is capturing currently enabled volatile bool mCapturing{ false }; +// has capturing completed +volatile bool mDoneCapturing{ false }; + std::mutex mThreadMutex; // enable threadviz bool mThreadViz{ false }; std::string mThreadVizDir; + }; diff --git a/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets_shared.h 
b/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets_shared.h index 34c322e..f6e75cd
[Mesa-dev] [PATCH] swr: [rasterizer jitter] fix llvm-3.7 compile
d3d97f8 broke llvm-3.7, which has a mismatched API for setDataLayout/getDataLayout. --- src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp | 5 + 1 file changed, 5 insertions(+) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp index 6e00a70..c6cbccf 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp @@ -241,7 +241,12 @@ bool JitManager::SetupModuleFromIR(const uint8_t *pIR) return false; } +#if HAVE_LLVM == 0x307 +// llvm-3.7 has mismatched setDataLyout/getDataLayout APIs +newModule->setDataLayout(*mpExec->getDataLayout()); +#else newModule->setDataLayout(mpExec->getDataLayout()); +#endif mpCurrentModule = newModule.get(); #if defined(_WIN32) -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/6] i965/fs: use the new helper function to create double immediates
On Wednesday, July 6, 2016 12:09:58 PM PDT Samuel Iglesias Gonsálvez wrote: > From: Iago Toral Quiroga> > --- > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 6 +++--- > 1 file changed, 3 insertions(+), 3 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > index 268c847..d805d95 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > @@ -832,7 +832,7 @@ fs_visitor::nir_emit_alu(const fs_builder , > nir_alu_instr *instr) >* a register and compare with that. >*/ > fs_reg tmp = vgrf(glsl_type::double_type); > - bld.MOV(tmp, brw_imm_df(0.0)); > + bld.MOV(tmp, setup_imm_df(0.0)); Does this need to be splatted out to a full SIMD-width? Why not just do: fs_reg tmp = setup_imm_df(0.0); and let the CMP compare against the stride 0 register? > > /* A direct DF CMP using the flag register (null dst) won't work in >* SIMD16 because the CMP will be split in two by lower_simd_width, > @@ -1171,7 +1171,7 @@ fs_visitor::nir_emit_alu(const fs_builder , > nir_alu_instr *instr) > case nir_op_d2b: { >/* two-argument instructions can't take 64-bit immediates */ >fs_reg zero = vgrf(glsl_type::double_type); > - bld.MOV(zero, brw_imm_df(0.0)); > + bld.MOV(zero, setup_imm_df(0.0)); >/* A SIMD16 execution needs to be split in two instructions, so use > * a vgrf instead of the flag register as dst so instruction splitting > * works Likewise, I don't think you need to splat here. > @@ -1483,7 +1483,7 @@ fs_visitor::nir_emit_load_const(const fs_builder , > > case 64: >for (unsigned i = 0; i < instr->def.num_components; i++) > - bld.MOV(offset(reg, bld, i), brw_imm_df(instr->value.f64[i])); > + bld.MOV(offset(reg, bld, i), setup_imm_df(instr->value.f64[i])); >break; > > default: > This hunk looks good. signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/6] i965/fs: add a helper function to create double immediates
On Wednesday, July 6, 2016 12:09:57 PM PDT Samuel Iglesias Gonsálvez wrote: > From: Iago Toral Quiroga> > Gen7 hardware does not support double immediates so these need > to be moved in 32-bit chunks to a regular vgrf instead. Instead > of doing this every time we need to create a DF immediate, > create a helper function that does the right thing depending > on the hardware generation. > --- > src/mesa/drivers/dri/i965/brw_fs.h | 2 ++ > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 43 > > 2 files changed, 45 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.h > b/src/mesa/drivers/dri/i965/brw_fs.h > index 4237197..dd7ce7d 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.h > +++ b/src/mesa/drivers/dri/i965/brw_fs.h > @@ -167,6 +167,8 @@ public: > bool lower_simd_width(); > bool opt_combine_constants(); > > + fs_reg setup_imm_df(double v); > + > void emit_dummy_fs(); > void emit_repclear_shader(); > fs_reg *emit_fragcoord_interpolation(); > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > index b3f5dfd..268c847 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > @@ -616,6 +616,49 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr > *instr, > return true; > } > > +fs_reg > +fs_visitor::setup_imm_df(double v) > +{ > + assert(devinfo->gen >= 7); > + > + if (devinfo->gen >= 8) > + return brw_imm_df(v); You could probably use the DIM instruction on Haswell (only). > + > + /* gen7 does not support DF immediates, so we generate a 64-bit constant > by > +* writing the low 32-bit of the constant to suboffset 0 of a VGRF and > +* the high 32-bit to suboffset 4 and then applying a stride of 0. 
> +* > +* Alternatively, we could also produce a normal VGRF (without stride 0) > +* by writing to all the channels in the VGRF, however, that would hit the > +* gen7 bug where we have to split writes that span more than 1 register > +* into instructions with a width of 4 (otherwise the write to the second > +* register written runs into an execmask hardware bug) which isn't very > +* nice. > +*/ > + union { > + double d; > + struct { > + uint32_t i1; > + uint32_t i2; > + }; > + } di; > + > + di.d = v; > + > + fs_reg tmp = vgrf(glsl_type::uint_type); > + fs_inst *inst = bld.MOV(tmp, brw_imm_ud(di.i1)); > + inst->force_writemask_all = true; > + inst->exec_size = 1; > + inst->regs_written = 1; > + > + inst = bld.MOV(horiz_offset(tmp, 1), brw_imm_ud(di.i2)); > + inst->force_writemask_all = true; > + inst->exec_size = 1; > + inst->regs_written = 1; > + > + return component(retype(tmp, BRW_REGISTER_TYPE_DF), 0); > +} > + > void > fs_visitor::nir_emit_alu(const fs_builder , nir_alu_instr *instr) > { > Otherwise this looks reasonable to me. I was wondering whether we should use subscript() rather than horiz_offset(), but given that everything's exec_size 1 and NoMask, I don't think it matters. Reviewed-by: Kenneth Graunke signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH mesa] i965/docs: update Intel Linux Graphics URLs
On Wednesday, July 6, 2016 5:09:15 PM PDT Eric Engestrom wrote: > Signed-off-by: Eric Engestrom > --- > docs/developers.html | 2 +- > docs/faq.html | 2 +- > src/mesa/drivers/dri/i965/brw_defines.h | 2 +- > src/mesa/drivers/dri/i965/brw_sf_state.c | 2 +- > 4 files changed, 4 insertions(+), 4 deletions(-) Pushed, thanks! signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 96765] BindFragDataLocationIndexed on array fragment shader output.
https://bugs.freedesktop.org/show_bug.cgi?id=96765 --- Comment #7 from Corentin Wallez --- Thank you, I'm still figuring out how to test top of tree Mesa and will confirm if that fixes all the failures. I've fixed the core profile GL_EXTENSIONS bug locally, will push to the repo. -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 96765] BindFragDataLocationIndexed on array fragment shader output.
https://bugs.freedesktop.org/show_bug.cgi?id=96765 --- Comment #6 from Ilia Mirkin --- Oh, actually it should work now with the patch I pushed out last night but failed to mention, since it wasn't addressing the array issue. Double-checked on i965: https://cgit.freedesktop.org/mesa/mesa/commit/?id=a37e46323c7e18bec4160f2f66847c10b7041dc1 commit a37e46323c7e18bec4160f2f66847c10b7041dc1 Author: Ilia Mirkin Date: Fri Jul 1 19:10:36 2016 -0400 glsl: don't try to lower non-gl builtins as if they were gl_FragData If a shader has an output array, it will get treated as though it were gl_FragData and rewritten into gl_out_FragData instances. We only want this to happen on the actual gl_FragData and not everything else. This is a small part of the problem pointed out by the below bug. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96765 Signed-off-by: Ilia Mirkin Reviewed-by: Marek Olšák Reviewed-by: Kenneth Graunke Cc: "11.2 12.0" [BTW, your repo appears to have obtained a bug where you try to do glGetString(GL_EXTENSIONS) and die when it's not there -- you're supposed to use glGetStringi(GL_EXTENSIONS) in core contexts.] -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Mesa 12.0.0 release candidate 4
On Thu, Jun 23, 2016 at 9:35 AM, Emil Velikov wrote: > Hi all, > > On 21 June 2016 at 15:35, Emil Velikov wrote: >> The fourth release candidate for Mesa 12.0.0 is now available. >> >> Note: this is the final release candidate, with Mesa 12.0.0 expected in a >> couple of days. >> > Considering the requests, from different parties, the final release > will be out tomorrow Friday after 20:00 GMT. > > All your nominations (that have master landed in master, if > applicable) will be included, but do let me know if certain patch(es) > should be included/excluded from the release. btw, in case you missed my note on IRC, these would be good to have on the 12.0 branch: 7295428 freedreno: fix crash on smaller gpus and higher resolutions 01ccb0d i965: don't drop const initializers in vector splitting f78a6b1 glsl: add driconf to zero-init unintialized vars BR, -R > Thanks > Emil > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Mesa 12.0.0 release candidate 4
On Thu, Jun 23, 2016 at 6:35 AM, Emil Velikov wrote: > Hi all, > > On 21 June 2016 at 15:35, Emil Velikov wrote: > > The fourth release candidate for Mesa 12.0.0 is now available. > > > > Note: this is the final release candidate, with Mesa 12.0.0 expected in > a couple of days. > > > Considering the requests, from different parties, the final release > will be out tomorrow Friday after 20:00 GMT. > What's going on here? I don't think I missed the release but "tomorrow" was 12 days ago according to my e-mail client. --Jason > > All your nominations (that have master landed in master, if > applicable) will be included, but do let me know if certain patch(es) > should be included/excluded from the release. > > Thanks > Emil > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] anv: vulkan: remove the anv_device.$(OBJEXT) rule
On Wed, Jul 6, 2016 at 10:46 AM, Emil Velikov wrote: > On 6 July 2016 at 16:39, Jason Ekstrand wrote: > > So here's a thought: could we make anv_timestamp.h generation trigger > off of > > libvulkan_intel.so getting rebuilt? I'm not quite sure how one would > even > > do that but it seems like a thing you might be able to do... > > > Not sure I fully understood you here. You're thinking about having > dummy file for the initial `make' invocation, and regenerating it as > the final (shared) library is created ? > > I believe that'll still get the build/link done twice, plus it'll > produce a nasty result for people that don't do "make && make install" > but a straight "make install". > I'm not 100% sure what I meant. :-) More to the point, I know what I meant but I don't know how to get there. What I meant is that you should regenerate the timestamp if and only if libvulkan_intel needs to be rebuilt. Maybe we can do that by providing some dependencies to anv_timestamp? I honestly have no idea how to actually accomplish it. Mostly just food for thought. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/4] vl/compositor: set layer of y or uv to render
Signed-off-by: Leo Liu--- src/gallium/auxiliary/vl/vl_compositor.c | 30 ++ src/gallium/auxiliary/vl/vl_compositor.h | 12 2 files changed, 42 insertions(+) diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c index a19d624..f7517f3 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.c +++ b/src/gallium/auxiliary/vl/vl_compositor.c @@ -,6 +,36 @@ vl_compositor_set_layer_rotation(struct vl_compositor_state *s, } void +vl_compositor_set_yuv_layer(struct vl_compositor_state *s, +struct vl_compositor *c, +unsigned layer, +struct pipe_video_buffer *buffer, +struct u_rect *src_rect, +struct u_rect *dst_rect, +bool y) +{ + struct pipe_sampler_view **sampler_views; + unsigned i; + + assert(s && c && buffer); + + assert(layer < VL_COMPOSITOR_MAX_LAYERS); + + s->used_layers |= 1 << layer; + sampler_views = buffer->get_sampler_view_components(buffer); + for (i = 0; i < 3; ++i) { + s->layers[layer].samplers[i] = c->sampler_linear; + pipe_sampler_view_reference(>layers[layer].sampler_views[i], sampler_views[i]); + } + + calc_src_and_dst(>layers[layer], buffer->width, buffer->height, +src_rect ? *src_rect : default_rect(>layers[layer]), +dst_rect ? *dst_rect : default_rect(>layers[layer])); + + s->layers[layer].fs = (y) ? 
c->fs_weave_yuv.y : c->fs_weave_yuv.uv; +} + +void vl_compositor_render(struct vl_compositor_state *s, struct vl_compositor *c, struct pipe_surface*dst_surface, diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h index 6c632ff..ceab5e0 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.h +++ b/src/gallium/auxiliary/vl/vl_compositor.h @@ -241,6 +241,18 @@ vl_compositor_set_layer_rotation(struct vl_compositor_state *state, unsigned layer, enum vl_compositor_rotation rotate); +/** + * set a layer of y or uv to render + */ +void +vl_compositor_set_yuv_layer(struct vl_compositor_state *s, +struct vl_compositor *c, +unsigned layer, +struct pipe_video_buffer *buffer, +struct u_rect *src_rect, +struct u_rect *dst_rect, +bool y); + /*@}*/ /** -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/4] vl/compositor: add weave to yuv shader
This shader will make interlaced yuv to progressive yuv. Signed-off-by: Leo Liu--- src/gallium/auxiliary/vl/vl_compositor.c | 38 src/gallium/auxiliary/vl/vl_compositor.h | 5 + 2 files changed, 43 insertions(+) diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c index 275022b..a19d624 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.c +++ b/src/gallium/auxiliary/vl/vl_compositor.c @@ -293,6 +293,35 @@ create_frag_shader_weave_rgb(struct vl_compositor *c) } static void * +create_frag_shader_weave_yuv(struct vl_compositor *c, bool y) +{ + struct ureg_program *shader; + struct ureg_dst texel, fragment; + + shader = ureg_create(PIPE_SHADER_FRAGMENT); + if (!shader) + return false; + + texel = ureg_DECL_temporary(shader); + fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); + + create_frag_shader_weave(shader, texel); + + if (y) + ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X), ureg_src(texel)); + else + ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XY), + ureg_swizzle(ureg_src(texel), TGSI_SWIZZLE_Y, + TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W)); + + ureg_release_temporary(shader, texel); + + ureg_END(shader); + + return ureg_create_shader_and_destroy(shader, c->pipe); +} + +static void * create_frag_shader_palette(struct vl_compositor *c, bool include_cc) { struct ureg_program *shader; @@ -391,6 +420,13 @@ init_shaders(struct vl_compositor *c) return false; } + c->fs_weave_yuv.y = create_frag_shader_weave_yuv(c, true); + c->fs_weave_yuv.uv = create_frag_shader_weave_yuv(c, false); + if (!c->fs_weave_yuv.y || !c->fs_weave_yuv.uv) { + debug_printf("Unable to create YCbCr i-to-YCbCr p weave fragment shader.\n"); + return false; + } + c->fs_palette.yuv = create_frag_shader_palette(c, true); if (!c->fs_palette.yuv) { debug_printf("Unable to create YUV-Palette-to-RGB fragment shader.\n"); @@ -419,6 +455,8 @@ static void cleanup_shaders(struct vl_compositor *c) 
c->pipe->delete_vs_state(c->pipe, c->vs); c->pipe->delete_fs_state(c->pipe, c->fs_video_buffer); c->pipe->delete_fs_state(c->pipe, c->fs_weave_rgb); + c->pipe->delete_fs_state(c->pipe, c->fs_weave_yuv.y); + c->pipe->delete_fs_state(c->pipe, c->fs_weave_yuv.uv); c->pipe->delete_fs_state(c->pipe, c->fs_palette.yuv); c->pipe->delete_fs_state(c->pipe, c->fs_palette.rgb); c->pipe->delete_fs_state(c->pipe, c->fs_rgba); diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h index 3f81bc9..6c632ff 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.h +++ b/src/gallium/auxiliary/vl/vl_compositor.h @@ -117,6 +117,11 @@ struct vl_compositor void *fs_rgba; struct { + void *y; + void *uv; + } fs_weave_yuv; + + struct { void *rgb; void *yuv; } fs_palette; -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/4] vl/compositor: move weave shader out from rgb weaving
We'll use weave shader in the later patch. Signed-off-by: Leo Liu--- src/gallium/auxiliary/vl/vl_compositor.c | 157 --- src/gallium/auxiliary/vl/vl_compositor.h | 2 +- 2 files changed, 83 insertions(+), 76 deletions(-) diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c index 77fc92e..275022b 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.c +++ b/src/gallium/auxiliary/vl/vl_compositor.c @@ -126,6 +126,77 @@ create_vert_shader(struct vl_compositor *c) } static void +create_frag_shader_weave(struct ureg_program *shader, struct ureg_dst fragment) +{ + struct ureg_src i_tc[2]; + struct ureg_src sampler[3]; + struct ureg_dst t_tc[2]; + struct ureg_dst t_texel[2]; + unsigned i, j; + + i_tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP, TGSI_INTERPOLATE_LINEAR); + i_tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM, TGSI_INTERPOLATE_LINEAR); + + for (i = 0; i < 3; ++i) + sampler[i] = ureg_DECL_sampler(shader, i); + + for (i = 0; i < 2; ++i) { + t_tc[i] = ureg_DECL_temporary(shader); + t_texel[i] = ureg_DECL_temporary(shader); + } + + /* calculate the texture offsets +* t_tc.x = i_tc.x +* t_tc.y = (round(i_tc.y - 0.5) + 0.5) / height * 2 +*/ + for (i = 0; i < 2; ++i) { + ureg_MOV(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_X), i_tc[i]); + ureg_SUB(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_YZ), + i_tc[i], ureg_imm1f(shader, 0.5f)); + ureg_ROUND(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_YZ), ureg_src(t_tc[i])); + ureg_MOV(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_W), + ureg_imm1f(shader, i ? 
1.0f : 0.0f)); + ureg_ADD(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_YZ), + ureg_src(t_tc[i]), ureg_imm1f(shader, 0.5f)); + ureg_MUL(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_Y), + ureg_src(t_tc[i]), ureg_scalar(i_tc[0], TGSI_SWIZZLE_W)); + ureg_MUL(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_Z), + ureg_src(t_tc[i]), ureg_scalar(i_tc[1], TGSI_SWIZZLE_W)); + } + + /* fetch the texels +* texel[0..1].x = tex(t_tc[0..1][0]) +* texel[0..1].y = tex(t_tc[0..1][1]) +* texel[0..1].z = tex(t_tc[0..1][2]) +*/ + for (i = 0; i < 2; ++i) + for (j = 0; j < 3; ++j) { + struct ureg_src src = ureg_swizzle(ureg_src(t_tc[i]), +TGSI_SWIZZLE_X, j ? TGSI_SWIZZLE_Z : TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W); + + ureg_TEX(shader, ureg_writemask(t_texel[i], TGSI_WRITEMASK_X << j), + TGSI_TEXTURE_2D_ARRAY, src, sampler[j]); + } + + /* calculate linear interpolation factor +* factor = |round(i_tc.y) - i_tc.y| * 2 +*/ + ureg_ROUND(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_YZ), i_tc[0]); + ureg_ADD(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_YZ), +ureg_src(t_tc[0]), ureg_negate(i_tc[0])); + ureg_MUL(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_YZ), +ureg_abs(ureg_src(t_tc[0])), ureg_imm1f(shader, 2.0f)); + ureg_LRP(shader, fragment, ureg_swizzle(ureg_src(t_tc[0]), +TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z), +ureg_src(t_texel[0]), ureg_src(t_texel[1])); + + for (i = 0; i < 2; ++i) { + ureg_release_temporary(shader, t_texel[i]); + ureg_release_temporary(shader, t_tc[i]); + } +} + +static void create_frag_shader_csc(struct ureg_program *shader, struct ureg_dst texel, struct ureg_dst fragment) { @@ -199,86 +270,22 @@ create_frag_shader_video_buffer(struct vl_compositor *c) } static void * -create_frag_shader_weave(struct vl_compositor *c) +create_frag_shader_weave_rgb(struct vl_compositor *c) { struct ureg_program *shader; - struct ureg_src i_tc[2]; - struct ureg_src sampler[3]; - struct ureg_dst t_tc[2]; - struct ureg_dst t_texel[2]; - struct 
ureg_dst o_fragment; - unsigned i, j; + struct ureg_dst texel, fragment; shader = ureg_create(PIPE_SHADER_FRAGMENT); if (!shader) return false; - i_tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP, TGSI_INTERPOLATE_LINEAR); - i_tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM, TGSI_INTERPOLATE_LINEAR); - - for (i = 0; i < 3; ++i) - sampler[i] = ureg_DECL_sampler(shader, i); - - for (i = 0; i < 2; ++i) { - t_tc[i] = ureg_DECL_temporary(shader); - t_texel[i] = ureg_DECL_temporary(shader); - } - o_fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); - - /* calculate the texture offsets -* t_tc.x = i_tc.x -* t_tc.y = (round(i_tc.y - 0.5) + 0.5) / height * 2 -*/ - for (i = 0; i < 2; ++i) { - ureg_MOV(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_X), i_tc[i]); - ureg_SUB(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_YZ), -
[Mesa-dev] [PATCH 4/4] st/omx/dec: make decoder video buffer progressive
The idea of encode tunneling is to use video buffer directly for encoder, but currently the encoder doesn’t support interlaced surface, the OMX decoder set progressive surface before on that purpose. Since now we are polling the driver for interlacing information for decoder, we got the interlaced as preferred as other APIs(VDPAU, VA-API), thus breaking the transcode with tunneling. The solution is when with tunnel detected, re-allocate progressive target buffers, and then converting the interlaced decoder results to there. This has been tested with transcode results bit to bit matching as before with surface from progressive to progressive. Signed-off-by: Leo Liu--- src/gallium/state_trackers/omx/vid_dec.c | 65 +++- src/gallium/state_trackers/omx/vid_dec.h | 6 ++- 2 files changed, 68 insertions(+), 3 deletions(-) diff --git a/src/gallium/state_trackers/omx/vid_dec.c b/src/gallium/state_trackers/omx/vid_dec.c index a989c10..7842966 100644 --- a/src/gallium/state_trackers/omx/vid_dec.c +++ b/src/gallium/state_trackers/omx/vid_dec.c @@ -167,6 +167,19 @@ static OMX_ERRORTYPE vid_dec_Constructor(OMX_COMPONENTTYPE *comp, OMX_STRING nam if (!priv->pipe) return OMX_ErrorInsufficientResources; + if (!vl_compositor_init(>compositor, priv->pipe)) { + priv->pipe->destroy(priv->pipe); + priv->pipe = NULL; + return OMX_ErrorInsufficientResources; + } + + if (!vl_compositor_init_state(>cstate, priv->pipe)) { + vl_compositor_cleanup(>compositor); + priv->pipe->destroy(priv->pipe); + priv->pipe = NULL; + return OMX_ErrorInsufficientResources; + } + priv->sPortTypesParam[OMX_PortDomainVideo].nStartPortNumber = 0; priv->sPortTypesParam[OMX_PortDomainVideo].nPorts = 2; priv->ports = CALLOC(2, sizeof(omx_base_PortType *)); @@ -218,8 +231,11 @@ static OMX_ERRORTYPE vid_dec_Destructor(OMX_COMPONENTTYPE *comp) priv->ports=NULL; } - if (priv->pipe) + if (priv->pipe) { + vl_compositor_cleanup_state(>cstate); + vl_compositor_cleanup(>compositor); priv->pipe->destroy(priv->pipe); + } if 
(priv->screen) omx_put_screen(); @@ -547,6 +563,25 @@ static void vid_dec_FillOutput(vid_dec_PrivateType *priv, struct pipe_video_buff } } +static void vid_dec_deint(vid_dec_PrivateType *priv, struct pipe_video_buffer *src_buf, + struct pipe_video_buffer *dst_buf) +{ + struct vl_compositor *compositor = >compositor; + struct vl_compositor_state *s = >cstate; + struct pipe_surface **dst_surface; + + dst_surface = dst_buf->get_surfaces(dst_buf); + vl_compositor_clear_layers(s); + + vl_compositor_set_yuv_layer(s, compositor, 0, src_buf, NULL, NULL, true); + vl_compositor_set_layer_dst_area(s, 0, NULL); + vl_compositor_render(s, compositor, dst_surface[0], NULL, false); + + vl_compositor_set_yuv_layer(s, compositor, 0, src_buf, NULL, NULL, false); + vl_compositor_set_layer_dst_area(s, 0, NULL); + vl_compositor_render(s, compositor, dst_surface[1], NULL, false); +} + static void vid_dec_FrameDecoded(OMX_COMPONENTTYPE *comp, OMX_BUFFERHEADERTYPE* input, OMX_BUFFERHEADERTYPE* output) { @@ -562,7 +597,33 @@ static void vid_dec_FrameDecoded(OMX_COMPONENTTYPE *comp, OMX_BUFFERHEADERTYPE* if (input->pInputPortPrivate) { if (output->pInputPortPrivate) { - struct pipe_video_buffer *tmp = output->pOutputPortPrivate; + struct pipe_video_buffer *tmp, *vbuf, *new_vbuf; + + tmp = output->pOutputPortPrivate; + vbuf = input->pInputPortPrivate; + if (vbuf->interlaced) { +/* re-allocate the progressive buffer */ +omx_base_video_PortType *port; +struct pipe_video_buffer templat = {}; + +port = (omx_base_video_PortType *) +priv->ports[OMX_BASE_FILTER_INPUTPORT_INDEX]; +memset(, 0, sizeof(templat)); +templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420; +templat.width = port->sPortParam.format.video.nFrameWidth; +templat.height = port->sPortParam.format.video.nFrameHeight; +templat.buffer_format = PIPE_FORMAT_NV12; +templat.interlaced = false; +new_vbuf = priv->pipe->create_video_buffer(priv->pipe, ); + +/* convert the interlaced to the progressive */ +vid_dec_deint(priv, 
input->pInputPortPrivate, new_vbuf); +priv->pipe->flush(priv->pipe, NULL, 0); + +/* set the progrssive buffer for next round */ +vbuf->destroy(vbuf); +input->pInputPortPrivate = new_vbuf; + } output->pOutputPortPrivate = input->pInputPortPrivate; input->pInputPortPrivate = tmp; } else { diff --git a/src/gallium/state_trackers/omx/vid_dec.h b/src/gallium/state_trackers/omx/vid_dec.h index 649d745..d268925 100644 ---
[Mesa-dev] [Bug 96825] anv_device.c:31:27: fatal error: anv_timestamp.h: No such file or directory
https://bugs.freedesktop.org/show_bug.cgi?id=96825 Emil Velikovchanged: What|Removed |Added Resolution|--- |FIXED Status|NEW |RESOLVED --- Comment #6 from Emil Velikov --- Update, things were passing due to bugs on our/my end. Should be fixed with commit 9618e2a24c18b5bbc9ff872d1f6870261d14dee5 Author: Emil Velikov Date: Wed Jul 6 16:18:21 2016 +0100 anv: vulkan: remove the anv_device.$(OBJEXT) rule -- You are receiving this mail because: You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 96765] BindFragDataLocationIndexed on array fragment shader output.
https://bugs.freedesktop.org/show_bug.cgi?id=96765 --- Comment #5 from Ilia Mirkin--- (In reply to Corentin Wallez from comment #4) > I also tried the changes you mentioned in Comment 1, while it fixes the > values queried back from the driver, the triangle is still not being shown. > Only making FragColor and SecondaryFragColor to be non-array floats make the > triangle appear. Ah, right you are. I'm used to seeing piglit print a failure message when it didn't work. More investigating is left here. -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] anv: vulkan: remove the anv_device.$(OBJEXT) rule
On 6 July 2016 at 16:39, Jason Ekstrand wrote: > So here's a thought: could we make anv_timestamp.h generation trigger off of > libvulkan_intel.so getting rebuilt? I'm not quite sure how one would even > do that but it seems like a thing you might be able to do... > Not sure I fully understood you here. You're thinking about having a dummy file for the initial `make' invocation, and regenerating it as the final (shared) library is created? I believe that'll still get the build/link done twice, plus it'll produce a nasty result for people that don't do "make && make install" but a straight "make install". -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH mesa] i965/blorp: add missing braces
On Wed, Jul 06, 2016 at 08:40:06PM +0300, Pohjolainen, Topi wrote: > On Wed, Jul 06, 2016 at 05:36:54PM +0100, Eric Engestrom wrote: > > Signed-off-by: Eric Engestrom> > --- > > > > I know nothing about blorp, but GCC6 noticed the weird indentation, and my > > best > > guess looking at the code is that there are missing braces. > > > > CC gen7_blorp.lo > > gen7_blorp.c: In function ‘gen7_blorp_exec’: > > gen7_blorp.c:797:4: warning: this ‘if’ clause does not guard... > > [-Wmisleading-indentation] > > if (params->wm_prog_data) > > ^~ > > gen7_blorp.c:800:7: note: ...this statement, but the latter is misleadingly > > indented as if it is guarded by the ‘if’ > >gen7_blorp_emit_constant_ps_disable(brw); > >^~~ > > > > If this is wrong, then the second line should be de-indented. > > It should be indented. I have just pushed patches removing push constant I meant de-indented. > support in blorp. It seems I missed that when I made the push constant > disabling unconditional. > > > > > --- > > src/mesa/drivers/dri/i965/gen7_blorp.c | 3 ++- > > 1 file changed, 2 insertions(+), 1 deletion(-) > > > > diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.c > > b/src/mesa/drivers/dri/i965/gen7_blorp.c > > index 7201549..f40e445 100644 > > --- a/src/mesa/drivers/dri/i965/gen7_blorp.c > > +++ b/src/mesa/drivers/dri/i965/gen7_blorp.c > > @@ -794,10 +794,11 @@ gen7_blorp_exec(struct brw_context *brw, > > gen6_blorp_emit_clip_disable(brw); > > gen7_blorp_emit_sf_config(brw, params); > > gen7_blorp_emit_wm_config(brw, params); > > - if (params->wm_prog_data) > > + if (params->wm_prog_data) { > >gen7_blorp_emit_binding_table_pointers_ps(brw, wm_bind_bo_offset); > > > >gen7_blorp_emit_constant_ps_disable(brw); > > + } > > > > if (params->src.mt) { > >const uint32_t sampler_offset = > > -- > > 2.9.0 > > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list 
mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH mesa] i965/blorp: add missing braces
On Wed, Jul 06, 2016 at 05:36:54PM +0100, Eric Engestrom wrote: > Signed-off-by: Eric Engestrom> --- > > I know nothing about blorp, but GCC6 noticed the weird indentation, and my > best > guess looking at the code is that there are missing braces. > > CC gen7_blorp.lo > gen7_blorp.c: In function ‘gen7_blorp_exec’: > gen7_blorp.c:797:4: warning: this ‘if’ clause does not guard... > [-Wmisleading-indentation] > if (params->wm_prog_data) > ^~ > gen7_blorp.c:800:7: note: ...this statement, but the latter is misleadingly > indented as if it is guarded by the ‘if’ >gen7_blorp_emit_constant_ps_disable(brw); >^~~ > > If this is wrong, then the second line should be de-indented. It should be indented. I have just pushed patches removing push constant support in blorp. It seems I missed that when I made the push constant disabling unconditional. > > --- > src/mesa/drivers/dri/i965/gen7_blorp.c | 3 ++- > 1 file changed, 2 insertions(+), 1 deletion(-) > > diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.c > b/src/mesa/drivers/dri/i965/gen7_blorp.c > index 7201549..f40e445 100644 > --- a/src/mesa/drivers/dri/i965/gen7_blorp.c > +++ b/src/mesa/drivers/dri/i965/gen7_blorp.c > @@ -794,10 +794,11 @@ gen7_blorp_exec(struct brw_context *brw, > gen6_blorp_emit_clip_disable(brw); > gen7_blorp_emit_sf_config(brw, params); > gen7_blorp_emit_wm_config(brw, params); > - if (params->wm_prog_data) > + if (params->wm_prog_data) { >gen7_blorp_emit_binding_table_pointers_ps(brw, wm_bind_bo_offset); > >gen7_blorp_emit_constant_ps_disable(brw); > + } > > if (params->src.mt) { >const uint32_t sampler_offset = > -- > 2.9.0 > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 96765] BindFragDataLocationIndexed on array fragment shader output.
https://bugs.freedesktop.org/show_bug.cgi?id=96765 --- Comment #4 from Corentin Wallez--- The bug indicates that "array" and "array[0]" are identical so I'd assume that the binding done last takes precedence. I'm not sure about AofA, or if it's even allowed for fragment outputs. Here's the relevant bit from the bug where a parallel with ARB_program_interface_query is made: > I think my recommendation would be to adopt language similar to that for > GetProgramResourceIndex(), allowing you to drop the "[0]" but not to address > individual elements. I don't think we want you to be able to assign > "array[0]" to location 3 and "array[1]" to location 5, for example. I also tried the changes you mentioned in Comment 1, while it fixes the values queried back from the driver, the triangle is still not being shown. Only making FragColor and SecondaryFragColor to be non-array floats make the triangle appear. -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Mesa-stable] [PATCH] anv: vulkan: remove the anv_device.$(OBJEXT) rule
In the interest of getting the build going again, I pushed this patch with Jason's review. Emil Velikov writes: > From: Emil Velikov > > Atm the actual rule will expand to foo.o which is used for static > libraries only. > > Thus the automake manual recommendation [to use OBJEXT] won't help us, > since we're working with a shared library. > > Thus let's 'demote' the file and add it back to BUILT_SOURCES. This will > manage all the complexity for us, at the (existing) expense of working > only with the all, check and install targets. > > The crazy (why the issue was hard to spot): > If the dependencies (.deps/*.Plo) are already created one can alter the > anv_device.$(OBJEXT) line and/or nuke it altogether. That won't lead > to any warnings/issues, even though the Makefile is regenerated. > > Moral of the story: > Always rm -rf top_builddir or don't resolve the dependencies manually > and use BUILT_SOURCES. > > Cc: "12.0" > Cc: Vinson Lee > Cc: Kenneth Graunke > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96825 > Fixes: d7a604c3f7a ("anv: use cache uuid based on the build timestamp.") > Signed-off-by: Emil Velikov > --- > Just checking if make distcheck will find any fall-outs, but should work > like a charm. 
> > src/intel/vulkan/Makefile.am | 4 +--- > src/intel/vulkan/Makefile.sources | 3 ++- > 2 files changed, 3 insertions(+), 4 deletions(-) > > diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am > index b605c08..0e521cf 100644 > --- a/src/intel/vulkan/Makefile.am > +++ b/src/intel/vulkan/Makefile.am > @@ -144,10 +144,8 @@ anv_timestamp.h: > @echo "Updating anv_timestamp.h" > $(AM_V_GEN) echo "#define ANV_TIMESTAMP \"$(TIMESTAMP_CMD)\"" > $@ > > -anv_device.$(OBJEXT): anv_timestamp.h > - > BUILT_SOURCES = $(VULKAN_GENERATED_FILES) > -CLEANFILES = $(BUILT_SOURCES) dev_icd.json anv_timestamp.h > +CLEANFILES = $(BUILT_SOURCES) dev_icd.json > EXTRA_DIST = \ > $(top_srcdir)/include/vulkan/vk_icd.h \ > anv_entrypoints_gen.py \ > diff --git a/src/intel/vulkan/Makefile.sources > b/src/intel/vulkan/Makefile.sources > index aa1459a..7303995 100644 > --- a/src/intel/vulkan/Makefile.sources > +++ b/src/intel/vulkan/Makefile.sources > @@ -66,7 +66,8 @@ VULKAN_GEM_STUB_FILES := \ > > VULKAN_GENERATED_FILES := \ > anv_entrypoints.c \ > - anv_entrypoints.h > + anv_entrypoints.h \ > + anv_timestamp.h > > > GEN7_FILES := \ > -- > 2.8.2 > > ___ > mesa-stable mailing list > mesa-sta...@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-stable ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 27/64] isl/state: Add assertions for buffer surface restrictions
Hi Jason, On Sat, Jun 11, 2016 at 09:02:42AM -0700, Jason Ekstrand wrote: > --- > src/intel/isl/isl_surface_state.c | 11 +++ > 1 file changed, 11 insertions(+) > > diff --git a/src/intel/isl/isl_surface_state.c > b/src/intel/isl/isl_surface_state.c > index 8f223d1..ca13175 100644 > --- a/src/intel/isl/isl_surface_state.c > +++ b/src/intel/isl/isl_surface_state.c > @@ -416,6 +416,17 @@ isl_genX(buffer_fill_state_s)(void *state, > { > uint32_t num_elements = info->size / info->stride; > > + if (GEN_GEN >= 7) { > + if (info->format == ISL_FORMAT_RAW) { > + assert(num_elements <= (1ull << 31)); I just ran into this assertion while running a crucible test. It seems to be incorrect. According to the PRMs, the number of elements for raw buffer surfaces ranges from 1 to 2^30. Shouldn't the 1ull be left-shifted by 30 instead of 31? (Note: adjusting the shift amount shouldn't fix the test.) - Nanley > + assert((num_elements & 3) == 0); > + } else { > + assert(num_elements <= (1ull << 27)); > + } > + } else { > + assert(num_elements <= (1ull << 27)); > + } > + > struct GENX(RENDER_SURFACE_STATE) surface_state = { >.SurfaceType = SURFTYPE_BUFFER, >.SurfaceArray = false, > -- > 2.5.0.400.gff86faf > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH mesa] i965/blorp: add missing braces
Signed-off-by: Eric Engestrom--- I know nothing about blorp, but GCC6 noticed the weird indentation, and my best guess looking at the code is that there are missing braces. CC gen7_blorp.lo gen7_blorp.c: In function ‘gen7_blorp_exec’: gen7_blorp.c:797:4: warning: this ‘if’ clause does not guard... [-Wmisleading-indentation] if (params->wm_prog_data) ^~ gen7_blorp.c:800:7: note: ...this statement, but the latter is misleadingly indented as if it is guarded by the ‘if’ gen7_blorp_emit_constant_ps_disable(brw); ^~~ If this is wrong, then the second line should be de-indented. --- src/mesa/drivers/dri/i965/gen7_blorp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.c b/src/mesa/drivers/dri/i965/gen7_blorp.c index 7201549..f40e445 100644 --- a/src/mesa/drivers/dri/i965/gen7_blorp.c +++ b/src/mesa/drivers/dri/i965/gen7_blorp.c @@ -794,10 +794,11 @@ gen7_blorp_exec(struct brw_context *brw, gen6_blorp_emit_clip_disable(brw); gen7_blorp_emit_sf_config(brw, params); gen7_blorp_emit_wm_config(brw, params); - if (params->wm_prog_data) + if (params->wm_prog_data) { gen7_blorp_emit_binding_table_pointers_ps(brw, wm_bind_bo_offset); gen7_blorp_emit_constant_ps_disable(brw); + } if (params->src.mt) { const uint32_t sampler_offset = -- 2.9.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/6] EGL: Record the debug object label in _EGLThreadInfo.
Added a field to _EGLThreadInfo to hold the object label for the current EGL function call. Changed the _EGL_FUNC_START macro and _eglSetFuncName function to take an object type enum and an _EGLResource pointer, which it uses to fill in the object label. Removed the command name and object label parameters from _eglDebugReport, and made it look them up from the current _EGLThreadInfo. Added a separate _eglDebugReportFull function to allow the caller to specify the command and label. --- src/egl/main/eglapi.c | 191 ++ src/egl/main/eglcurrent.c | 48 src/egl/main/eglcurrent.h | 23 +++--- 3 files changed, 155 insertions(+), 107 deletions(-) diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index 6e39bca..038cea0 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -250,9 +250,9 @@ _eglUnlockDisplay(_EGLDisplay *dpy) mtx_unlock(>Mutex); } -#define _EGL_FUNC_START(disp, ret) \ +#define _EGL_FUNC_START(disp, objectType, object, ret) \ do { \ - if (!_eglSetFuncName(__func__)) { \ + if (!_eglSetFuncName(__func__, disp, objectType, (_EGLResource *) object)) { \ if (disp) \ _eglUnlockDisplay(disp); \ return ret; \ @@ -260,14 +260,32 @@ _eglUnlockDisplay(_EGLDisplay *dpy) } while(0) static EGLBoolean -_eglSetFuncName(const char *funcName) +_eglSetFuncName(const char *funcName, _EGLDisplay *disp, EGLenum objectType, _EGLResource *object) { _EGLThreadInfo *thr = _eglGetCurrentThread(); if (!_eglIsCurrentThreadDummy()) { thr->CurrentFuncName = funcName; + thr->CurrentObjectLabel = NULL; + + if (objectType == EGL_OBJECT_THREAD_KHR) { + thr->CurrentObjectLabel = thr->Label; + } else if (objectType == EGL_OBJECT_DISPLAY_KHR) { + if (disp != NULL) { +thr->CurrentObjectLabel = disp->Label; + } + } else { + /* + * Everything else will either be NULL or a valid _EGLResource + * pointer. 
+ */ + if (object != NULL) { +thr->CurrentObjectLabel = object->Label; + } + } + return EGL_TRUE; } else { - _eglDebugReport(EGL_BAD_ALLOC, funcName, funcName, EGL_DEBUG_MSG_CRITICAL_KHR, NULL, NULL); + _eglDebugReportFull(EGL_BAD_ALLOC, funcName, funcName, EGL_DEBUG_MSG_CRITICAL_KHR, NULL, NULL); return EGL_FALSE; } } @@ -308,7 +326,7 @@ eglGetDisplay(EGLNativeDisplayType nativeDisplay) _EGLDisplay *dpy; void *native_display_ptr; - _EGL_FUNC_START(NULL, EGL_NO_DISPLAY); + _EGL_FUNC_START(NULL, EGL_NONE, NULL, EGL_NO_DISPLAY); STATIC_ASSERT(sizeof(void*) == sizeof(nativeDisplay)); native_display_ptr = (void*) nativeDisplay; @@ -324,7 +342,7 @@ eglGetPlatformDisplayEXT(EGLenum platform, void *native_display, { _EGLDisplay *dpy; - _EGL_FUNC_START(NULL, EGL_NO_DISPLAY); + _EGL_FUNC_START(NULL, EGL_NONE, NULL, EGL_NO_DISPLAY); switch (platform) { #ifdef HAVE_X11_PLATFORM @@ -358,7 +376,7 @@ eglGetPlatformDisplay(EGLenum platform, void *native_display, EGLDisplay display; EGLint *int_attribs; - _EGL_FUNC_START(NULL, EGL_NO_DISPLAY); + _EGL_FUNC_START(NULL, EGL_NONE, NULL, EGL_NO_DISPLAY); int_attribs = _eglConvertAttribsToInt(attrib_list); if (attrib_list && !int_attribs) @@ -501,7 +519,7 @@ eglInitialize(EGLDisplay dpy, EGLint *major, EGLint *minor) { _EGLDisplay *disp = _eglLockDisplay(dpy); - _EGL_FUNC_START(disp, EGL_FALSE); + _EGL_FUNC_START(disp, EGL_OBJECT_DISPLAY_KHR, NULL, EGL_FALSE); if (!disp) RETURN_EGL_ERROR(NULL, EGL_BAD_DISPLAY, EGL_FALSE); @@ -553,7 +571,7 @@ eglTerminate(EGLDisplay dpy) { _EGLDisplay *disp = _eglLockDisplay(dpy); - _EGL_FUNC_START(disp, EGL_FALSE); + _EGL_FUNC_START(disp, EGL_OBJECT_DISPLAY_KHR, NULL, EGL_FALSE); if (!disp) RETURN_EGL_ERROR(NULL, EGL_BAD_DISPLAY, EGL_FALSE); @@ -577,13 +595,13 @@ eglQueryString(EGLDisplay dpy, EGLint name) _EGLDisplay *disp; _EGLDriver *drv; - _EGL_FUNC_START(NULL, NULL); - if (dpy == EGL_NO_DISPLAY && name == EGL_EXTENSIONS) { RETURN_EGL_SUCCESS(NULL, _eglGlobal.ClientExtensionString); } disp = 
_eglLockDisplay(dpy); + + _EGL_FUNC_START(disp, EGL_OBJECT_DISPLAY_KHR, NULL, NULL); _EGL_CHECK_DISPLAY(disp, NULL, drv); switch (name) { @@ -609,7 +627,7 @@ eglGetConfigs(EGLDisplay dpy, EGLConfig *configs, _EGLDriver *drv; EGLBoolean ret; - _EGL_FUNC_START(disp, EGL_FALSE); + _EGL_FUNC_START(disp, EGL_OBJECT_DISPLAY_KHR, NULL, EGL_FALSE); _EGL_CHECK_DISPLAY(disp, EGL_FALSE, drv); ret = drv->API.GetConfigs(drv, disp, configs, config_size, num_config); @@ -626,7 +644,7 @@ eglChooseConfig(EGLDisplay dpy, const EGLint *attrib_list, EGLConfig *configs, _EGLDriver *drv; EGLBoolean ret; - _EGL_FUNC_START(disp, EGL_FALSE); +
Re: [Mesa-dev] [PATCH mesa] i965/blorp: add missing braces
On Wed, Jul 06, 2016 at 05:26:40PM +0100, Eric Engestrom wrote: > diff --git a/src/intel/vulkan/Makefile.sources > b/src/intel/vulkan/Makefile.sources > index aa1459a..7303995 100644 > --- a/src/intel/vulkan/Makefile.sources > +++ b/src/intel/vulkan/Makefile.sources > @@ -66,7 +66,8 @@ VULKAN_GEM_STUB_FILES := \ > > VULKAN_GENERATED_FILES := \ > anv_entrypoints.c \ > - anv_entrypoints.h > + anv_entrypoints.h \ > + anv_timestamp.h > > > GEN7_FILES := \ Unrelated hunk, sorry. Resending in a minute. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/6] EGL: Implement eglLabelObjectKHR.
Added a label to the _EGLThreadInfo, _EGLDisplay, and EGLResource structs. Implemented the function eglLabelObjectKHR. --- src/egl/main/eglapi.c | 64 +++ src/egl/main/eglcurrent.c | 10 src/egl/main/eglcurrent.h | 5 src/egl/main/egldisplay.h | 4 +++ 4 files changed, 83 insertions(+) diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index 4700dbe..bba8a98 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -1809,6 +1809,68 @@ eglExportDMABUFImageMESA(EGLDisplay dpy, EGLImage image, RETURN_EGL_EVAL(disp, ret); } +static EGLint EGLAPIENTRY +eglLabelObjectKHR( + EGLDisplay dpy, + EGLenum objectType, + EGLObjectKHR object, + EGLLabelKHR label) +{ + if (objectType == EGL_OBJECT_THREAD_KHR) { + _EGLThreadInfo *t = _eglGetCurrentThread(); + if (!_eglIsCurrentThreadDummy()) { + t->Label = label; + } + return EGL_SUCCESS; + } else { + _EGLDisplay *disp = _eglLookupDisplay(dpy); + if (disp == NULL) { + _eglError(EGL_BAD_DISPLAY, "eglLabelObjectKHR"); + return EGL_BAD_DISPLAY; + } + + if (objectType == EGL_OBJECT_DISPLAY_KHR) { + if (dpy != (EGLDisplay) object) { +_eglError(EGL_BAD_PARAMETER, "eglLabelObjectKHR"); +return EGL_BAD_PARAMETER; + } + disp->Label = label; + return EGL_SUCCESS; + } else { + _EGLResourceType type; + switch (objectType) + { +case EGL_OBJECT_CONTEXT_KHR: + type = _EGL_RESOURCE_CONTEXT; + break; +case EGL_OBJECT_SURFACE_KHR: + type = _EGL_RESOURCE_SURFACE; + break; +case EGL_OBJECT_IMAGE_KHR: + type = _EGL_RESOURCE_IMAGE; + break; +case EGL_OBJECT_SYNC_KHR: + type = _EGL_RESOURCE_SYNC; + break; +case EGL_OBJECT_STREAM_KHR: +default: +_eglError(EGL_BAD_PARAMETER, "eglLabelObjectKHR"); + return EGL_BAD_PARAMETER; + } + + if (_eglCheckResource(object, type, disp)) { +_EGLResource *res = (_EGLResource *) object; +res->Label = label; +return EGL_SUCCESS; + } else { +_eglError(EGL_BAD_PARAMETER, "eglLabelObjectKHR"); +return EGL_BAD_PARAMETER; + } + } + } +} + + __eglMustCastToProperFunctionPointerType EGLAPIENTRY eglGetProcAddress(const 
char *procname) { @@ -1888,6 +1950,7 @@ eglGetProcAddress(const char *procname) { "eglGetSyncValuesCHROMIUM", (_EGLProc) eglGetSyncValuesCHROMIUM }, { "eglExportDMABUFImageQueryMESA", (_EGLProc) eglExportDMABUFImageQueryMESA }, { "eglExportDMABUFImageMESA", (_EGLProc) eglExportDMABUFImageMESA }, + { "eglLabelObjectKHR", (_EGLProc) eglLabelObjectKHR }, { NULL, NULL } }; EGLint i; @@ -1981,3 +2044,4 @@ MesaGLInteropEGLExportObject(EGLDisplay dpy, EGLContext context, _eglUnlockDisplay(disp); return ret; } + diff --git a/src/egl/main/eglcurrent.c b/src/egl/main/eglcurrent.c index 835631d..e75e804 100644 --- a/src/egl/main/eglcurrent.c +++ b/src/egl/main/eglcurrent.c @@ -290,3 +290,13 @@ _eglError(EGLint errCode, const char *msg) return EGL_FALSE; } + +/** + * Returns the label set for the current thread. + */ +EGLLabelKHR _eglGetThreadLabel(void) +{ + _EGLThreadInfo *t = _eglGetCurrentThread(); + return t->Label; +} + diff --git a/src/egl/main/eglcurrent.h b/src/egl/main/eglcurrent.h index 1e386ac..ce926aa 100644 --- a/src/egl/main/eglcurrent.h +++ b/src/egl/main/eglcurrent.h @@ -60,6 +60,8 @@ struct _egl_thread_info _EGLContext *CurrentContexts[_EGL_API_NUM_APIS]; /* use index for fast access to current context */ EGLint CurrentAPIIndex; + + EGLLabelKHR Label; }; @@ -118,6 +120,9 @@ _eglGetCurrentContext(void); extern EGLBoolean _eglError(EGLint errCode, const char *msg); +extern EGLLabelKHR +_eglGetThreadLabel(void); + #ifdef __cplusplus } diff --git a/src/egl/main/egldisplay.h b/src/egl/main/egldisplay.h index 6bfc858..d27f63a 100644 --- a/src/egl/main/egldisplay.h +++ b/src/egl/main/egldisplay.h @@ -79,6 +79,8 @@ struct _egl_resource EGLBoolean IsLinked; EGLint RefCount; + EGLLabelKHR Label; + /* used to link resources of the same type */ _EGLResource *Next; }; @@ -165,6 +167,8 @@ struct _egl_display /* lists of resources */ _EGLResource *ResourceLists[_EGL_NUM_RESOURCES]; + + EGLLabelKHR Label; }; -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org 
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/6] EGL: Implement remaining functions from EGL_KHR_debug.
Implemented eglDebugMessageControlKHR and eglQueryDebugKHR. Added entries in _egl_global to hold the debug callback and the set of enabled message types. Added a _eglDebugReport function to report a debug message, plus some macros for each of the message types. Still to do is to replace existing calls to _eglError with _eglDebugReport. --- src/egl/main/eglapi.c | 64 +++ src/egl/main/eglcurrent.c | 36 -- src/egl/main/eglcurrent.h | 15 +++ src/egl/main/eglglobals.c | 5 +++- src/egl/main/eglglobals.h | 15 +++ 5 files changed, 132 insertions(+), 3 deletions(-) diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index bba8a98..5220f98 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -1870,6 +1870,68 @@ eglLabelObjectKHR( } } +static EGLint +eglDebugMessageControlKHR(EGLDEBUGPROCKHR callback, const EGLAttrib *attrib_list) +{ + mtx_lock(_eglGlobal.Mutex); + + if (callback != NULL) { + if (attrib_list != NULL) { + unsigned int newEnabled = _eglGlobal.debugTypesEnabled; + int i; + + for (i = 0; attrib_list[i] != EGL_NONE; i += 2) { +if (attrib_list[i] >= EGL_DEBUG_MSG_CRITICAL_KHR && + attrib_list[i] <= EGL_DEBUG_MSG_INFO_KHR) { + if (attrib_list[i + 1]) { + newEnabled |= DebugBitFromType(attrib_list[i]); + } else { + newEnabled &= ~DebugBitFromType(attrib_list[i]); + } +} else { + // On error, set the last error code, call the current + // debug callback, and return the error code. 
+ mtx_unlock(_eglGlobal.Mutex); + _eglReportError(EGL_BAD_ATTRIBUTE, "eglDebugMessageControlKHR", NULL, + "Invalid attribute 0x%04lx", (unsigned long) attrib_list[i]); + return EGL_BAD_ATTRIBUTE; +} + } + + _eglGlobal.debugCallback = callback; + _eglGlobal.debugTypesEnabled = newEnabled; + } + } else { + _eglGlobal.debugCallback = NULL; + _eglGlobal.debugTypesEnabled = _EGL_DEBUG_BIT_CRITICAL | _EGL_DEBUG_BIT_ERROR; + } + + mtx_unlock(_eglGlobal.Mutex); + return EGL_SUCCESS; +} + +static EGLBoolean +eglQueryDebugKHR(EGLint attribute, EGLAttrib *value) +{ + mtx_lock(_eglGlobal.Mutex); + if (attribute >= EGL_DEBUG_MSG_CRITICAL_KHR && + attribute <= EGL_DEBUG_MSG_INFO_KHR) { + if (_eglGlobal.debugTypesEnabled & DebugBitFromType(attribute)) { + *value = EGL_TRUE; + } else { + *value = EGL_FALSE; + } + } else if (attribute == EGL_DEBUG_CALLBACK_KHR) { + *value = (EGLAttrib) _eglGlobal.debugCallback; + } else { + mtx_unlock(_eglGlobal.Mutex); + _eglReportError(EGL_BAD_ATTRIBUTE, "eglQueryDebugKHR", NULL, + "Invalid attribute 0x%04lx", (unsigned long) attribute); + return EGL_FALSE; + } + mtx_unlock(_eglGlobal.Mutex); + return EGL_TRUE; +} __eglMustCastToProperFunctionPointerType EGLAPIENTRY eglGetProcAddress(const char *procname) @@ -1951,6 +2013,8 @@ eglGetProcAddress(const char *procname) { "eglExportDMABUFImageQueryMESA", (_EGLProc) eglExportDMABUFImageQueryMESA }, { "eglExportDMABUFImageMESA", (_EGLProc) eglExportDMABUFImageMESA }, { "eglLabelObjectKHR", (_EGLProc) eglLabelObjectKHR }, + { "eglDebugMessageControlKHR", (_EGLProc) eglDebugMessageControlKHR }, + { "eglQueryDebugKHR", (_EGLProc) eglQueryDebugKHR }, { NULL, NULL } }; EGLint i; diff --git a/src/egl/main/eglcurrent.c b/src/egl/main/eglcurrent.c index e75e804..5816967 100644 --- a/src/egl/main/eglcurrent.c +++ b/src/egl/main/eglcurrent.c @@ -26,8 +26,10 @@ **/ +#include #include #include +#include #include "c99_compat.h" #include "c11/threads.h" @@ -35,7 +37,6 @@ #include "eglcurrent.h" #include 
"eglglobals.h" - /* This should be kept in sync with _eglInitThreadInfo() */ #define _EGL_THREAD_INFO_INITIALIZER \ { EGL_SUCCESS, { NULL }, 0 } @@ -294,9 +295,40 @@ _eglError(EGLint errCode, const char *msg) /** * Returns the label set for the current thread. */ -EGLLabelKHR _eglGetThreadLabel(void) +EGLLabelKHR +_eglGetThreadLabel(void) { _EGLThreadInfo *t = _eglGetCurrentThread(); return t->Label; } +void +_eglDebugReport(EGLenum error, const char *command, EGLint type, EGLLabelKHR objectLabel, const char *message, ...) +{ + EGLDEBUGPROCKHR callback = NULL; + + mtx_lock(_eglGlobal.Mutex); + if (_eglGlobal.debugTypesEnabled & DebugBitFromType(type)) { + callback = _eglGlobal.debugCallback; + } + mtx_unlock(_eglGlobal.Mutex); + + if (callback != NULL) { + char *buf = NULL; + + if (message != NULL) { + va_list args; + va_start(args, message); + if (vasprintf(, message,
[Mesa-dev] [PATCH 1/6] EGL: Update eglext.h.
Updated eglext.h to revision 32074 from the Khronos repository. Added two #includes to egltypedefs.h. Both were in the previous version of eglext.h but not in the new one. --- include/EGL/eglext.h | 36 ++-- src/egl/main/egltypedefs.h | 2 ++ 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/include/EGL/eglext.h b/include/EGL/eglext.h index 6043b37..40a2233 100644 --- a/include/EGL/eglext.h +++ b/include/EGL/eglext.h @@ -6,7 +6,7 @@ extern "C" { #endif /* -** Copyright (c) 2013-2014 The Khronos Group Inc. +** Copyright (c) 2013-2015 The Khronos Group Inc. ** ** Permission is hereby granted, free of charge, to any person obtaining a ** copy of this software and/or associated documentation files (the @@ -33,12 +33,12 @@ extern "C" { ** used to make the header, and the header can be found at ** http://www.opengl.org/registry/ ** -** Khronos $Revision$ on $Date$ +** Khronos $Revision: 32074 $ on $Date: 2015-09-30 10:36:02 -0700 (Wed, 30 Sep 2015) $ */ #include -#define EGL_EGLEXT_VERSION 20150508 +#define EGL_EGLEXT_VERSION 20150930 /* Generated C header for: * API: egl @@ -99,6 +99,33 @@ EGLAPI EGLSyncKHR EGLAPIENTRY eglCreateSync64KHR (EGLDisplay dpy, EGLenum type, #define EGL_CONTEXT_OPENGL_NO_ERROR_KHR 0x31B3 #endif /* EGL_KHR_create_context_no_error */ +#ifndef EGL_KHR_debug +#define EGL_KHR_debug 1 +typedef void *EGLLabelKHR; +typedef void *EGLObjectKHR; +typedef void (EGLAPIENTRY *EGLDEBUGPROCKHR)(EGLenum error,const char *command,EGLint messageType,EGLLabelKHR threadLabel,EGLLabelKHR objectLabel,const char* message); +#define EGL_OBJECT_THREAD_KHR 0x33B0 +#define EGL_OBJECT_DISPLAY_KHR0x33B1 +#define EGL_OBJECT_CONTEXT_KHR0x33B2 +#define EGL_OBJECT_SURFACE_KHR0x33B3 +#define EGL_OBJECT_IMAGE_KHR 0x33B4 +#define EGL_OBJECT_SYNC_KHR 0x33B5 +#define EGL_OBJECT_STREAM_KHR 0x33B6 +#define EGL_DEBUG_MSG_CRITICAL_KHR0x33B9 +#define EGL_DEBUG_MSG_ERROR_KHR 0x33BA +#define EGL_DEBUG_MSG_WARN_KHR0x33BB +#define EGL_DEBUG_MSG_INFO_KHR0x33BC +#define 
EGL_DEBUG_CALLBACK_KHR0x33B8 +typedef EGLint (EGLAPIENTRYP PFNEGLDEBUGMESSAGECONTROLKHRPROC) (EGLDEBUGPROCKHR callback, const EGLAttrib *attrib_list); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYDEBUGKHRPROC) (EGLint attribute, EGLAttrib *value); +typedef EGLint (EGLAPIENTRYP PFNEGLLABELOBJECTKHRPROC) (EGLDisplay display, EGLenum objectType, EGLObjectKHR object, EGLLabelKHR label); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLint EGLAPIENTRY eglDebugMessageControlKHR (EGLDEBUGPROCKHR callback, const EGLAttrib *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglQueryDebugKHR (EGLint attribute, EGLAttrib *value); +EGLAPI EGLint EGLAPIENTRY eglLabelObjectKHR (EGLDisplay display, EGLenum objectType, EGLObjectKHR object, EGLLabelKHR label); +#endif +#endif /* EGL_KHR_debug */ + #ifndef EGL_KHR_fence_sync #define EGL_KHR_fence_sync 1 typedef khronos_utime_nanoseconds_t EGLTimeKHR; @@ -879,9 +906,6 @@ EGLAPI EGLuint64NV EGLAPIENTRY eglGetSystemTimeNV (void); #define EGL_NATIVE_SURFACE_TIZEN 0x32A1 #endif /* EGL_TIZEN_image_native_surface */ -#include -#include - #ifdef __cplusplus } #endif diff --git a/src/egl/main/egltypedefs.h b/src/egl/main/egltypedefs.h index 7facdb4..f20af44 100644 --- a/src/egl/main/egltypedefs.h +++ b/src/egl/main/egltypedefs.h @@ -33,6 +33,8 @@ #include #include +#include +#include #include "eglcompiler.h" -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/6] EGL: Call the EGL_KHR_debug callback on errors.
Added a member to _EGLThreadInfo to hold the name of the current EGL function. Each EGL entrypoint will now set that at the beginning. _eglError will now call the debug callback function, using the function name stored in the current _EGLThreadInfo struct. This should allow the EGL_KHR_debug callback to work correctly without having to rewrite all of the _eglError calls. It also avoids having to pass the EGL function names down to driver and platform functions that may be called from multiple entrypoints. This is really the bare minimum functionality for EGL_KHR_debug, since the callback will be missing object labels and messages in most cases. Later changes can update the _eglError calls to provide more info. --- src/egl/main/eglapi.c | 142 -- src/egl/main/eglcurrent.c | 35 ++-- src/egl/main/eglcurrent.h | 26 + src/egl/main/eglglobals.c | 5 +- 4 files changed, 188 insertions(+), 20 deletions(-) diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index 5220f98..6e39bca 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -250,6 +250,27 @@ _eglUnlockDisplay(_EGLDisplay *dpy) mtx_unlock(>Mutex); } +#define _EGL_FUNC_START(disp, ret) \ + do { \ + if (!_eglSetFuncName(__func__)) { \ + if (disp) \ +_eglUnlockDisplay(disp); \ + return ret; \ + } \ + } while(0) + +static EGLBoolean +_eglSetFuncName(const char *funcName) +{ + _EGLThreadInfo *thr = _eglGetCurrentThread(); + if (!_eglIsCurrentThreadDummy()) { + thr->CurrentFuncName = funcName; + return EGL_TRUE; + } else { + _eglDebugReport(EGL_BAD_ALLOC, funcName, funcName, EGL_DEBUG_MSG_CRITICAL_KHR, NULL, NULL); + return EGL_FALSE; + } +} static EGLint * _eglConvertAttribsToInt(const EGLAttrib *attr_list) @@ -287,6 +308,8 @@ eglGetDisplay(EGLNativeDisplayType nativeDisplay) _EGLDisplay *dpy; void *native_display_ptr; + _EGL_FUNC_START(NULL, EGL_NO_DISPLAY); + STATIC_ASSERT(sizeof(void*) == sizeof(nativeDisplay)); native_display_ptr = (void*) nativeDisplay; @@ -301,6 +324,8 @@ 
eglGetPlatformDisplayEXT(EGLenum platform, void *native_display, { _EGLDisplay *dpy; + _EGL_FUNC_START(NULL, EGL_NO_DISPLAY); + switch (platform) { #ifdef HAVE_X11_PLATFORM case EGL_PLATFORM_X11_EXT: @@ -331,8 +356,11 @@ eglGetPlatformDisplay(EGLenum platform, void *native_display, const EGLAttrib *attrib_list) { EGLDisplay display; - EGLint *int_attribs = _eglConvertAttribsToInt(attrib_list); + EGLint *int_attribs; + + _EGL_FUNC_START(NULL, EGL_NO_DISPLAY); + int_attribs = _eglConvertAttribsToInt(attrib_list); if (attrib_list && !int_attribs) RETURN_EGL_ERROR(NULL, EGL_BAD_ALLOC, NULL); @@ -473,6 +501,8 @@ eglInitialize(EGLDisplay dpy, EGLint *major, EGLint *minor) { _EGLDisplay *disp = _eglLockDisplay(dpy); + _EGL_FUNC_START(disp, EGL_FALSE); + if (!disp) RETURN_EGL_ERROR(NULL, EGL_BAD_DISPLAY, EGL_FALSE); @@ -523,6 +553,8 @@ eglTerminate(EGLDisplay dpy) { _EGLDisplay *disp = _eglLockDisplay(dpy); + _EGL_FUNC_START(disp, EGL_FALSE); + if (!disp) RETURN_EGL_ERROR(NULL, EGL_BAD_DISPLAY, EGL_FALSE); @@ -545,6 +577,8 @@ eglQueryString(EGLDisplay dpy, EGLint name) _EGLDisplay *disp; _EGLDriver *drv; + _EGL_FUNC_START(NULL, NULL); + if (dpy == EGL_NO_DISPLAY && name == EGL_EXTENSIONS) { RETURN_EGL_SUCCESS(NULL, _eglGlobal.ClientExtensionString); } @@ -575,6 +609,8 @@ eglGetConfigs(EGLDisplay dpy, EGLConfig *configs, _EGLDriver *drv; EGLBoolean ret; + _EGL_FUNC_START(disp, EGL_FALSE); + _EGL_CHECK_DISPLAY(disp, EGL_FALSE, drv); ret = drv->API.GetConfigs(drv, disp, configs, config_size, num_config); @@ -590,6 +626,8 @@ eglChooseConfig(EGLDisplay dpy, const EGLint *attrib_list, EGLConfig *configs, _EGLDriver *drv; EGLBoolean ret; + _EGL_FUNC_START(disp, EGL_FALSE); + _EGL_CHECK_DISPLAY(disp, EGL_FALSE, drv); ret = drv->API.ChooseConfig(drv, disp, attrib_list, configs, config_size, num_config); @@ -607,6 +645,8 @@ eglGetConfigAttrib(EGLDisplay dpy, EGLConfig config, _EGLDriver *drv; EGLBoolean ret; + _EGL_FUNC_START(disp, EGL_FALSE); + _EGL_CHECK_CONFIG(disp, conf, 
EGL_FALSE, drv); ret = drv->API.GetConfigAttrib(drv, disp, conf, attribute, value); @@ -625,6 +665,8 @@ eglCreateContext(EGLDisplay dpy, EGLConfig config, EGLContext share_list, _EGLContext *context; EGLContext ret; + _EGL_FUNC_START(disp, EGL_NO_CONTEXT); + _EGL_CHECK_DISPLAY(disp, EGL_NO_CONTEXT, drv); if (!config && !disp->Extensions.MESA_configless_context) @@ -648,6 +690,8 @@ eglDestroyContext(EGLDisplay dpy, EGLContext ctx) _EGLDriver *drv; EGLBoolean ret; + _EGL_FUNC_START(disp, EGL_FALSE); +
[Mesa-dev] EGL: Implement EGL_KHR_debug
This is a set of patches to implement the EGL_KHR_debug extension. In addition to hopefully being useful in its own right, the current draft of the libglvnd interface for EGL requires each vendor library to support EGL_KHR_debug. The same functions should work for both a normal and libglvnd-based driver. Rather than try to update every _eglError call all at once, I used the _EGLThreadInfo struct to record the current function name and object label, so all the existing _eglError calls will call the debug callback with the correct command and label. They won't have any error messages with them, but those messages can be added in later changes. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6/6] EGL: Fix some command names for EGL_KHR_debug.
Change a few EGL entrypoints to call a common internal function instead of forwarding to another entrypoint. If one EGL entrypoint calls another, then the second entrypoint would overwrite the current function name in the _EGLThreadInfo struct. That would cause it to pass the wrong function name to the EGL_KHR_debug callback. --- src/egl/main/eglapi.c | 194 ++ 1 file changed, 115 insertions(+), 79 deletions(-) diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index 038cea0..de37120 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -326,7 +326,7 @@ eglGetDisplay(EGLNativeDisplayType nativeDisplay) _EGLDisplay *dpy; void *native_display_ptr; - _EGL_FUNC_START(NULL, EGL_NONE, NULL, EGL_NO_DISPLAY); + _EGL_FUNC_START(NULL, EGL_OBJECT_THREAD_KHR, NULL, EGL_NO_DISPLAY); STATIC_ASSERT(sizeof(void*) == sizeof(nativeDisplay)); native_display_ptr = (void*) nativeDisplay; @@ -336,14 +336,12 @@ eglGetDisplay(EGLNativeDisplayType nativeDisplay) return _eglGetDisplayHandle(dpy); } -static EGLDisplay EGLAPIENTRY -eglGetPlatformDisplayEXT(EGLenum platform, void *native_display, +static EGLDisplay +_eglGetPlatformDisplayCommon(EGLenum platform, void *native_display, const EGLint *attrib_list) { _EGLDisplay *dpy; - _EGL_FUNC_START(NULL, EGL_NONE, NULL, EGL_NO_DISPLAY); - switch (platform) { #ifdef HAVE_X11_PLATFORM case EGL_PLATFORM_X11_EXT: @@ -369,6 +367,14 @@ eglGetPlatformDisplayEXT(EGLenum platform, void *native_display, return _eglGetDisplayHandle(dpy); } +static EGLDisplay EGLAPIENTRY +eglGetPlatformDisplayEXT(EGLenum platform, void *native_display, + const EGLint *attrib_list) +{ + _EGL_FUNC_START(NULL, EGL_OBJECT_THREAD_KHR, NULL, EGL_NO_DISPLAY); + return _eglGetPlatformDisplayCommon(platform, native_display, attrib_list); +} + EGLDisplay EGLAPIENTRY eglGetPlatformDisplay(EGLenum platform, void *native_display, const EGLAttrib *attrib_list) @@ -376,13 +382,13 @@ eglGetPlatformDisplay(EGLenum platform, void *native_display, EGLDisplay display; 
EGLint *int_attribs; - _EGL_FUNC_START(NULL, EGL_NONE, NULL, EGL_NO_DISPLAY); + _EGL_FUNC_START(NULL, EGL_OBJECT_THREAD_KHR, NULL, EGL_NO_DISPLAY); int_attribs = _eglConvertAttribsToInt(attrib_list); if (attrib_list && !int_attribs) RETURN_EGL_ERROR(NULL, EGL_BAD_ALLOC, NULL); - display = eglGetPlatformDisplayEXT(platform, native_display, int_attribs); + display = _eglGetPlatformDisplayCommon(platform, native_display, int_attribs); free(int_attribs); return display; } @@ -788,7 +794,8 @@ eglQueryContext(EGLDisplay dpy, EGLContext ctx, static EGLSurface _eglCreateWindowSurfaceCommon(_EGLDisplay *disp, EGLConfig config, - void *native_window, const EGLint *attrib_list) + void *native_window, const EGLint *attrib_list, + EGLBoolean fromPlatform) { _EGLConfig *conf = _eglLookupConfig(config, disp); _EGLDriver *drv; @@ -797,6 +804,19 @@ _eglCreateWindowSurfaceCommon(_EGLDisplay *disp, EGLConfig config, _EGL_CHECK_CONFIG(disp, conf, EGL_NO_SURFACE, drv); +#ifdef HAVE_X11_PLATFORM + if (fromPlatform && disp->Platform == _EGL_PLATFORM_X11 && native_window != NULL) { + /* The `native_window` parameter for the X11 platform differs between + * eglCreateWindowSurface() and eglCreatePlatformPixmapSurfaceEXT(). In + * eglCreateWindowSurface(), the type of `native_window` is an Xlib + * `Window`. In eglCreatePlatformWindowSurfaceEXT(), the type is + * `Window*`. Convert `Window*` to `Window` because that's what + * dri2_x11_create_window_surface() expects. 
+ */ + native_window = (void*) (* (Window*) native_window); + } +#endif + if (native_window == NULL) RETURN_EGL_ERROR(disp, EGL_BAD_NATIVE_WINDOW, EGL_NO_SURFACE); @@ -816,7 +836,7 @@ eglCreateWindowSurface(EGLDisplay dpy, EGLConfig config, _EGL_FUNC_START(disp, EGL_OBJECT_DISPLAY_KHR, NULL, EGL_NO_SURFACE); STATIC_ASSERT(sizeof(void*) == sizeof(window)); return _eglCreateWindowSurfaceCommon(disp, config, (void*) window, -attrib_list); +attrib_list, EGL_FALSE); } @@ -827,22 +847,8 @@ eglCreatePlatformWindowSurfaceEXT(EGLDisplay dpy, EGLConfig config, { _EGLDisplay *disp = _eglLockDisplay(dpy); _EGL_FUNC_START(disp, EGL_OBJECT_DISPLAY_KHR, NULL, EGL_NO_SURFACE); - -#ifdef HAVE_X11_PLATFORM - if (disp->Platform == _EGL_PLATFORM_X11 && native_window != NULL) { - /* The `native_window` parameter for the X11 platform differs between - * eglCreateWindowSurface() and eglCreatePlatformPixmapSurfaceEXT(). In - * eglCreateWindowSurface(), the type of `native_window` is an Xlib - * `Window`.
[Mesa-dev] [PATCH mesa] i965/blorp: add missing braces
Signed-off-by: Eric Engestrom--- I know nothing about blorp, but GCC6 noticed the weird indentation, and my best guess looking at the code is that there are missing braces. CC gen7_blorp.lo gen7_blorp.c: In function ‘gen7_blorp_exec’: gen7_blorp.c:797:4: warning: this ‘if’ clause does not guard... [-Wmisleading-indentation] if (params->wm_prog_data) ^~ gen7_blorp.c:800:7: note: ...this statement, but the latter is misleadingly indented as if it is guarded by the ‘if’ gen7_blorp_emit_constant_ps_disable(brw); ^~~ If this is wrong, then the second line should be de-indented. --- src/intel/vulkan/Makefile.sources | 3 ++- src/mesa/drivers/dri/i965/gen7_blorp.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/intel/vulkan/Makefile.sources b/src/intel/vulkan/Makefile.sources index aa1459a..7303995 100644 --- a/src/intel/vulkan/Makefile.sources +++ b/src/intel/vulkan/Makefile.sources @@ -66,7 +66,8 @@ VULKAN_GEM_STUB_FILES := \ VULKAN_GENERATED_FILES := \ anv_entrypoints.c \ - anv_entrypoints.h + anv_entrypoints.h \ + anv_timestamp.h GEN7_FILES := \ diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.c b/src/mesa/drivers/dri/i965/gen7_blorp.c index 7201549..f40e445 100644 --- a/src/mesa/drivers/dri/i965/gen7_blorp.c +++ b/src/mesa/drivers/dri/i965/gen7_blorp.c @@ -794,10 +794,11 @@ gen7_blorp_exec(struct brw_context *brw, gen6_blorp_emit_clip_disable(brw); gen7_blorp_emit_sf_config(brw, params); gen7_blorp_emit_wm_config(brw, params); - if (params->wm_prog_data) + if (params->wm_prog_data) { gen7_blorp_emit_binding_table_pointers_ps(brw, wm_bind_bo_offset); gen7_blorp_emit_constant_ps_disable(brw); + } if (params->src.mt) { const uint32_t sampler_offset = -- 2.9.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/4] anv/image: Simplify initialization of the isl_tiling
On Fri 01 Jul 2016, Nanley Chery wrote: > On Fri, Jul 01, 2016 at 02:24:19PM -0700, Chad Versace wrote: > > I don't agree with this patch. > > > > Locally, the patch look correct. But when you consider that > > anv_image_create() is public within the driver, the patch makes the code > > fragile. Pre-patch, if the caller of anv_image_create() sets > > anv_image_create_info::vk_info::tiling and leaves > > anv_image_create_info::isl_tiling_flags unset (which I believe should be > > a valid combination), then anv_image_create() correctly converts the > > VkImageTilingFlags to isl_tiling_flags. Post-patch, that's no longer the > > case; anv_image_create() ignores its VkImageTiling input. > > Thanks for finding that bug. > > Your description has actually pointed out an issue in the current code: > If an internal caller specifies > anv_image_create_info::vk_info::tiling = VK_IMAGE_TILING_OPTIMAL > and leaves anv_image_create_info::isl_tiling_flags unset, then > anv_image_create() ignores the VkImageTiling input and causes ISL to > fail image creation later. > > To solve this problem, I think we should define ::isl_tiling_flags to be a > opt-in bit-mask which works with the requested ::vk_info::tiling to provide > more specificity on the actual desired tiling. With this in mind, we can drop > the last two hunks from the above patch and replace the first with the > following: > ` > isl_tiling_flags_t tiling_flags = > (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR ? > ISL_TILING_LINEAR_BIT : ISL_TILING_ANY_MASK); > if (anv_info->isl_tiling_flags) > tiling_flags &= anv_info->isl_tiling_flags; > assert (tiling_flags); > ` > What do you think? Yes, I like that change. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH mesa] i965/docs: update Intel Linux Graphics URLs
Signed-off-by: Eric Engestrom--- docs/developers.html | 2 +- docs/faq.html| 2 +- src/mesa/drivers/dri/i965/brw_defines.h | 2 +- src/mesa/drivers/dri/i965/brw_sf_state.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/developers.html b/docs/developers.html index 86844cc..85b6979 100644 --- a/docs/developers.html +++ b/docs/developers.html @@ -38,7 +38,7 @@ Other companies including -http://www.intellinuxgraphics.org/index.html;>Intel +https://01.org/linuxgraphics;>Intel and RedHat also actively contribute to the project. Intel has recently contributed the new GLSL compiler in Mesa 7.9. diff --git a/docs/faq.html b/docs/faq.html index b7c6fbf..4e79bb1 100644 --- a/docs/faq.html +++ b/docs/faq.html @@ -57,7 +57,7 @@ 1. High-level Questions and Answers See the http://dri.freedesktop.org/;>DRI website for more information. - See http://intellinuxgraphics.org;>intellinuxgraphics.org + See https://01.org/linuxgraphics;>01.org for more information about Intel drivers. See http://nouveau.freedesktop.org;>nouveau.freedesktop.org for more information about Nouveau drivers. diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index e7d1a9f..d2cd53a 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -247,7 +247,7 @@ * Chipset Graphics Controller Programmer's Reference Manual, * Volume 2: 3D/Media", Revision 1.0b as of January 2008, * available at - * http://intellinuxgraphics.org/documentation.html + * https://01.org/linuxgraphics/documentation/hardware-specification-prms * at the time of this writing). 
* * These appear to be supported on at least some diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index 8eab671..89406fc 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -245,7 +245,7 @@ static void upload_sf_unit( struct brw_context *brw ) * Chipset Graphics Controller Programmer's Reference Manual, * Volume 2: 3D/Media", Revision 1.0b as of January 2008, * available at - * http://intellinuxgraphics.org/documentation.html + * https://01.org/linuxgraphics/documentation/hardware-specification-prms * at the time of this writing). * * It does work on at least some devices, if not all; -- 2.9.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/3] radeonsi: catch a potential state tracker error with non-MSAA FBs
From: Nicolai HähnleAt least st/mesa ensures this, so I'd rather not handle deviations in radeonsi. --- src/gallium/drivers/radeonsi/si_state.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index ee92f15..df6b610 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -3193,6 +3193,12 @@ static void si_emit_sample_mask(struct si_context *sctx, struct r600_atom *atom) struct radeon_winsys_cs *cs = sctx->b.gfx.cs; unsigned mask = sctx->sample_mask.sample_mask; + /* Needed for line and polygon smoothing as well as for the Polaris +* small primitive filter. We expect the state tracker to take care of +* this for us. +*/ + assert(mask == 0x || sctx->framebuffer.nr_samples > 1); + radeon_set_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2); radeon_emit(cs, mask | (mask << 16)); radeon_emit(cs, mask | (mask << 16)); -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/3] radeonsi: explicitly choose center locations for 1xAA on Polaris
From: Nicolai HähnleUnlike SC, the small primitive filter does not automatically use center locations in 1xAA mode, so this is needed to avoid artifacts caused by the small primitive filter discarding triangles that it shouldn't. As a side effect of how the effective number of samples is now calculated, this patch also avoids submitting the sample locations for line/poly smoothing when they're not really needed. Cc: 12.0 --- src/gallium/drivers/radeon/cayman_msaa.c| 7 src/gallium/drivers/radeonsi/si_hw_context.c| 3 +- src/gallium/drivers/radeonsi/si_pipe.h | 7 +++- src/gallium/drivers/radeonsi/si_state.c | 45 - src/gallium/drivers/radeonsi/si_state_shaders.c | 4 +++ 5 files changed, 48 insertions(+), 18 deletions(-) diff --git a/src/gallium/drivers/radeon/cayman_msaa.c b/src/gallium/drivers/radeon/cayman_msaa.c index 89c4937..33f1040 100644 --- a/src/gallium/drivers/radeon/cayman_msaa.c +++ b/src/gallium/drivers/radeon/cayman_msaa.c @@ -143,6 +143,13 @@ void cayman_init_msaa(struct pipe_context *ctx) void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples) { switch (nr_samples) { + default: + case 1: + radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 0); + radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, 0); + radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, 0); + radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, 0); + break; case 2: radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_2x[0]); radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_2x[1]); diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index 500eca7..f36a7a0 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -207,7 +207,8 @@ void si_begin_new_cs(struct si_context *ctx) si_mark_atom_dirty(ctx, 
>clip_regs); si_mark_atom_dirty(ctx, >clip_state.atom); - si_mark_atom_dirty(ctx, >msaa_sample_locs); + ctx->msaa_sample_locs.nr_samples = 0; + si_mark_atom_dirty(ctx, >msaa_sample_locs.atom); si_mark_atom_dirty(ctx, >msaa_config); si_mark_atom_dirty(ctx, >sample_mask.atom); si_mark_atom_dirty(ctx, >cb_render_state); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 1f63c12..326b819 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -181,6 +181,11 @@ struct si_clip_state { struct pipe_clip_state state; }; +struct si_sample_locs { + struct r600_atomatom; + unsignednr_samples; +}; + struct si_sample_mask { struct r600_atomatom; uint16_tsample_mask; @@ -225,7 +230,7 @@ struct si_context { /* Atom declarations. */ struct r600_atomcache_flush; struct si_framebuffer framebuffer; - struct r600_atommsaa_sample_locs; + struct si_sample_locs msaa_sample_locs; struct r600_atomdb_render_state; struct r600_atommsaa_config; struct si_sample_mask sample_mask; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 4182906..ee92f15 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -856,9 +856,13 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state) return; if (sctx->framebuffer.nr_samples > 1 && - (!old_rs || old_rs->multisample_enable != rs->multisample_enable)) + (!old_rs || old_rs->multisample_enable != rs->multisample_enable)) { si_mark_atom_dirty(sctx, >db_render_state); + if (sctx->b.family >= CHIP_POLARIS10) + si_mark_atom_dirty(sctx, >msaa_sample_locs.atom); + } + r600_set_scissor_enable(>b, rs->scissor_enable); si_pm4_bind_state(sctx, rasterizer, rs); @@ -2380,18 +2384,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4; si_set_rw_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS, ); - /* 
Smoothing (only possible with nr_samples == 1) uses the same -* sample locations as the MSAA it simulates. -* -* Therefore, don't update the sample locations when -* transitioning from
[Mesa-dev] [PATCH 1/3] r600g: call cayman_emit_msaa_sample_locs only when needed
From: Nicolai Hähnle In the case of nr_samples <= 1, that function is (currently) a no-op anyway. --- src/gallium/drivers/r600/evergreen_state.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index fe4f14c..463dc15 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1654,7 +1654,8 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) | EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1); - cayman_emit_msaa_sample_locs(cs, rctx->framebuffer.nr_samples); + if (rctx->framebuffer.nr_samples > 1) + cayman_emit_msaa_sample_locs(cs, rctx->framebuffer.nr_samples); cayman_emit_msaa_config(cs, rctx->framebuffer.nr_samples, rctx->ps_iter_samples, 0, sc_mode_cntl_1); } -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 00/12] render reordering for optimized tile buffer usage
On Sat, Jul 2, 2016 at 12:52 PM, Rob Clarkwrote: > So, games/apps that are aware of how a tiler gpu works will make an > effort to avoid mid-batch (tile pass) updates to textures, UBOs, etc, > since this will force a flush, and extra resolve (tile->mem) and > restore (mem->tile) in the next batch. They also avoid unnecessary > framebuffer switches, for the same reason. > > But turns out that many games, benchmarks, etc, aren't very good at > this. But what if we could re-order the batches (and potentially > shadow texture/UBO/etc resources) to minimize the tile passes and > unnecessary resolve/restore? > > This is based on a rough idea that Eric suggested a while back, and > a few other experiments that I have been trying recently. It boils > down to three parts: > > 1) Add an fd_batch object, which tracks cmdstream being built for that >particular tile pass. State that is global to the tile pass is >move from fd_context to fd_batch. (Mostly the framebuffer state, >but also so internal tracking that is done to decide whether to >use GMEM or sysmem/bypass mode, etc.) > >Tracking of resources written/read in the batch is also moved from >ctx to batch. > > 2) Add a batch-cache. Previously, whenever new framebuffer state is >set, it forced a flush. Now (if reordering is enabled), we use >the framebuffer state as key into a hashtable to map it to an >existing batch (if there is one, otherwise construct a new batch >and add it to the table). > >When a resource is marked as read/written by a batch, which is >already pending access by another batch, a dependency between the >two batches is added. so I noticed one slightly annoying thing.. u_blitter doesn't clear the vtx sampler state, and only updates the first sampler for frag state.. resulting in us adding a bunch of unneeded dependencies during a blitter draw.. not sure if it is better to fix u_blitter or hack around it in the driver.. BR, -R >TODO there is probably a bit more room for improvement here. 
See >below analysis of supertuxkart. > > 3) Shadow resources. Mid-batch UBO updates or uploading new contents >to an in-use texture is sadly too common. Traditional (non-tiler) >gpu's could solve this with a staging buffer, and blitting from the >staging to real buffer at the appropriate spot in the cmdstream. >But this doesn't work for a tiling gpu, since we'll need the old >contents again when we move on to the next tile. To solve this, >allocate a new buffer and back-blit the previous contents to the >new buffer. The existing buffer becomes a shadow and is unref'd >(the backing GEM object is kept alive since it is referenced by >the cmdstream). > >For example, a texture upload + mipmap gen turns into transfer_map >for level zero (glTexSubImage*, etc), followed by blits to the >remaining mipmap levels (glGenerateMipmap()). So in transfer_map() >if writing new contents into the buffer would trigger a flush or >stall, we shadow the existing buffer, and blit the remaining levels >from old to new. Each blit turns into a batch (different frame- >buffer state), and is not immediately flushed, but just hangs out >in the batch cache. When the next blit (from glGenerateMipmap() >overwrites the contents from the back-blit, we realize this and >drop the previous rendering to the batch, so in many cases the >back-blit ends up discarded. > > > > Results: > > supertuxkart was a big winner, with an overall ~30% boost, making the > new render engine finally playable on most levels. Fps varies a lot > by level, but on average going from 14-19fps to 20-25fps. > > (Sadly, the old render engine, which was much faster on lower end hw, > seems to be in disrepair.) > > I did also add some instrumentation to collect some stats on # of > different sorts of batches. 
Since supertuxkart --profile-laps is > not repeatable, I could not directly compare results there, but I > could compare an apitrace replay of stk level: > > normal: batch_sysmem=10398, batch_gmem=6958, batch_restore=3864 > reorder: batch_sysmem=16825, batch_gmem=6956, batch_restore=3863 > (for 792 frames) > > I was expecting a drop in gmem batches, and restores, because stk > does two problematic things: (1) render target switches, ie. clear, > switch fb, clear, switch fb, draw, etc., and (2) mid-batch UBO > update. > > I've looked a bit into the render target switches, but it seems like > it is mixing/matching zsbuf and cbuf's in a way that makes them map > to different batches. Ie: > >set fb: zsbuf=A, cbuf[0]=B >clear color0 >clear stencil >set fb: zsbuf=A, cbuf[0]=C >draw > > Not entirely sure what to do about that. I suppose I could track the > cmdstream for the clears individually, and juggle them between batches > somehow to avoid the flush? > > The mid-batch UBO update seems to actually happen between two
Re: [Mesa-dev] [Mesa-stable] [PATCH] radeon uvd add uvd fw version for amdgpu
On 6 July 2016 at 16:09, Jiang, Sonny wrote: > Hi Emil, > > > Are you Okay with these? > It'll do for now. General suggestion - do not try to justify the patch on the grounds that it fixes a bug. Mention what was wrong and/or why it was resolved in a particular way. A style example/suggestion, which I believe your colleagues will be quite pleased to see. "[PATCH 2/2] radeon: separate uvd context buffer from DPB Older Polaris FW does not handle XXX leading to bug in YYY. Newer one resolves that by exposing/requiring the user to separate uvd context buffer from DPB, and provide the AAA data via BBB." Thanks Emil P.S. HTML emails and attached patches are a bad idea for mesa-dev@ (and many other MLs). ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] anv: vulkan: remove the anv_device.$(OBJEXT) rule
So here's a thought: could we make anv_timestamp.h generation trigger off of libvulkan_intel.so getting rebuilt? I'm not quite sure how one would even do that but it seems like a thing you might be able to do... On Jul 6, 2016 8:18 AM, "Emil Velikov"wrote: > From: Emil Velikov > > Atm the actual rule will expand to foo.o which is used for static > libraries only. > > Thus the automake manual recommendation [to use OBJEXT] won't help us, > since since we're working with a shared library. > > Thus let's 'demote' the file and add it back to BUILT_SOURCES. This will > manage all the complexity for us, at the (existing expense) of working > only with the all, check and install targets. > > The crazy (why the issue was hard to spot): > If the dependencies (.deps/*.Plo) are already created one can alter the > anv_device.$(OBJEXT) line and/or nuke it all together. That won't lead > to any warnings/issues, even though the Makefile is regenerated. > > Moral of the story: > Always rm -rf top_builddir or don't resolve the dependencies manually > and use BUILT_SOURCES. > > Cc: "12.0" > Cc: Vinson Lee > Cc: Kenneth Graunke > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96825 > Fixes: d7a604c3f7a ("anv: use cache uuid based on the build timestamp.") > Signed-off-by: Emil Velikov > --- > Just checking if make distcheck will find any fall-outs, but should work > like a charm. 
> > src/intel/vulkan/Makefile.am | 4 +--- > src/intel/vulkan/Makefile.sources | 3 ++- > 2 files changed, 3 insertions(+), 4 deletions(-) > > diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am > index b605c08..0e521cf 100644 > --- a/src/intel/vulkan/Makefile.am > +++ b/src/intel/vulkan/Makefile.am > @@ -144,10 +144,8 @@ anv_timestamp.h: > @echo "Updating anv_timestamp.h" > $(AM_V_GEN) echo "#define ANV_TIMESTAMP \"$(TIMESTAMP_CMD)\"" > $@ > > -anv_device.$(OBJEXT): anv_timestamp.h > - > BUILT_SOURCES = $(VULKAN_GENERATED_FILES) > -CLEANFILES = $(BUILT_SOURCES) dev_icd.json anv_timestamp.h > +CLEANFILES = $(BUILT_SOURCES) dev_icd.json > EXTRA_DIST = \ > $(top_srcdir)/include/vulkan/vk_icd.h \ > anv_entrypoints_gen.py \ > diff --git a/src/intel/vulkan/Makefile.sources > b/src/intel/vulkan/Makefile.sources > index aa1459a..7303995 100644 > --- a/src/intel/vulkan/Makefile.sources > +++ b/src/intel/vulkan/Makefile.sources > @@ -66,7 +66,8 @@ VULKAN_GEM_STUB_FILES := \ > > VULKAN_GENERATED_FILES := \ > anv_entrypoints.c \ > - anv_entrypoints.h > + anv_entrypoints.h \ > + anv_timestamp.h > > > GEN7_FILES := \ > -- > 2.8.2 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 96765] BindFragDataLocationIndexed on array fragment shader output.
https://bugs.freedesktop.org/show_bug.cgi?id=96765 --- Comment #3 from Ilia Mirkin --- (In reply to Corentin Wallez from comment #2) > Thanks for taking a look, indeed my understanding was wrong but I found a > Khronos bug related to this that doesn't have a formal resolution but > indicates that while you can't bind array elements individually, "array[0]" > should be equivalent to "array". See > https://cvs.khronos.org/bugzilla/show_bug.cgi?id=7829 > > Unfortunately Chrome has been testing this on few platforms, so far it only > tested NVIDIA proprietary (passes), fglrx (doesn't pass). It also fails on > the OSX drivers, seemingly for the same reason as for Mesa it seems. > > Given all the above, I will fix Chromium's tests to not use indices, can you > still consider making the [0] equivalent to no subscript? > > Thank you for your time. That does seem reasonable. As I'm not one of the cool kids with KHR access, mind letting me know what that bug says about what to do when someone binds both "array" and "array[0]"? Right now we store these in a map, and resolve locations at link time (as per the spec). We don't know what's an array and what's not at bind time. Also, does the bug make any comments about AoA? i.e. out vec4 foo[2][2][2] Should you be able to bind with "foo[0][0][0]"? What about "foo[0][0]" and "foo[0]" in that case? [Perhaps the AoA spec says something about this, I'm not up on all the details myself.] -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] anv: vulkan: remove the anv_device.$(OBJEXT) rule
Fine by me On Jul 6, 2016 8:18 AM, "Emil Velikov"wrote: > From: Emil Velikov > > Atm the actual rule will expand to foo.o which is used for static > libraries only. > > Thus the automake manual recommendation [to use OBJEXT] won't help us, > since since we're working with a shared library. > > Thus let's 'demote' the file and add it back to BUILT_SOURCES. This will > manage all the complexity for us, at the (existing expense) of working > only with the all, check and install targets. > > The crazy (why the issue was hard to spot): > If the dependencies (.deps/*.Plo) are already created one can alter the > anv_device.$(OBJEXT) line and/or nuke it all together. That won't lead > to any warnings/issues, even though the Makefile is regenerated. > > Moral of the story: > Always rm -rf top_builddir or don't resolve the dependencies manually > and use BUILT_SOURCES. > > Cc: "12.0" > Cc: Vinson Lee > Cc: Kenneth Graunke > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96825 > Fixes: d7a604c3f7a ("anv: use cache uuid based on the build timestamp.") > Signed-off-by: Emil Velikov > --- > Just checking if make distcheck will find any fall-outs, but should work > like a charm. 
> > src/intel/vulkan/Makefile.am | 4 +--- > src/intel/vulkan/Makefile.sources | 3 ++- > 2 files changed, 3 insertions(+), 4 deletions(-) > > diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am > index b605c08..0e521cf 100644 > --- a/src/intel/vulkan/Makefile.am > +++ b/src/intel/vulkan/Makefile.am > @@ -144,10 +144,8 @@ anv_timestamp.h: > @echo "Updating anv_timestamp.h" > $(AM_V_GEN) echo "#define ANV_TIMESTAMP \"$(TIMESTAMP_CMD)\"" > $@ > > -anv_device.$(OBJEXT): anv_timestamp.h > - > BUILT_SOURCES = $(VULKAN_GENERATED_FILES) > -CLEANFILES = $(BUILT_SOURCES) dev_icd.json anv_timestamp.h > +CLEANFILES = $(BUILT_SOURCES) dev_icd.json > EXTRA_DIST = \ > $(top_srcdir)/include/vulkan/vk_icd.h \ > anv_entrypoints_gen.py \ > diff --git a/src/intel/vulkan/Makefile.sources > b/src/intel/vulkan/Makefile.sources > index aa1459a..7303995 100644 > --- a/src/intel/vulkan/Makefile.sources > +++ b/src/intel/vulkan/Makefile.sources > @@ -66,7 +66,8 @@ VULKAN_GEM_STUB_FILES := \ > > VULKAN_GENERATED_FILES := \ > anv_entrypoints.c \ > - anv_entrypoints.h > + anv_entrypoints.h \ > + anv_timestamp.h > > > GEN7_FILES := \ > -- > 2.8.2 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] anv: vulkan: remove the anv_device.$(OBJEXT) rule
From: Emil VelikovAtm the actual rule will expand to foo.o which is used for static libraries only. Thus the automake manual recommendation [to use OBJEXT] won't help us, since since we're working with a shared library. Thus let's 'demote' the file and add it back to BUILT_SOURCES. This will manage all the complexity for us, at the (existing expense) of working only with the all, check and install targets. The crazy (why the issue was hard to spot): If the dependencies (.deps/*.Plo) are already created one can alter the anv_device.$(OBJEXT) line and/or nuke it all together. That won't lead to any warnings/issues, even though the Makefile is regenerated. Moral of the story: Always rm -rf top_builddir or don't resolve the dependencies manually and use BUILT_SOURCES. Cc: "12.0" Cc: Vinson Lee Cc: Kenneth Graunke Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96825 Fixes: d7a604c3f7a ("anv: use cache uuid based on the build timestamp.") Signed-off-by: Emil Velikov --- Just checking if make distcheck will find any fall-outs, but should work like a charm. 
src/intel/vulkan/Makefile.am | 4 +--- src/intel/vulkan/Makefile.sources | 3 ++- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am index b605c08..0e521cf 100644 --- a/src/intel/vulkan/Makefile.am +++ b/src/intel/vulkan/Makefile.am @@ -144,10 +144,8 @@ anv_timestamp.h: @echo "Updating anv_timestamp.h" $(AM_V_GEN) echo "#define ANV_TIMESTAMP \"$(TIMESTAMP_CMD)\"" > $@ -anv_device.$(OBJEXT): anv_timestamp.h - BUILT_SOURCES = $(VULKAN_GENERATED_FILES) -CLEANFILES = $(BUILT_SOURCES) dev_icd.json anv_timestamp.h +CLEANFILES = $(BUILT_SOURCES) dev_icd.json EXTRA_DIST = \ $(top_srcdir)/include/vulkan/vk_icd.h \ anv_entrypoints_gen.py \ diff --git a/src/intel/vulkan/Makefile.sources b/src/intel/vulkan/Makefile.sources index aa1459a..7303995 100644 --- a/src/intel/vulkan/Makefile.sources +++ b/src/intel/vulkan/Makefile.sources @@ -66,7 +66,8 @@ VULKAN_GEM_STUB_FILES := \ VULKAN_GENERATED_FILES := \ anv_entrypoints.c \ - anv_entrypoints.h + anv_entrypoints.h \ + anv_timestamp.h GEN7_FILES := \ -- 2.8.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Mesa-stable] [PATCH] radeon uvd add uvd fw version for amdgpu
On Wed, Jul 6, 2016 at 11:09 AM, Jiang, Sonnywrote: > Hi Emil, > > > Are you Okay with these? Without these patches UVD won't work on Polaris. Alex > > > Thanks, > > Sonny > > > From: Jiang, Sonny > Sent: Monday, July 4, 2016 5:33:29 PM > To: Christian König; Emil Velikov > Cc: 12.0; mesa-dev@lists.freedesktop.org > > Subject: Re: [Mesa-dev] [Mesa-stable] [PATCH] radeon uvd add uvd fw version > for amdgpu > > > Hi Emil, > > > I have added comments for each patch, and applied for branch 12.0. > > Please find attached patches. > > > Thanks, > > Sonny > > > From: mesa-dev on behalf of > Christian König > Sent: Friday, July 1, 2016 8:07:51 AM > To: Emil Velikov > Cc: Jiang, Sonny; 12.0; mesa-dev@lists.freedesktop.org > Subject: Re: [Mesa-dev] [Mesa-stable] [PATCH] radeon uvd add uvd fw version > for amdgpu > > Am 01.07.2016 um 13:14 schrieb Emil Velikov: >> Hi all, >> >> On 29 June 2016 at 20:20, Christian König wrote: >>> Am 29.06.2016 um 18:35 schrieb Alex Deucher: On Wed, Jun 29, 2016 at 11:38 AM, Leo Liu wrote: > From: sonjiang > > Signed-off-by: sonjiang > Cc: "12.0" For the series: Reviewed-by: Alex Deucher >>> >>> Reviewed-by: Christian König as well. >>> >> Here we have three patches, suggesting a bug with absolutely no >> information what the issue is and/or why this approach is correct. >> >> I'm sorry to say this, but as is, this series is not landing in >> stable. Sonjiang, being the author of these please reply with a brief >> justification why we want those. Before doing so I would strongly >> recommend reading this [1] blog post. > > Well to put a carrot on the front of your stick: I asked what the > firmware version patch is all about internally as well when I've seen > those patches. So it would have even made our internal review much > easier if Sonny added a commit message in the first place. > > My fault to not requesting that his answer is put as a commit message on > the patches. 
> > On the other hand this is for Polaris, we had time pressure to get it > out of the door and today is a public holiday in Canada. So you probably > won't get updated message before Monday. > > Is that soon enough? Otherwise UVD will be broken on Polaris in the > stable branch. > > Regards, > Christian. > >> >> Thanks >> Emil >> >> [1] http://who-t.blogspot.co.uk/2009/12/on-commit-messages.html > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Mesa-stable] [PATCH] radeon uvd add uvd fw version for amdgpu
Hi Emil, Are you Okay with these? Thanks, Sonny From: Jiang, Sonny Sent: Monday, July 4, 2016 5:33:29 PM To: Christian König; Emil Velikov Cc: 12.0; mesa-dev@lists.freedesktop.org Subject: Re: [Mesa-dev] [Mesa-stable] [PATCH] radeon uvd add uvd fw version for amdgpu Hi Emil, I have added comments for each patch, and applied for branch 12.0. Please find attached patches. Thanks, Sonny From: mesa-devon behalf of Christian König Sent: Friday, July 1, 2016 8:07:51 AM To: Emil Velikov Cc: Jiang, Sonny; 12.0; mesa-dev@lists.freedesktop.org Subject: Re: [Mesa-dev] [Mesa-stable] [PATCH] radeon uvd add uvd fw version for amdgpu Am 01.07.2016 um 13:14 schrieb Emil Velikov: > Hi all, > > On 29 June 2016 at 20:20, Christian König wrote: >> Am 29.06.2016 um 18:35 schrieb Alex Deucher: >>> On Wed, Jun 29, 2016 at 11:38 AM, Leo Liu wrote: From: sonjiang Signed-off-by: sonjiang Cc: "12.0" >>> For the series: >>> Reviewed-by: Alex Deucher >> >> Reviewed-by: Christian König as well. >> > Here we have three patches, suggesting a bug with absolutely no > information what the issue is and/or why this approach is correct. > > I'm sorry to say this, but as is, this series is not landing in > stable. Sonjiang, being the author of these please reply with a brief > justification why we want those. Before doing so I would strongly > recommend reading this [1] blog post. Well to put a carrot on the front of your stick: I asked what the firmware version patch is all about internally as well when I've seen those patches. So it would have even made our internal review much easier if Sonny added a commit message in the first place. My fault to not requesting that his answer is put as a commit message on the patches. On the other hand this is for Polaris, we had time pressure to get it out of the door and today is a public holiday in Canada. So you probably won't get updated message before Monday. Is that soon enough? Otherwise UVD will be broken on Polaris in the stable branch. 
Regards, Christian. > > Thanks > Emil > > [1] http://who-t.blogspot.co.uk/2009/12/on-commit-messages.html ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 96765] BindFragDataLocationIndexed on array fragment shader output.
https://bugs.freedesktop.org/show_bug.cgi?id=96765 --- Comment #2 from Corentin Wallez --- Thanks for taking a look, indeed my understanding was wrong but I found a Khronos bug related to this that doesn't have a formal resolution but indicates that while you can't bind array elements individually, "array[0]" should be equivalent to "array". See https://cvs.khronos.org/bugzilla/show_bug.cgi?id=7829 Unfortunately Chrome has been testing this on few platforms, so far it only tested NVIDIA proprietary (passes), fglrx (doesn't pass). It also fails on the OSX drivers, seemingly for the same reason as for Mesa. Given all the above, I will fix Chromium's tests to not use indices, can you still consider making the [0] equivalent to no subscript? Thank you for your time. -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] anv: Add anv_timestamp.h to VULKAN_GENERATED_SOURCES.
On 6 July 2016 at 14:47, Emil Velikovwrote: > On 6 July 2016 at 12:46, Emil Velikov wrote: >> On 5 July 2016 at 22:13, Kenneth Graunke wrote: >>> Otherwise it doesn't get automatically generated in a clean build. >>> >>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96825 >>> Signed-off-by: Kenneth Graunke >>> --- >>> src/intel/vulkan/Makefile.sources | 3 ++- >>> 1 file changed, 2 insertions(+), 1 deletion(-) >>> >>> diff --git a/src/intel/vulkan/Makefile.sources >>> b/src/intel/vulkan/Makefile.sources >>> index aa1459a..7303995 100644 >>> --- a/src/intel/vulkan/Makefile.sources >>> +++ b/src/intel/vulkan/Makefile.sources >>> @@ -66,7 +66,8 @@ VULKAN_GEM_STUB_FILES := \ >>> >>> VULKAN_GENERATED_FILES := \ >>> anv_entrypoints.c \ >>> - anv_entrypoints.h >>> + anv_entrypoints.h \ >>> + anv_timestamp.h >>> >> The rule below (already in the makefile) should handle the dependency >> and trigger the generation. Is that one failing even on clean builds ? >> >> anv_device.$(OBJEXT): anv_timestamp.h >> > To put some actual reasoning behind my query (slight reluctance): > - The above rule is taken from the automake manual [1] and should just > work(?) > - Using the above (proposed patch) will recreate anv_timestamp.h > again on the second `make {,install}' invocation, thus it the > anv_device.o (and final binary) will always get rebuild/relinked. Even > if there hasn't been any changes. > Noticed what's happening here, will send out a patch just after a fresh cup of coffee. Sadly the second point stands regardless of the method used, so we'll just have to live with it. -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] vbo: fix attr reset
On Wed, Jul 6, 2016 at 9:53 AM, Brian Paulwrote: > On 07/05/2016 08:57 PM, Mathias Fröhlich wrote: >> >> On Tuesday, July 05, 2016 15:47:45 Rob Clark wrote: >> >> > In bc4e0c4 (vbo: Use a bitmask to track the active arrays in >> vbo_exec*.) >> >> > we stopped looping over all the attributes and resetting all slots. >> >> > Which exposed an issue in vbo_exec_bind_arrays() for handling GENERIC0 >> >> > vs. POS. >> >> > >> >> > Split out a helper which can reset a particular slot, so that >> >> > vbo_exec_bind_arrays() can re-use it to reset POS. > > > Rob, can you mention in the commit msg that this fixes 0ad (I think that was > it)? yup, it was 0ad.. I'll add a note in the commit msg BR, -R > -Brian > > > >> >> I had a similar problem with the pos/generic0 handling >> >> fixed, but obviously missed this! >> >> The fix looks good to me. >> >> Thanks for taking care! >> >> Reviewed-by: Mathias Fröhlich >> >> > >> >> > Signed-off-by: Rob Clark >> >> > --- >> >> > src/mesa/vbo/vbo_exec.h | 1 + >> >> > src/mesa/vbo/vbo_exec_api.c | 14 -- >> >> > src/mesa/vbo/vbo_exec_draw.c | 2 +- >> >> > 3 files changed, 10 insertions(+), 7 deletions(-) >> >> > >> >> > diff --git a/src/mesa/vbo/vbo_exec.h b/src/mesa/vbo/vbo_exec.h >> >> > index 5e20cf6..4f11f17 100644 >> >> > --- a/src/mesa/vbo/vbo_exec.h >> >> > +++ b/src/mesa/vbo/vbo_exec.h >> >> > @@ -152,6 +152,7 @@ void vbo_exec_invalidate_state( struct gl_context >> *ctx, GLbitfield new_state ); >> >> > >> >> > /* Internal functions: >> >> > */ >> >> > +void vbo_reset_attr(struct vbo_exec_context *exec, GLuint attr); >> >> > >> >> > void vbo_exec_vtx_init( struct vbo_exec_context *exec ); >> >> > void vbo_exec_vtx_destroy( struct vbo_exec_context *exec ); >> >> > diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c >> >> > index e02bb90..32f15db 100644 >> >> > --- a/src/mesa/vbo/vbo_exec_api.c >> >> > +++ b/src/mesa/vbo/vbo_exec_api.c >> >> > @@ -1275,16 +1275,18 @@ void vbo_exec_FlushVertices( struct >> 
gl_context *ctx, GLuint flags ) >> >> > #endif >> >> > } >> >> > >> >> > +void vbo_reset_attr(struct vbo_exec_context *exec, GLuint attr) >> >> > +{ >> >> > + exec->vtx.attrsz[attr] = 0; >> >> > + exec->vtx.attrtype[attr] = GL_FLOAT; >> >> > + exec->vtx.active_sz[attr] = 0; >> >> > +} >> >> > >> >> > static void reset_attrfv( struct vbo_exec_context *exec ) >> >> > -{ >> >> > +{ >> >> > while (exec->vtx.enabled) { >> >> > const int i = u_bit_scan64(>vtx.enabled); >> >> > - assert(exec->vtx.attrsz[i]); >> >> > - >> >> > - exec->vtx.attrsz[i] = 0; >> >> > - exec->vtx.attrtype[i] = GL_FLOAT; >> >> > - exec->vtx.active_sz[i] = 0; >> >> > + vbo_reset_attr(exec, i); >> >> > } >> >> > >> >> > exec->vtx.vertex_size = 0; >> >> > diff --git a/src/mesa/vbo/vbo_exec_draw.c >> b/src/mesa/vbo/vbo_exec_draw.c >> >> > index 8d1b2c0..0ef3081 100644 >> >> > --- a/src/mesa/vbo/vbo_exec_draw.c >> >> > +++ b/src/mesa/vbo/vbo_exec_draw.c >> >> > @@ -213,7 +213,7 @@ vbo_exec_bind_arrays( struct gl_context *ctx ) >> >> > exec->vtx.inputs[VERT_ATTRIB_GENERIC0] = exec->vtx.inputs[0]; >> >> > exec->vtx.attrsz[VERT_ATTRIB_GENERIC0] = exec->vtx.attrsz[0]; >> >> > exec->vtx.attrptr[VERT_ATTRIB_GENERIC0] = exec->vtx.attrptr[0]; >> >> > - exec->vtx.attrsz[0] = 0; >> >> > + vbo_reset_attr(exec, VERT_ATTRIB_POS); >> >> > exec->vtx.enabled &= (~BITFIELD64_BIT(VBO_ATTRIB_POS)); >> >> > exec->vtx.enabled |= BITFIELD64_BIT(VBO_ATTRIB_GENERIC0); >> >> > } >> >> > >> > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] vbo: fix attr reset
On 07/05/2016 08:57 PM, Mathias Fröhlich wrote: On Tuesday, July 05, 2016 15:47:45 Rob Clark wrote: > In bc4e0c4 (vbo: Use a bitmask to track the active arrays in vbo_exec*.) > we stopped looping over all the attributes and resetting all slots. > Which exposed an issue in vbo_exec_bind_arrays() for handling GENERIC0 > vs. POS. > > Split out a helper which can reset a particular slot, so that > vbo_exec_bind_arrays() can re-use it to reset POS. Rob, can you mention in the commit msg that this fixes 0ad (I think that was it)? -Brian I had a similar problem with the pos/generic0 handling fixed, but obviously missed this! The fix looks good to me. Thanks for taking care! Reviewed-by: Mathias Fröhlich> > Signed-off-by: Rob Clark > --- > src/mesa/vbo/vbo_exec.h | 1 + > src/mesa/vbo/vbo_exec_api.c | 14 -- > src/mesa/vbo/vbo_exec_draw.c | 2 +- > 3 files changed, 10 insertions(+), 7 deletions(-) > > diff --git a/src/mesa/vbo/vbo_exec.h b/src/mesa/vbo/vbo_exec.h > index 5e20cf6..4f11f17 100644 > --- a/src/mesa/vbo/vbo_exec.h > +++ b/src/mesa/vbo/vbo_exec.h > @@ -152,6 +152,7 @@ void vbo_exec_invalidate_state( struct gl_context *ctx, GLbitfield new_state ); > > /* Internal functions: > */ > +void vbo_reset_attr(struct vbo_exec_context *exec, GLuint attr); > > void vbo_exec_vtx_init( struct vbo_exec_context *exec ); > void vbo_exec_vtx_destroy( struct vbo_exec_context *exec ); > diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c > index e02bb90..32f15db 100644 > --- a/src/mesa/vbo/vbo_exec_api.c > +++ b/src/mesa/vbo/vbo_exec_api.c > @@ -1275,16 +1275,18 @@ void vbo_exec_FlushVertices( struct gl_context *ctx, GLuint flags ) > #endif > } > > +void vbo_reset_attr(struct vbo_exec_context *exec, GLuint attr) > +{ > + exec->vtx.attrsz[attr] = 0; > + exec->vtx.attrtype[attr] = GL_FLOAT; > + exec->vtx.active_sz[attr] = 0; > +} > > static void reset_attrfv( struct vbo_exec_context *exec ) > -{ > +{ > while (exec->vtx.enabled) { > const int i = 
u_bit_scan64(>vtx.enabled); > - assert(exec->vtx.attrsz[i]); > - > - exec->vtx.attrsz[i] = 0; > - exec->vtx.attrtype[i] = GL_FLOAT; > - exec->vtx.active_sz[i] = 0; > + vbo_reset_attr(exec, i); > } > > exec->vtx.vertex_size = 0; > diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c > index 8d1b2c0..0ef3081 100644 > --- a/src/mesa/vbo/vbo_exec_draw.c > +++ b/src/mesa/vbo/vbo_exec_draw.c > @@ -213,7 +213,7 @@ vbo_exec_bind_arrays( struct gl_context *ctx ) > exec->vtx.inputs[VERT_ATTRIB_GENERIC0] = exec->vtx.inputs[0]; > exec->vtx.attrsz[VERT_ATTRIB_GENERIC0] = exec->vtx.attrsz[0]; > exec->vtx.attrptr[VERT_ATTRIB_GENERIC0] = exec->vtx.attrptr[0]; > - exec->vtx.attrsz[0] = 0; > + vbo_reset_attr(exec, VERT_ATTRIB_POS); > exec->vtx.enabled &= (~BITFIELD64_BIT(VBO_ATTRIB_POS)); > exec->vtx.enabled |= BITFIELD64_BIT(VBO_ATTRIB_GENERIC0); > } > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] osmesa: Export OSMesaCreateContextAttribs.
On 07/05/2016 09:08 PM, mathias.froehl...@gmx.net wrote: From: Mathias Fröhlich Hi, Since the function is exported like any other public api function and put in the header as if you could link against it, export it also from shared objects. Please review! Does this need to be tagged for the stable branches? Reviewed-by: Brian Paul Thanks! Mathias --- src/gallium/targets/osmesa/osmesa.def | 1 + src/gallium/targets/osmesa/osmesa.mingw.def | 1 + src/gallium/targets/osmesa/osmesa.sym | 1 + src/mesa/drivers/osmesa/osmesa.def | 1 + 4 files changed, 4 insertions(+) diff --git a/src/gallium/targets/osmesa/osmesa.def b/src/gallium/targets/osmesa/osmesa.def index e347463..f6d09b8 100644 --- a/src/gallium/targets/osmesa/osmesa.def +++ b/src/gallium/targets/osmesa/osmesa.def @@ -3,6 +3,7 @@ VERSION 4.1 EXPORTS OSMesaCreateContext + OSMesaCreateContextAttribs OSMesaCreateContextExt OSMesaDestroyContext OSMesaMakeCurrent diff --git a/src/gallium/targets/osmesa/osmesa.mingw.def b/src/gallium/targets/osmesa/osmesa.mingw.def index 945201c..b77af60 100644 --- a/src/gallium/targets/osmesa/osmesa.mingw.def +++ b/src/gallium/targets/osmesa/osmesa.mingw.def @@ -1,5 +1,6 @@ EXPORTS OSMesaCreateContext = OSMesaCreateContext@8 + OSMesaCreateContextAttribs = OSMesaCreateContextAttribs@8 OSMesaCreateContextExt = OSMesaCreateContextExt@20 OSMesaDestroyContext = OSMesaDestroyContext@4 OSMesaMakeCurrent = OSMesaMakeCurrent@20 diff --git a/src/gallium/targets/osmesa/osmesa.sym b/src/gallium/targets/osmesa/osmesa.sym index d4b963d..59beab3 100644 --- a/src/gallium/targets/osmesa/osmesa.sym +++ b/src/gallium/targets/osmesa/osmesa.sym @@ -2,6 +2,7 @@ global: OSMesaColorClamp; OSMesaCreateContext; + OSMesaCreateContextAttribs; OSMesaCreateContextExt; OSMesaDestroyContext; OSMesaGetColorBuffer; diff --git a/src/mesa/drivers/osmesa/osmesa.def b/src/mesa/drivers/osmesa/osmesa.def index 06afab7..f7dcd59 100644 --- a/src/mesa/drivers/osmesa/osmesa.def +++ b/src/mesa/drivers/osmesa/osmesa.def @@ -4,6 +4,7
@@ VERSION 4.1 EXPORTS OSMesaColorClamp OSMesaCreateContext + OSMesaCreateContextAttribs OSMesaCreateContextExt OSMesaDestroyContext OSMesaMakeCurrent ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] anv: Add anv_timestamp.h to VULKAN_GENERATED_SOURCES.
On 6 July 2016 at 12:46, Emil Velikov wrote: > On 5 July 2016 at 22:13, Kenneth Graunke wrote: >> Otherwise it doesn't get automatically generated in a clean build. >> >> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96825 >> Signed-off-by: Kenneth Graunke >> --- >> src/intel/vulkan/Makefile.sources | 3 ++- >> 1 file changed, 2 insertions(+), 1 deletion(-) >> >> diff --git a/src/intel/vulkan/Makefile.sources >> b/src/intel/vulkan/Makefile.sources >> index aa1459a..7303995 100644 >> --- a/src/intel/vulkan/Makefile.sources >> +++ b/src/intel/vulkan/Makefile.sources >> @@ -66,7 +66,8 @@ VULKAN_GEM_STUB_FILES := \ >> >> VULKAN_GENERATED_FILES := \ >> anv_entrypoints.c \ >> - anv_entrypoints.h >> + anv_entrypoints.h \ >> + anv_timestamp.h >> > The rule below (already in the makefile) should handle the dependency > and trigger the generation. Is that one failing even on clean builds ? > > anv_device.$(OBJEXT): anv_timestamp.h > To put some actual reasoning behind my query (slight reluctance): - The above rule is taken from the automake manual [1] and should just work(?) - Using the above (proposed patch) will recreate anv_timestamp.h again on the second `make {,install}' invocation, thus anv_device.o (and the final binary) will always get rebuilt/relinked, even if there haven't been any changes. Example: make && make install | egrep "anv_timestamp|anv_device" GEN anv_timestamp.h CC anv_device.lo GEN anv_timestamp.h CC anv_device.lo -Emil [1] https://www.gnu.org/software/automake/manual/html_node/Built-Sources-Example.html#Built-Sources-Example ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH mesa] egl/display: remove unnecessary code and make it easier to read
Remove the two first level `if` as they will always be true, and flatten the two remaining `if`. No functional change. Signed-off-by: Eric Engestrom--- src/egl/main/egldisplay.c | 29 ++--- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/src/egl/main/egldisplay.c b/src/egl/main/egldisplay.c index f6db03a..bbc3063 100644 --- a/src/egl/main/egldisplay.c +++ b/src/egl/main/egldisplay.c @@ -178,25 +178,24 @@ _eglNativePlatformDetectNativeDisplay(void *nativeDisplay) _EGLPlatformType _eglGetNativePlatform(void *nativeDisplay) { - static _EGLPlatformType native_platform = _EGL_INVALID_PLATFORM; - char *detection_method = NULL; + static _EGLPlatformType native_platform; + char *detection_method; - if (native_platform == _EGL_INVALID_PLATFORM) { - native_platform = _eglGetNativePlatformFromEnv(); - detection_method = "environment overwrite"; - if (native_platform == _EGL_INVALID_PLATFORM) { - native_platform = _eglNativePlatformDetectNativeDisplay(nativeDisplay); - detection_method = "autodetected"; - if (native_platform == _EGL_INVALID_PLATFORM) { -native_platform = _EGL_NATIVE_PLATFORM; -detection_method = "build-time configuration"; - } - } - } + native_platform = _eglGetNativePlatformFromEnv(); + detection_method = "environment overwrite"; + + if (native_platform == _EGL_INVALID_PLATFORM) { + native_platform = _eglNativePlatformDetectNativeDisplay(nativeDisplay); + detection_method = "autodetected"; + } + + if (native_platform == _EGL_INVALID_PLATFORM) { + native_platform = _EGL_NATIVE_PLATFORM; + detection_method = "build-time configuration"; + } - if (detection_method != NULL) - _eglLog(_EGL_DEBUG, "Native platform type: %s (%s)", - egl_platforms[native_platform].name, detection_method); + _eglLog(_EGL_DEBUG, "Native platform type: %s (%s)", + egl_platforms[native_platform].name, detection_method); return native_platform; } -- 2.9.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] anv: Add anv_timestamp.h to VULKAN_GENERATED_SOURCES.
On 5 July 2016 at 22:13, Kenneth Graunke wrote: > Otherwise it doesn't get automatically generated in a clean build. > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96825 > Signed-off-by: Kenneth Graunke > --- > src/intel/vulkan/Makefile.sources | 3 ++- > 1 file changed, 2 insertions(+), 1 deletion(-) > > diff --git a/src/intel/vulkan/Makefile.sources > b/src/intel/vulkan/Makefile.sources > index aa1459a..7303995 100644 > --- a/src/intel/vulkan/Makefile.sources > +++ b/src/intel/vulkan/Makefile.sources > @@ -66,7 +66,8 @@ VULKAN_GEM_STUB_FILES := \ > > VULKAN_GENERATED_FILES := \ > anv_entrypoints.c \ > - anv_entrypoints.h > + anv_entrypoints.h \ > + anv_timestamp.h > The rule below (already in the makefile) should handle the dependency and trigger the generation. Is that one failing even on clean builds ? anv_device.$(OBJEXT): anv_timestamp.h -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 96825] anv_device.c:31:27: fatal error: anv_timestamp.h: No such file or directory
https://bugs.freedesktop.org/show_bug.cgi?id=96825 --- Comment #5 from Emil Velikov --- > Does the system in question have access to 'date' ? If not I'd strongly > recommend installing a new enough version of the coreutils package. > > If `date' is available, please provide the output of `make V=1' after the > failed build (to keep the log short). If you do a quick search in srcdir & > builddir for anv_timestamp.h that'll also be nice. > Vinson, can you help out with these ? Looking at the successful oibaf PPA builds [1], I'm inclined to think that this is not something affecting a wide range of people/setups, but specific to your setup(s) ? The above questions/suggestions will help track it down. Can one have access to the affected setups/machines ? Alternatively is there a vmware/vbox image that one could use to achieve the above results ? [1] https://launchpad.net/~oibaf/+archive/ubuntu/graphics-drivers/+packages -- You are receiving this mail because: You are the QA Contact for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/6] i965/fs/gen7: split instructions that run into exec masking bugs
From: Iago Toral Quiroga In fp64 we can produce code like this: mov(16) vgrf2<2>:UD, vgrf3<2>:UD which our SIMD lowering pass would typically split into instructions with a width of 8, writing to two consecutive registers each. Unfortunately, gen7 hardware has a bug affecting execution masking and as a result, the second GRF register write won't work properly. Curro verified this: "The problem is that pre-Gen8 EUs are hardwired to use the QtrCtrl+1 (where QtrCtrl is the 8-bit quarter of the execution mask signals specified in the instruction control fields) for the second compressed half of any single-precision instruction (for double-precision instructions it's hardwired to use NibCtrl+1), which means that the EU will apply the wrong execution controls for the second sequential GRF write if the number of channels per GRF is not exactly eight in single-precision mode (or four in double-float mode)." In practice, this means that we cannot write more than one consecutive GRF in a single instruction if the number of channels per GRF is not exactly eight in single-precision mode (or four in double-float mode). This patch makes our SIMD lowering pass split this kind of instruction so that the split versions only write to a single register. In the example above this means that we split the write into 4 instructions, each one writing 4 UD elements (width = 4) to a single register. 
--- src/mesa/drivers/dri/i965/brw_fs.cpp | 20 1 file changed, 20 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 2f473cc..caf88d1 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -4677,6 +4677,26 @@ static unsigned get_fpu_lowered_simd_width(const struct brw_device_info *devinfo, const fs_inst *inst) { + /* Pre-Gen8 EUs are hardwired to use the QtrCtrl+1 (where QtrCtrl is +* the 8-bit quarter of the execution mask signals specified in the +* instruction control fields) for the second compressed half of any +* single-precision instruction (for double-precision instructions +* it's hardwired to use NibCtrl+1), which means that the EU will +* apply the wrong execution controls for the second sequential GRF +* write if the number of channels per GRF is not exactly eight in +* single-precision mode (or four in double-float mode). +* +* In this situation we calculate the maximum size of the split +* instructions so they only ever write to a single register. +*/ + unsigned type_size = type_sz(inst->dst.type); + unsigned channels_per_grf = inst->exec_size / inst->regs_written; + assert(channels_per_grf > 0); + if (devinfo->gen < 8 && inst->regs_written > 1 && + channels_per_grf != REG_SIZE / type_size) { + return channels_per_grf; + } + /* Maximum execution size representable in the instruction controls. */ unsigned max_width = MIN2(32, inst->exec_size); -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6/6] i965/fs: don't copy propagate if the instruction writes to more than two adjacent GRFs
This is not allowed by the HW, and copy propagation can hide this issue from the lower_simd_width pass, which is going to fix it. Signed-off-by: Samuel Iglesias Gonsálvez --- src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index 438f681..c7f7628 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -752,6 +752,7 @@ can_propagate_from(fs_inst *inst) inst->src[0].file == UNIFORM || inst->src[0].file == IMM) && inst->src[0].type == inst->dst.type && + inst->regs_written <= 2 && !inst->is_partial_write()); } -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/6] i965/fs: use the new helper function to create double immediates
From: Iago Toral Quiroga--- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 268c847..d805d95 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -832,7 +832,7 @@ fs_visitor::nir_emit_alu(const fs_builder , nir_alu_instr *instr) * a register and compare with that. */ fs_reg tmp = vgrf(glsl_type::double_type); - bld.MOV(tmp, brw_imm_df(0.0)); + bld.MOV(tmp, setup_imm_df(0.0)); /* A direct DF CMP using the flag register (null dst) won't work in * SIMD16 because the CMP will be split in two by lower_simd_width, @@ -1171,7 +1171,7 @@ fs_visitor::nir_emit_alu(const fs_builder , nir_alu_instr *instr) case nir_op_d2b: { /* two-argument instructions can't take 64-bit immediates */ fs_reg zero = vgrf(glsl_type::double_type); - bld.MOV(zero, brw_imm_df(0.0)); + bld.MOV(zero, setup_imm_df(0.0)); /* A SIMD16 execution needs to be split in two instructions, so use * a vgrf instead of the flag register as dst so instruction splitting * works @@ -1483,7 +1483,7 @@ fs_visitor::nir_emit_load_const(const fs_builder , case 64: for (unsigned i = 0; i < instr->def.num_components; i++) - bld.MOV(offset(reg, bld, i), brw_imm_df(instr->value.f64[i])); + bld.MOV(offset(reg, bld, i), setup_imm_df(instr->value.f64[i])); break; default: -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/6] i965/fs: do pack lowering before simd splitting
From: Iago Toral Quiroga So that we can have gen7 split large writes produced by the pack lowering. --- src/mesa/drivers/dri/i965/brw_fs.cpp | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index caf88d1..0d4eb51 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -5830,6 +5830,11 @@ fs_visitor::optimize() progress = false; pass_num = 0; + if (OPT(lower_pack)) { + OPT(register_coalesce); + OPT(dead_code_eliminate); + } + OPT(lower_simd_width); /* After SIMD lowering just in case we had to unroll the EOT send. */ @@ -5866,11 +5871,6 @@ fs_visitor::optimize() OPT(dead_code_eliminate); } - if (OPT(lower_pack)) { - OPT(register_coalesce); - OPT(dead_code_eliminate); - } - if (OPT(lower_d2x)) { OPT(opt_copy_propagate); OPT(dead_code_eliminate); -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/6] i965/fs: do not require force_writemask_all with exec_size 4
So far we only used instructions with this size in situations where we did not operate per-channel and we wanted to ignore the execution mask, but gen7 fp64 will need to emit code with a width of 4 that needs normal execution masking. --- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index d25d26a..07581d2 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1649,7 +1649,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) brw_set_default_acc_write_control(p, inst->writes_accumulator); brw_set_default_exec_size(p, cvt(inst->exec_size) - 1); - assert(inst->force_writemask_all || inst->exec_size >= 8); assert(inst->force_writemask_all || inst->group % inst->exec_size == 0); assert(inst->base_mrf + inst->mlen <= BRW_MAX_MRF(devinfo->gen)); assert(inst->mlen <= BRW_MAX_MSG_LENGTH); -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/6] i965/fs: add a helper function to create double immediates
From: Iago Toral QuirogaGen7 hardware does not support double immediates so these need to be moved in 32-bit chunks to a regular vgrf instead. Instead of doing this every time we need to create a DF immediate, create a helper function that does the right thing depending on the hardware generation. --- src/mesa/drivers/dri/i965/brw_fs.h | 2 ++ src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 43 2 files changed, 45 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 4237197..dd7ce7d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -167,6 +167,8 @@ public: bool lower_simd_width(); bool opt_combine_constants(); + fs_reg setup_imm_df(double v); + void emit_dummy_fs(); void emit_repclear_shader(); fs_reg *emit_fragcoord_interpolation(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index b3f5dfd..268c847 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -616,6 +616,49 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr, return true; } +fs_reg +fs_visitor::setup_imm_df(double v) +{ + assert(devinfo->gen >= 7); + + if (devinfo->gen >= 8) + return brw_imm_df(v); + + /* gen7 does not support DF immediates, so we generate a 64-bit constant by +* writing the low 32-bit of the constant to suboffset 0 of a VGRF and +* the high 32-bit to suboffset 4 and then applying a stride of 0. +* +* Alternatively, we could also produce a normal VGRF (without stride 0) +* by writing to all the channels in the VGRF, however, that would hit the +* gen7 bug where we have to split writes that span more than 1 register +* into instructions with a width of 4 (otherwise the write to the second +* register written runs into an execmask hardware bug) which isn't very +* nice. 
+*/ + union { + double d; + struct { + uint32_t i1; + uint32_t i2; + }; + } di; + + di.d = v; + + fs_reg tmp = vgrf(glsl_type::uint_type); + fs_inst *inst = bld.MOV(tmp, brw_imm_ud(di.i1)); + inst->force_writemask_all = true; + inst->exec_size = 1; + inst->regs_written = 1; + + inst = bld.MOV(horiz_offset(tmp, 1), brw_imm_ud(di.i2)); + inst->force_writemask_all = true; + inst->exec_size = 1; + inst->regs_written = 1; + + return component(retype(tmp, BRW_REGISTER_TYPE_DF), 0); +} + void fs_visitor::nir_emit_alu(const fs_builder , nir_alu_instr *instr) { -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev