Re: [Mesa-dev] [PATCH 4/4] radeonsi/nir: gather buffers declared more accurately and use const fast path
On 30/03/18 13:52, Marek Olšák wrote: On Tue, Mar 27, 2018 at 12:19 AM, Timothy Arceri> wrote: For now we skip SI && HAVE_LLVM < 0x0600 for simplicity. We also skip setting the more accurate masks for some builtin uniforms for now as it causes some piglit regressions. --- src/gallium/drivers/radeonsi/si_shader.c | 8 +++ src/gallium/drivers/radeonsi/si_shader_nir.c | 82 ++-- 2 files changed, 84 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 62cb7ea7eb5..9a12f9ee8f2 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2377,8 +2377,16 @@ static LLVMValueRef load_const_buffer_desc(struct si_shader_context *ctx, int i) static LLVMValueRef load_ubo(struct ac_shader_abi *abi, LLVMValueRef index) { struct si_shader_context *ctx = si_shader_context_from_abi(abi); + struct si_shader_selector *sel = ctx->shader->selector; + LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, ctx->param_const_and_shader_buffers); + if (sel->info.const_buffers_declared == 1 && + sel->info.shader_buffers_declared == 0 && + !(ctx->screen->info.chip_class == SI && HAVE_LLVM < 0x0600)) { You don't have to check SI and LLVM here. (because const_buffers_declared > 1) Right. Will fix. 
+ return load_const_buffer_desc_fast_path(ctx); + } + index = si_llvm_bound_index(ctx, index, ctx->num_const_buffers); index = LLVMBuildAdd(ctx->ac.builder, index, LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, 0), ""); diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index 52950668714..595f376f6a2 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -611,23 +611,91 @@ void si_nir_scan_shader(const struct nir_shader *nir, info->num_outputs = num_outputs; + struct set *ubo_set = _mesa_set_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + + unsigned ubo_idx = 1; nir_foreach_variable(variable, >uniforms) { const struct glsl_type *type = variable->type; enum glsl_base_type base_type = glsl_get_base_type(glsl_without_array(type)); unsigned aoa_size = MAX2(1, glsl_get_aoa_size(type)); + /* Gather buffers declared bitmasks. Note: radeonsi doesn't + * really use the mask (other than ubo_idx == 1 for regular + * uniforms) its really only used for getting the buffer count + * so we don't need to worry about the ordering. + */ + if (variable->interface_type != NULL) { + if (variable->data.mode == nir_var_uniform) { + + unsigned block_size; + if (base_type != GLSL_TYPE_INTERFACE) { + struct set_entry *entry = + _mesa_set_search(ubo_set, variable->interface_type); + + /* Check if we have already processed + * a member from this ubo. + */ + if (entry) + continue; + + block_size = 1; + } else { + block_size = aoa_size; + } + + info->const_buffers_declared |= u_bit_consecutive(ubo_idx, block_size); + ubo_idx += block_size; Can you explain what this does? Sets the mask for an array of blocks e.g. uniform block { vec4 a; vec4 b; } name[4]; Since each block array element is considered a separate buffer. 
I should probably rename block_size -> block_count + + _mesa_set_add(ubo_set, variable->interface_type); + } + + if (variable->data.mode == nir_var_shader_storage) { + /* TODO: make this more accurate */ + info->shader_buffers_declared = +
Re: [Mesa-dev] [PATCH] nir: s/uint/unsigned/ to fix MSVC/MinGW build
Reviewed-by: Neha Bhende From: Brian Paul Sent: Thursday, March 29, 2018 9:02:45 PM To: mesa-dev@lists.freedesktop.org Cc: Neha Bhende; Charmaine Lee Subject: [PATCH] nir: s/uint/unsigned/ to fix MSVC/MinGW build --- src/compiler/glsl/glsl_to_nir.cpp | 2 +- src/compiler/nir/nir_gather_info.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp index c4a6d52..dbb58d8 100644 --- a/src/compiler/glsl/glsl_to_nir.cpp +++ b/src/compiler/glsl/glsl_to_nir.cpp @@ -369,7 +369,7 @@ nir_visitor::visit(ir_variable *ir) /* Mark all the locations that require two slots */ if (shader->info.stage == MESA_SHADER_VERTEX && glsl_type_is_dual_slot(glsl_without_array(var->type))) { - for (uint i = 0; i < glsl_count_attribute_slots(var->type, true); i++) { + for (unsigned i = 0; i < glsl_count_attribute_slots(var->type, true); i++) { uint64_t bitfield = BITFIELD64_BIT(var->data.location + i); shader->info.vs.double_inputs |= bitfield; } diff --git a/src/compiler/nir/nir_gather_info.c b/src/compiler/nir/nir_gather_info.c index 743f968..5530009 100644 --- a/src/compiler/nir/nir_gather_info.c +++ b/src/compiler/nir/nir_gather_info.c @@ -250,7 +250,7 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader) if (shader->info.stage == MESA_SHADER_VERTEX && var->data.mode == nir_var_shader_in && glsl_type_is_dual_slot(glsl_without_array(var->type))) { -for (uint i = 0; i < glsl_count_attribute_slots(var->type, false); i++) { +for (unsigned i = 0; i < glsl_count_attribute_slots(var->type, false); i++) { int idx = var->data.location + i; shader->info.vs.double_inputs |= BITFIELD64_BIT(idx); } -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] nir: s/uint/unsigned/ to fix MSVC/MinGW build
Looks good to me, though I wonder what broke it - must have been some include file reshuffling? Reviewed-by: Roland ScheideggerAm 30.03.2018 um 06:02 schrieb Brian Paul: > --- > src/compiler/glsl/glsl_to_nir.cpp | 2 +- > src/compiler/nir/nir_gather_info.c | 2 +- > 2 files changed, 2 insertions(+), 2 deletions(-) > > diff --git a/src/compiler/glsl/glsl_to_nir.cpp > b/src/compiler/glsl/glsl_to_nir.cpp > index c4a6d52..dbb58d8 100644 > --- a/src/compiler/glsl/glsl_to_nir.cpp > +++ b/src/compiler/glsl/glsl_to_nir.cpp > @@ -369,7 +369,7 @@ nir_visitor::visit(ir_variable *ir) >/* Mark all the locations that require two slots */ >if (shader->info.stage == MESA_SHADER_VERTEX && >glsl_type_is_dual_slot(glsl_without_array(var->type))) { > - for (uint i = 0; i < glsl_count_attribute_slots(var->type, true); > i++) { > + for (unsigned i = 0; i < glsl_count_attribute_slots(var->type, > true); i++) { > uint64_t bitfield = BITFIELD64_BIT(var->data.location + i); > shader->info.vs.double_inputs |= bitfield; > } > diff --git a/src/compiler/nir/nir_gather_info.c > b/src/compiler/nir/nir_gather_info.c > index 743f968..5530009 100644 > --- a/src/compiler/nir/nir_gather_info.c > +++ b/src/compiler/nir/nir_gather_info.c > @@ -250,7 +250,7 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, > nir_shader *shader) > if (shader->info.stage == MESA_SHADER_VERTEX && > var->data.mode == nir_var_shader_in && > glsl_type_is_dual_slot(glsl_without_array(var->type))) { > -for (uint i = 0; i < glsl_count_attribute_slots(var->type, > false); i++) { > +for (unsigned i = 0; i < glsl_count_attribute_slots(var->type, > false); i++) { > int idx = var->data.location + i; > shader->info.vs.double_inputs |= BITFIELD64_BIT(idx); > } > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] nir: s/uint/unsigned/ to fix MSVC/MinGW build
--- src/compiler/glsl/glsl_to_nir.cpp | 2 +- src/compiler/nir/nir_gather_info.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp index c4a6d52..dbb58d8 100644 --- a/src/compiler/glsl/glsl_to_nir.cpp +++ b/src/compiler/glsl/glsl_to_nir.cpp @@ -369,7 +369,7 @@ nir_visitor::visit(ir_variable *ir) /* Mark all the locations that require two slots */ if (shader->info.stage == MESA_SHADER_VERTEX && glsl_type_is_dual_slot(glsl_without_array(var->type))) { - for (uint i = 0; i < glsl_count_attribute_slots(var->type, true); i++) { + for (unsigned i = 0; i < glsl_count_attribute_slots(var->type, true); i++) { uint64_t bitfield = BITFIELD64_BIT(var->data.location + i); shader->info.vs.double_inputs |= bitfield; } diff --git a/src/compiler/nir/nir_gather_info.c b/src/compiler/nir/nir_gather_info.c index 743f968..5530009 100644 --- a/src/compiler/nir/nir_gather_info.c +++ b/src/compiler/nir/nir_gather_info.c @@ -250,7 +250,7 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader) if (shader->info.stage == MESA_SHADER_VERTEX && var->data.mode == nir_var_shader_in && glsl_type_is_dual_slot(glsl_without_array(var->type))) { -for (uint i = 0; i < glsl_count_attribute_slots(var->type, false); i++) { +for (unsigned i = 0; i < glsl_count_attribute_slots(var->type, false); i++) { int idx = var->data.location + i; shader->info.vs.double_inputs |= BITFIELD64_BIT(idx); } -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/4] radeonsi: create load_const_buffer_desc_fast_path() helper
Ignore my comment on this patch. For patches 1 - 3: Reviewed-by: Marek OlšákMarek On Tue, Mar 27, 2018 at 12:19 AM, Timothy Arceri wrote: > This will be shared by the TGSI and NIR backends. For simplicity > we leave the SI LLVM 5.0 and lower work around only in the TGSI > backend. > --- > src/gallium/drivers/radeonsi/si_shader.c | 88 > ++-- > 1 file changed, 49 insertions(+), 39 deletions(-) > > diff --git a/src/gallium/drivers/radeonsi/si_shader.c > b/src/gallium/drivers/radeonsi/si_shader.c > index d5607a99d32..62cb7ea7eb5 100644 > --- a/src/gallium/drivers/radeonsi/si_shader.c > +++ b/src/gallium/drivers/radeonsi/si_shader.c > @@ -2322,6 +2322,49 @@ void si_tgsi_declare_compute_memory(struct > si_shader_context *ctx, > si_declare_compute_memory(ctx); > } > > +static LLVMValueRef load_const_buffer_desc_fast_path(struct > si_shader_context *ctx) > +{ > + LLVMValueRef ptr = > + LLVMGetParam(ctx->main_fn, ctx->param_const_and_shader_bu > ffers); > + struct si_shader_selector *sel = ctx->shader->selector; > + > + /* Do the bounds checking with a descriptor, because > +* doing computation and manual bounds checking of 64-bit > +* addresses generates horrible VALU code with very high > +* VGPR usage and very low SIMD occupancy. > +*/ > + ptr = LLVMBuildPtrToInt(ctx->ac.builder, ptr, ctx->ac.intptr, ""); > + > + LLVMValueRef desc0, desc1; > + if (HAVE_32BIT_POINTERS) { > + desc0 = ptr; > + desc1 = LLVMConstInt(ctx->i32, > + > S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), > 0); > + } else { > + ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ctx->v2i32, > ""); > + desc0 = LLVMBuildExtractElement(ctx->ac.builder, ptr, > ctx->i32_0, ""); > + desc1 = LLVMBuildExtractElement(ctx->ac.builder, ptr, > ctx->i32_1, ""); > + /* Mask out all bits except BASE_ADDRESS_HI. 
*/ > + desc1 = LLVMBuildAnd(ctx->ac.builder, desc1, > +LLVMConstInt(ctx->i32, > ~C_008F04_BASE_ADDRESS_HI, 0), ""); > + } > + > + LLVMValueRef desc_elems[] = { > + desc0, > + desc1, > + LLVMConstInt(ctx->i32, (sel->info.const_file_max[0] + 1) * > 16, 0), > + LLVMConstInt(ctx->i32, > + S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | > + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | > + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | > + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | > + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) > | > + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32), > 0) > + }; > + > + return ac_build_gather_values(>ac, desc_elems, 4); > +} > + > static LLVMValueRef load_const_buffer_desc(struct si_shader_context *ctx, > int i) > { > LLVMValueRef list_ptr = LLVMGetParam(ctx->main_fn, > @@ -2400,8 +2443,6 @@ static LLVMValueRef fetch_constant( > /* Fast path when user data SGPRs point to constant buffer 0 > directly. */ > if (sel->info.const_buffers_declared == 1 && > sel->info.shader_buffers_declared == 0) { > - LLVMValueRef ptr = > - LLVMGetParam(ctx->main_fn, > ctx->param_const_and_shader_buffers); > > /* This enables use of s_load_dword and flat_load_dword > for const buffer 0 > * loads, and up to x4 load opcode merging. However, it > leads to horrible > @@ -2416,48 +2457,17 @@ static LLVMValueRef fetch_constant( > * s_buffer_load_dword (that we have to prevent) is when > we use use > * a literal offset where we don't need bounds checking. 
> */ > - if (ctx->screen->info.chip_class == SI && > -HAVE_LLVM < 0x0600 && > -!reg->Register.Indirect) { > + if (ctx->screen->info.chip_class == SI && HAVE_LLVM < > 0x0600 && > + !reg->Register.Indirect) { > + LLVMValueRef ptr = > + LLVMGetParam(ctx->main_fn, > ctx->param_const_and_shader_buffers); > + > addr = LLVMBuildLShr(ctx->ac.builder, addr, > LLVMConstInt(ctx->i32, 2, 0), ""); > LLVMValueRef result = > ac_build_load_invariant(>ac, ptr, addr); > return bitcast(bld_base, type, result); > } > > - /* Do the bounds checking with a descriptor, because > -* doing computation and manual bounds checking of 64-bit > -* addresses generates horrible VALU code with very high > -* VGPR usage and very low SIMD occupancy. > -
Re: [Mesa-dev] [PATCH 4/4] radeonsi/nir: gather buffers declared more accurately and use const fast path
On Tue, Mar 27, 2018 at 12:19 AM, Timothy Arceriwrote: > For now we skip SI && HAVE_LLVM < 0x0600 for simplicity. We also skip > setting the more accurate masks for some builtin uniforms for now as > it causes some piglit regressions. > --- > src/gallium/drivers/radeonsi/si_shader.c | 8 +++ > src/gallium/drivers/radeonsi/si_shader_nir.c | 82 > ++-- > 2 files changed, 84 insertions(+), 6 deletions(-) > > diff --git a/src/gallium/drivers/radeonsi/si_shader.c > b/src/gallium/drivers/radeonsi/si_shader.c > index 62cb7ea7eb5..9a12f9ee8f2 100644 > --- a/src/gallium/drivers/radeonsi/si_shader.c > +++ b/src/gallium/drivers/radeonsi/si_shader.c > @@ -2377,8 +2377,16 @@ static LLVMValueRef load_const_buffer_desc(struct > si_shader_context *ctx, int i) > static LLVMValueRef load_ubo(struct ac_shader_abi *abi, LLVMValueRef > index) > { > struct si_shader_context *ctx = si_shader_context_from_abi(abi); > + struct si_shader_selector *sel = ctx->shader->selector; > + > LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, > ctx->param_const_and_shader_buffers); > > + if (sel->info.const_buffers_declared == 1 && > + sel->info.shader_buffers_declared == 0 && > + !(ctx->screen->info.chip_class == SI && HAVE_LLVM < 0x0600)) { > You don't have to check SI and LLVM here. 
(because const_buffers_declared > 1) > + return load_const_buffer_desc_fast_path(ctx); > + } > + > index = si_llvm_bound_index(ctx, index, ctx->num_const_buffers); > index = LLVMBuildAdd(ctx->ac.builder, index, > LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, > 0), ""); > diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c > b/src/gallium/drivers/radeonsi/si_shader_nir.c > index 52950668714..595f376f6a2 100644 > --- a/src/gallium/drivers/radeonsi/si_shader_nir.c > +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c > @@ -611,23 +611,91 @@ void si_nir_scan_shader(const struct nir_shader *nir, > > info->num_outputs = num_outputs; > > + struct set *ubo_set = _mesa_set_create(NULL, _mesa_hash_pointer, > + _mesa_key_pointer_equal); > + > + unsigned ubo_idx = 1; > nir_foreach_variable(variable, >uniforms) { > const struct glsl_type *type = variable->type; > enum glsl_base_type base_type = > glsl_get_base_type(glsl_without_array(type)); > unsigned aoa_size = MAX2(1, glsl_get_aoa_size(type)); > > + /* Gather buffers declared bitmasks. Note: radeonsi doesn't > +* really use the mask (other than ubo_idx == 1 for regular > +* uniforms) its really only used for getting the buffer > count > +* so we don't need to worry about the ordering. > +*/ > + if (variable->interface_type != NULL) { > + if (variable->data.mode == nir_var_uniform) { > + > + unsigned block_size; > + if (base_type != GLSL_TYPE_INTERFACE) { > + struct set_entry *entry = > + _mesa_set_search(ubo_set, > variable->interface_type); > + > + /* Check if we have already > processed > +* a member from this ubo. > +*/ > + if (entry) > + continue; > + > + block_size = 1; > + } else { > + block_size = aoa_size; > + } > + > + info->const_buffers_declared |= > u_bit_consecutive(ubo_idx, block_size); > + ubo_idx += block_size; > Can you explain what this does? 
> + > + _mesa_set_add(ubo_set, > variable->interface_type); > + } > + > + if (variable->data.mode == nir_var_shader_storage) > { > + /* TODO: make this more accurate */ > + info->shader_buffers_declared = > + u_bit_consecutive(0, > SI_NUM_SHADER_BUFFERS); > + } > + > + continue; > + } > + > /* We rely on the fact that nir_lower_samplers_as_deref has > * eliminated struct dereferences. > */ > - if (base_type == GLSL_TYPE_SAMPLER) > + if (base_type == GLSL_TYPE_SAMPLER) { > info->samplers_declared |= > u_bit_consecutive(variable->data.binding, >
Re: [Mesa-dev] [PATCH 3/4] radeonsi: create load_const_buffer_desc_fast_path() helper
The driver should ignore the nir option on SI with LLVM <= 0x0500. Marek On Tue, Mar 27, 2018 at 12:19 AM, Timothy Arceriwrote: > This will be shared by the TGSI and NIR backends. For simplicity > we leave the SI LLVM 5.0 and lower work around only in the TGSI > backend. > --- > src/gallium/drivers/radeonsi/si_shader.c | 88 > ++-- > 1 file changed, 49 insertions(+), 39 deletions(-) > > diff --git a/src/gallium/drivers/radeonsi/si_shader.c > b/src/gallium/drivers/radeonsi/si_shader.c > index d5607a99d32..62cb7ea7eb5 100644 > --- a/src/gallium/drivers/radeonsi/si_shader.c > +++ b/src/gallium/drivers/radeonsi/si_shader.c > @@ -2322,6 +2322,49 @@ void si_tgsi_declare_compute_memory(struct > si_shader_context *ctx, > si_declare_compute_memory(ctx); > } > > +static LLVMValueRef load_const_buffer_desc_fast_path(struct > si_shader_context *ctx) > +{ > + LLVMValueRef ptr = > + LLVMGetParam(ctx->main_fn, ctx->param_const_and_shader_ > buffers); > + struct si_shader_selector *sel = ctx->shader->selector; > + > + /* Do the bounds checking with a descriptor, because > +* doing computation and manual bounds checking of 64-bit > +* addresses generates horrible VALU code with very high > +* VGPR usage and very low SIMD occupancy. > +*/ > + ptr = LLVMBuildPtrToInt(ctx->ac.builder, ptr, ctx->ac.intptr, ""); > + > + LLVMValueRef desc0, desc1; > + if (HAVE_32BIT_POINTERS) { > + desc0 = ptr; > + desc1 = LLVMConstInt(ctx->i32, > + > S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), > 0); > + } else { > + ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ctx->v2i32, > ""); > + desc0 = LLVMBuildExtractElement(ctx->ac.builder, ptr, > ctx->i32_0, ""); > + desc1 = LLVMBuildExtractElement(ctx->ac.builder, ptr, > ctx->i32_1, ""); > + /* Mask out all bits except BASE_ADDRESS_HI. 
*/ > + desc1 = LLVMBuildAnd(ctx->ac.builder, desc1, > +LLVMConstInt(ctx->i32, > ~C_008F04_BASE_ADDRESS_HI, 0), ""); > + } > + > + LLVMValueRef desc_elems[] = { > + desc0, > + desc1, > + LLVMConstInt(ctx->i32, (sel->info.const_file_max[0] + 1) * > 16, 0), > + LLVMConstInt(ctx->i32, > + S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | > + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | > + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | > + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | > + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) > | > + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32), > 0) > + }; > + > + return ac_build_gather_values(>ac, desc_elems, 4); > +} > + > static LLVMValueRef load_const_buffer_desc(struct si_shader_context *ctx, > int i) > { > LLVMValueRef list_ptr = LLVMGetParam(ctx->main_fn, > @@ -2400,8 +2443,6 @@ static LLVMValueRef fetch_constant( > /* Fast path when user data SGPRs point to constant buffer 0 > directly. */ > if (sel->info.const_buffers_declared == 1 && > sel->info.shader_buffers_declared == 0) { > - LLVMValueRef ptr = > - LLVMGetParam(ctx->main_fn, > ctx->param_const_and_shader_buffers); > > /* This enables use of s_load_dword and flat_load_dword > for const buffer 0 > * loads, and up to x4 load opcode merging. However, it > leads to horrible > @@ -2416,48 +2457,17 @@ static LLVMValueRef fetch_constant( > * s_buffer_load_dword (that we have to prevent) is when > we use use > * a literal offset where we don't need bounds checking. 
> */ > - if (ctx->screen->info.chip_class == SI && > -HAVE_LLVM < 0x0600 && > -!reg->Register.Indirect) { > + if (ctx->screen->info.chip_class == SI && HAVE_LLVM < > 0x0600 && > + !reg->Register.Indirect) { > + LLVMValueRef ptr = > + LLVMGetParam(ctx->main_fn, > ctx->param_const_and_shader_buffers); > + > addr = LLVMBuildLShr(ctx->ac.builder, addr, > LLVMConstInt(ctx->i32, 2, 0), ""); > LLVMValueRef result = > ac_build_load_invariant(>ac, ptr, addr); > return bitcast(bld_base, type, result); > } > > - /* Do the bounds checking with a descriptor, because > -* doing computation and manual bounds checking of 64-bit > -* addresses generates horrible VALU code with very high > -* VGPR usage and very low SIMD occupancy. > -*/ > -
Re: [Mesa-dev] [PATCH] ac/nir: Fix include for LLVMAddPromoteMemoryToRegisterPass
On Fri, 30 Mar 2018, 01:42 Dylan Baker wrote: > Quoting Mike Lothian (2018-03-29 16:56:28) > > Include llvm-c/Transforms/Utils.h with the newest LLVM 7 > > > > Fixes: 2dd4f35c7fc llvm-c: Split Utils out of Scalar.h > > I have no comment on the patch itself. > > JFYI, this tag ("Fixes") has specific meaning in mesa and should only > refer to > mesa commits (we have scripts for stable releases that parse that > information). > > Dylan > Ah, sorry about that. I also think ac/nir only covers the first file and not the other two, which I think are gallivm and radeonsi respectively. > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] ac/nir: Fix include for LLVMAddPromoteMemoryToRegisterPass
Quoting Mike Lothian (2018-03-29 16:56:28) > Include llvm-c/Transforms/Utils.h with the newest LLVM 7 > > Fixes: 2dd4f35c7fc llvm-c: Split Utils out of Scalar.h I have no comment on the patch itself. JFYI, this tag ("Fixes") has specific meaning in mesa and should only refer to mesa commits (we have scripts for stable releases that parse that information). Dylan signature.asc Description: signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 06/61] nir/validator: Validate that all used variables exist
On Thu, Mar 29, 2018 at 2:19 PM, Kenneth Graunkewrote: > On Friday, March 23, 2018 2:42:12 PM PDT Jason Ekstrand wrote: > > We were validating this for locals but nothing else. > > --- > > src/compiler/nir/nir_validate.c | 16 +--- > > 1 file changed, 9 insertions(+), 7 deletions(-) > > > > diff --git a/src/compiler/nir/nir_validate.c b/src/compiler/nir/nir_ > validate.c > > index a49948f..e9d6bd5 100644 > > --- a/src/compiler/nir/nir_validate.c > > +++ b/src/compiler/nir/nir_validate.c > > @@ -96,7 +96,9 @@ typedef struct { > > /* bitset of registers we have currently found; used to check > uniqueness */ > > BITSET_WORD *regs_found; > > > > - /* map of local variable -> function implementation where it is > defined */ > > + /* map of variable -> function implementation where it is defined or > NULL > > +* if it is a global variable > > +*/ > > struct hash_table *var_defs; > > > > /* map of instruction/var/etc to failed assert string */ > > @@ -448,12 +450,10 @@ validate_deref_chain(nir_deref *deref, > nir_variable_mode mode, > > static void > > validate_var_use(nir_variable *var, validate_state *state) > > { > > - if (var->data.mode == nir_var_local) { > > - struct hash_entry *entry = _mesa_hash_table_search(state->var_defs, > var); > > - > > - validate_assert(state, entry); > > + struct hash_entry *entry = _mesa_hash_table_search(state->var_defs, > var); > > + validate_assert(state, entry); > > + if (var->data.mode == nir_var_local) > >validate_assert(state, (nir_function_impl *) entry->data == > state->impl); > > - } > > } > > > > static void > > @@ -1000,7 +1000,9 @@ validate_var_decl(nir_variable *var, bool > is_global, validate_state *state) > > * support) > > */ > > > > - if (!is_global) { > > + if (is_global) { > > + _mesa_hash_table_insert(state->var_defs, var, NULL); > > + } else { > >_mesa_hash_table_insert(state->var_defs, var, state->impl); > > } > > I'd personally do > >_mesa_hash_table_insert(state->var_defs, var, >is_global ? 
NULL : state->impl); > > since we want to insert into the same set either way, just with NULL for > the impl if there isn't one. Doesn't matter though, your call. > Good call > Patches 1-6 are: > Reviewed-by: Kenneth Graunke > Thanks! ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] ac/nir: Fix include for LLVMAddPromoteMemoryToRegisterPass
Include llvm-c/Transforms/Utils.h with the newest LLVM 7 Fixes: 2dd4f35c7fc llvm-c: Split Utils out of Scalar.h Signed-off-by: Mike Lothian --- I don't have git commit rights. If you're happy with this patch, please can you commit it? Cheers, Mike --- src/amd/vulkan/radv_nir_to_llvm.c | 3 +++ src/gallium/auxiliary/gallivm/lp_bld_init.c | 3 +++ src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c | 3 +++ 3 files changed, 9 insertions(+) diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index c6b4e8b532..2f0864da46 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -32,6 +32,9 @@ #include #include #include +#if HAVE_LLVM >= 0x0700 +#include <llvm-c/Transforms/Utils.h> +#endif #include "sid.h" #include "gfx9d.h" diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c index 6ddc509a81..dae9d01552 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -40,6 +40,9 @@ #include #include +#if HAVE_LLVM >= 0x0700 +#include <llvm-c/Transforms/Utils.h> +#endif #include diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c index 79fdebe838..622fd26950 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c @@ -40,6 +40,9 @@ #include #include #include +#if HAVE_LLVM >= 0x0700 +#include <llvm-c/Transforms/Utils.h> +#endif enum si_llvm_calling_convention { RADEON_LLVM_AMDGPU_VS = 87, -- 2.16.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH, v2] CHROMIUM: configure.ac/meson.build: Fix -latomic test
On Fri, Mar 30, 2018 at 2:26 AM, Matt Turner wrote: > On Thu, Mar 29, 2018 at 1:31 AM, Nicolas Boichat > wrote: >> From: Nicolas Boichat >> >> When compiling with LLVM 6.0, the test fails to detect that >> -latomic is actually required, as the atomic call is inlined. >> >> In the code itself (src/util/disk_cache.c), we see this pattern: >> p_atomic_add(cache->size, - (uint64_t)size); >> where cache->size is an uint64_t *, and results in the following >> link time error without -latomic: >> src/util/disk_cache.c:628: error: undefined reference to >> '__atomic_fetch_add_8' >> >> Fix the configure/meson test to replicate this pattern, which then >> correctly realizes the need for -latomic. >> >> Signed-off-by: Nicolas Boichat >> --- >> >> Changes since v1: >> - Updated meson.build as well (untested) >> >> configure.ac | 6 -- >> meson.build | 6 -- >> 2 files changed, 8 insertions(+), 4 deletions(-) >> >> diff --git a/configure.ac b/configure.ac >> index e874f8ebfb2..eff9a0ef88f 100644 >> --- a/configure.ac >> +++ b/configure.ac >> @@ -445,9 +445,11 @@ if test "x$GCC_ATOMIC_BUILTINS_SUPPORTED" = x1; then >> AC_MSG_CHECKING(whether -latomic is needed) >> AC_LINK_IFELSE([AC_LANG_SOURCE([[ >> #include >> -uint64_t v; >> +struct { >> +uint64_t* v; > > I wouldn't care except that you put the * with the v in the Meson case. :) Argh ,-( I'll send a v3, let's see if anyone has further comments, first. > Also, on what platform does this occur? This is ARC++ (Android 32-bit x86) with clang version: Android (4639204 based on r316199) clang version 6.0.1 (https://android.googlesource.com/toolchain/clang 279c0d3a962121a6d1d535e7b0b5d9d36d3c829d) (https://android.googlesource.com/toolchain/llvm aadd87ffb6a2eafcb577913073d46b20195a9cdc) (based on LLVM 6.0.1svn) > Looking at this code, I would expect it to behave the same as before. > Do you have an idea why this fixes it, or why the original code didn't > work? 
I'm guessing it's about the compiler's ability to recognize that it > knows the location of the variable. With the original code, objdump looks like this: 08048400 : 8048400: 53 push %ebx 8048401: 56 push %esi 8048402: e8 00 00 00 00 call 8048407 8048407: 5e pop%esi 8048408: 81 c6 ed 1b 00 00 add$0x1bed,%esi 804840e: 31 c0 xor%eax,%eax 8048410: 31 d2 xor%edx,%edx 8048412: 31 c9 xor%ecx,%ecx 8048414: 31 db xor%ebx,%ebx 8048416: f0 0f c7 8e 24 00 00lock cmpxchg8b 0x24(%esi) 804841d: 00 804841e: 5e pop%esi 804841f: 5b pop%ebx 8048420: c3 ret Looks like LLVM figures out that the variable's address is constant, and uses some 64-bit atomic swap operations on it directly. With the updated code (building with -latomic; it fails otherwise): 08048480 : 8048480: 53 push %ebx 8048481: 83 ec 08sub$0x8,%esp 8048484: e8 00 00 00 00 call 8048489 8048489: 5b pop%ebx 804848a: 81 c3 6b 1b 00 00 add$0x1b6b,%ebx 8048490: 83 ec 08sub$0x8,%esp 8048493: 6a 02 push $0x2 8048495: ff b3 8c 10 00 00 pushl 0x108c(%ebx) 804849b: e8 05 00 00 00 call 80484a5 <__atomic_load_8> 80484a0: 83 c4 18add$0x18,%esp 80484a3: 5b pop%ebx 80484a4: c3 ret I think the code is trying to protect both x.v (address) _and_ its value *x.v? Or maybe LLVM does not see the pattern... (I don't see why cmpxchg8b wouldn't work here too, otherwise...) Actually, the test can be made simpler, by just using: uint64_t *v; ... __atomic_load_n(v, ... But then it does not match the usage pattern in the code, so I feel a little bit more confident that the current test will actually capture when -latomic is needed. Thanks, ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [AppVeyor] mesa master #7334 failed
Build mesa 7334 failed. Commit 19e0dd1ad3 by Ian Romanick on 10/18/2017 3:59 PM: i965: Don't request GLSL IR lowering of gl_VertexID. Let the lowering in NIR handle it instead. This hurts one shader that occurs twice in shader-db (SynMark GSCloth) on IVB and HSW. No other shaders or platforms were affected. total cycles in shared programs: 253438422 -> 253438426 (0.00%); cycles in affected programs: 412 -> 416 (0.97%); helped: 0; HURT: 2. Signed-off-by: Ian Romanick Reviewed-by: Antia Puentes Configure your notification preferences ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] nir: add support for 4 constant offsets in tg4
Nvidia hardware can do that natively so there is no need to lower that to four TG4s instructions. Signed-off-by: Karol Herbst--- src/compiler/glsl/glsl_to_nir.cpp | 25 ++--- src/compiler/nir/nir.h| 9 - src/compiler/nir/nir_print.c | 9 + 3 files changed, 35 insertions(+), 8 deletions(-) diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp index c4a6d52a5b2..4ea5f1616a7 100644 --- a/src/compiler/glsl/glsl_to_nir.cpp +++ b/src/compiler/glsl/glsl_to_nir.cpp @@ -2042,7 +2042,9 @@ nir_visitor::visit(ir_texture *ir) num_srcs++; if (ir->shadow_comparator != NULL) num_srcs++; - if (ir->offset != NULL) + if (ir->offset != NULL && ir->offset->type->is_array()) + num_srcs += ir->offset->type->array_size(); + else if (ir->offset != NULL) num_srcs++; nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs); @@ -2097,12 +2099,21 @@ nir_visitor::visit(ir_texture *ir) if (ir->offset != NULL) { /* we don't support multiple offsets yet */ - assert(ir->offset->type->is_vector() || ir->offset->type->is_scalar()); - - instr->src[src_number].src = - nir_src_for_ssa(evaluate_rvalue(ir->offset)); - instr->src[src_number].src_type = nir_tex_src_offset; - src_number++; + if (ir->offset->type->is_vector() || ir->offset->type->is_scalar()) { + instr->src[src_number].src = +nir_src_for_ssa(evaluate_rvalue(ir->offset)); + instr->src[src_number].src_type = nir_tex_src_offset; + src_number++; + } else if (ir->offset->type->is_array()) { + for (int i = 0; i < ir->offset->type->array_size(); i++) { +instr->src[src_number].src = + nir_src_for_ssa(evaluate_rvalue(ir->offset->as_constant()->get_array_element(i)->as_rvalue())); +instr->src[src_number].src_type = (nir_tex_src_type)(nir_tex_src_offset + i); +src_number++; + } + } else { + assert(false); + } } switch (ir->op) { diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 9fff1f4647d..7b02c4af05f 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -1175,6 +1175,9 @@ typedef enum 
{ nir_tex_src_projector, nir_tex_src_comparator, /* shadow comparator */ nir_tex_src_offset, + nir_tex_src_offset1, + nir_tex_src_offset2, + nir_tex_src_offset3, nir_tex_src_bias, nir_tex_src_lod, nir_tex_src_ms_index, /* MSAA sample index */ @@ -1377,6 +1380,9 @@ nir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src) return nir_type_float; case nir_tex_src_offset: + case nir_tex_src_offset1: + case nir_tex_src_offset2: + case nir_tex_src_offset3: case nir_tex_src_ms_index: case nir_tex_src_texture_offset: case nir_tex_src_sampler_offset: @@ -1408,7 +1414,8 @@ nir_tex_instr_src_size(const nir_tex_instr *instr, unsigned src) /* Usual APIs don't allow cube + offset, but we allow it, with 2 coords for * the offset, since a cube maps to a single face. */ - if (instr->src[src].src_type == nir_tex_src_offset) { + if (instr->src[src].src_type >= nir_tex_src_offset && + instr->src[src].src_type <= nir_tex_src_offset3) { if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) return 2; else if (instr->is_array) diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c index 21f13097651..e13a4f9aa6d 100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@ -751,6 +751,15 @@ print_tex_instr(nir_tex_instr *instr, print_state *state) case nir_tex_src_offset: fprintf(fp, "(offset)"); break; + case nir_tex_src_offset1: + fprintf(fp, "(offset1)"); + break; + case nir_tex_src_offset2: + fprintf(fp, "(offset2)"); + break; + case nir_tex_src_offset3: + fprintf(fp, "(offset3)"); + break; case nir_tex_src_bias: fprintf(fp, "(bias)"); break; -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 06/61] nir/validator: Validate that all used variables exist
On Friday, March 23, 2018 2:42:12 PM PDT Jason Ekstrand wrote: > We were validating this for locals but nothing else. > --- > src/compiler/nir/nir_validate.c | 16 +--- > 1 file changed, 9 insertions(+), 7 deletions(-) > > diff --git a/src/compiler/nir/nir_validate.c b/src/compiler/nir/nir_validate.c > index a49948f..e9d6bd5 100644 > --- a/src/compiler/nir/nir_validate.c > +++ b/src/compiler/nir/nir_validate.c > @@ -96,7 +96,9 @@ typedef struct { > /* bitset of registers we have currently found; used to check uniqueness > */ > BITSET_WORD *regs_found; > > - /* map of local variable -> function implementation where it is defined */ > + /* map of variable -> function implementation where it is defined or NULL > +* if it is a global variable > +*/ > struct hash_table *var_defs; > > /* map of instruction/var/etc to failed assert string */ > @@ -448,12 +450,10 @@ validate_deref_chain(nir_deref *deref, > nir_variable_mode mode, > static void > validate_var_use(nir_variable *var, validate_state *state) > { > - if (var->data.mode == nir_var_local) { > - struct hash_entry *entry = _mesa_hash_table_search(state->var_defs, > var); > - > - validate_assert(state, entry); > + struct hash_entry *entry = _mesa_hash_table_search(state->var_defs, var); > + validate_assert(state, entry); > + if (var->data.mode == nir_var_local) >validate_assert(state, (nir_function_impl *) entry->data == > state->impl); > - } > } > > static void > @@ -1000,7 +1000,9 @@ validate_var_decl(nir_variable *var, bool is_global, > validate_state *state) > * support) > */ > > - if (!is_global) { > + if (is_global) { > + _mesa_hash_table_insert(state->var_defs, var, NULL); > + } else { >_mesa_hash_table_insert(state->var_defs, var, state->impl); > } I'd personally do _mesa_hash_table_insert(state->var_defs, var, is_global ? NULL : state->impl); since we want to insert into the same set either way, just with NULL for the impl if there isn't one. Doesn't matter though, your call. 
Patches 1-6 are: Reviewed-by: Kenneth Graunke signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 103999] 4x MSAA with RG32F shows garbage on triangle edges
https://bugs.freedesktop.org/show_bug.cgi?id=103999 --- Comment #6 from Clément Guérin--- @maister could you host your apitrace and share it here? -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] gallium/util: Don't stub u_debug_stack on Android
On Thu, Mar 29, 2018 at 4:38 PM, Eric Engestrom wrote: > On Thursday, 2018-03-29 00:19:00 +0200, Stefan Schake wrote: >> The fallback path for no libunwind ends up being stubs for Android. >> Don't compile them in so we can provide our own implementation. >> >> Signed-off-by: Stefan Schake >> --- >> src/gallium/auxiliary/util/u_debug_stack.c | 2 +- >> 1 file changed, 1 insertion(+), 1 deletion(-) >> >> diff --git a/src/gallium/auxiliary/util/u_debug_stack.c >> b/src/gallium/auxiliary/util/u_debug_stack.c >> index 846f648..5cbb54f 100644 >> --- a/src/gallium/auxiliary/util/u_debug_stack.c >> +++ b/src/gallium/auxiliary/util/u_debug_stack.c >> @@ -194,7 +194,7 @@ debug_backtrace_print(FILE *f, >> } >> } >> >> -#else /* ! HAVE_LIBUNWIND */ >> +#elif !defined(ANDROID) /* ! HAVE_LIBUNWIND */ > > I think I would prefer this, which is easier to extend in the future: > > #elif defined(ANDROID) > /* Not implemented here; see u_debug_stack_android.cpp */ > #else /* ! HAVE_LIBUNWIND */ > > But other than this, the series looks good to me :) > > This patch (preferably with the amendment I suggested) is: > Reviewed-by: Eric Engestrom > Thanks! I like your suggestion a lot more. I'll let the other patch stew some more, then send a v2. Thanks, Stefan ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 105730] [llvmpipe] 109 piglit failures, 19264 crashes on ppc (ppc64, mesa-18.0.0)
https://bugs.freedesktop.org/show_bug.cgi?id=105730 --- Comment #5 from erhar...@mailbox.org --- Created attachment 138435 --> https://bugs.freedesktop.org/attachment.cgi?id=138435&action=edit html summary from 'piglit run all' (18.0.0) -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] intel/compiler: Explicitly cast register type in switch
On 03/29/2018 12:32 PM, Ian Romanick wrote: From: Ian Romanickbrw_reg::type is "enum brw_reg_type type:4". For whatever reason, GCC is treating this as an int instead of an enum. As a result, it doesn't detect missing switch cases and it doesn't detect that flow can get out of the switch. This silences the warning: src/intel/compiler/brw_reg.h: In function ‘bool brw_regs_negative_equal(const brw_reg*, const brw_reg*)’: src/intel/compiler/brw_reg.h:305:1: warning: control reaches end of non-void function [-Wreturn-type] } ^ Signed-off-by: Ian Romanick --- src/intel/compiler/brw_reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/intel/compiler/brw_reg.h b/src/intel/compiler/brw_reg.h index 68158cc0cc8..62f76ac0fe0 100644 --- a/src/intel/compiler/brw_reg.h +++ b/src/intel/compiler/brw_reg.h @@ -262,7 +262,7 @@ brw_regs_negative_equal(const struct brw_reg *a, const struct brw_reg *b) if (a->bits != b->bits) return false; - switch (a->type) { + switch ((enum brw_reg_type) a->type) { case BRW_REGISTER_TYPE_UQ: case BRW_REGISTER_TYPE_Q: return a->d64 == -b->d64; Reviewed-by: Brian Paul Thanks. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 105730] [llvmpipe] 109 piglit failures, 19264 crashes on ppc (ppc64, mesa-18.0.0)
https://bugs.freedesktop.org/show_bug.cgi?id=105730 --- Comment #4 from erhar...@mailbox.org --- Created attachment 138432 --> https://bugs.freedesktop.org/attachment.cgi?id=138432&action=edit results from 'piglit run all' (18.0.0) -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 105775] F1 2017 crashes on GCN 1.0 cards
https://bugs.freedesktop.org/show_bug.cgi?id=105775 --- Comment #20 from Samuel Pitoiset --- FYI, I can actually reproduce the crash on Polaris, I will investigate tomorrow. Thanks for all the details. -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 105730] [llvmpipe] 109 piglit failures, 19264 crashes on ppc (ppc64, mesa-18.0.0)
https://bugs.freedesktop.org/show_bug.cgi?id=105730 erhar...@mailbox.org changed: What|Removed |Added Summary|[llvmpipe] 116 piglit |[llvmpipe] 109 piglit |failures, 19197 crashes on |failures, 19264 crashes on |ppc (ppc64, |ppc (ppc64, mesa-18.0.0) |mesa-18.0.0_rc5)| --- Comment #3 from erhar...@mailbox.org --- Re-run of the piglit suite with 18.0.0: T801 ~/build/piglit # ./piglit run sanity results/sanity [1/1] pass: 1 Thank you for running Piglit! Results have been written to /root/build/piglit/results/sanity T801 ~/build/piglit # ./piglit run all results/OpenGL_all_llvm Skipping GL_ARB_gpu_shader5 tests [53127/53127] skip: 23711, pass: 10043, fail: 109, crash: 19264 Thank you for running Piglit! Results have been written to /root/build/piglit/results/OpenGL_all_llvm ef@T801 ~ $ glxinfo | grep -i opengl OpenGL vendor string: VMware, Inc. OpenGL renderer string: llvmpipe (LLVM 5.0, 128 bits) OpenGL core profile version string: 3.3 (Core Profile) Mesa 18.0.0 OpenGL core profile shading language version string: 3.30 OpenGL core profile context flags: (none) OpenGL core profile profile mask: core profile OpenGL core profile extensions: OpenGL version string: 3.0 Mesa 18.0.0 OpenGL shading language version string: 1.30 OpenGL context flags: (none) OpenGL extensions: OpenGL ES profile version string: OpenGL ES 3.0 Mesa 18.0.0 OpenGL ES profile shading language version string: OpenGL ES GLSL ES 3.00 OpenGL ES profile extensions: -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] intel/compiler: Explicitly cast register type in switch
On Thu, Mar 29, 2018 at 11:32 AM, Ian Romanick wrote: > From: Ian Romanick > > brw_reg::type is "enum brw_reg_type type:4". For whatever reason, GCC > is treating this as an int instead of an enum. As a result, it doesn't > detect missing switch cases and it doesn't detect that flow can get out > of the switch. Weird. That is not what I would have expected GCC to do. Reviewed-by: Matt Turner ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] intel/compiler: Explicitly cast register type in switch
From: Ian Romanickbrw_reg::type is "enum brw_reg_type type:4". For whatever reason, GCC is treating this as an int instead of an enum. As a result, it doesn't detect missing switch cases and it doesn't detect that flow can get out of the switch. This silences the warning: src/intel/compiler/brw_reg.h: In function ‘bool brw_regs_negative_equal(const brw_reg*, const brw_reg*)’: src/intel/compiler/brw_reg.h:305:1: warning: control reaches end of non-void function [-Wreturn-type] } ^ Signed-off-by: Ian Romanick --- src/intel/compiler/brw_reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/intel/compiler/brw_reg.h b/src/intel/compiler/brw_reg.h index 68158cc0cc8..62f76ac0fe0 100644 --- a/src/intel/compiler/brw_reg.h +++ b/src/intel/compiler/brw_reg.h @@ -262,7 +262,7 @@ brw_regs_negative_equal(const struct brw_reg *a, const struct brw_reg *b) if (a->bits != b->bits) return false; - switch (a->type) { + switch ((enum brw_reg_type) a->type) { case BRW_REGISTER_TYPE_UQ: case BRW_REGISTER_TYPE_Q: return a->d64 == -b->d64; -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH, v2] CHROMIUM: configure.ac/meson.build: Fix -latomic test
On Thu, Mar 29, 2018 at 1:31 AM, Nicolas Boichat wrote: > From: Nicolas Boichat > > When compiling with LLVM 6.0, the test fails to detect that > -latomic is actually required, as the atomic call is inlined. > > In the code itself (src/util/disk_cache.c), we see this pattern: > p_atomic_add(cache->size, - (uint64_t)size); > where cache->size is an uint64_t *, and results in the following > link time error without -latomic: > src/util/disk_cache.c:628: error: undefined reference to > '__atomic_fetch_add_8' > > Fix the configure/meson test to replicate this pattern, which then > correctly realizes the need for -latomic. > > Signed-off-by: Nicolas Boichat > --- > > Changes since v1: > - Updated meson.build as well (untested) > > configure.ac | 6 -- > meson.build | 6 -- > 2 files changed, 8 insertions(+), 4 deletions(-) > > diff --git a/configure.ac b/configure.ac > index e874f8ebfb2..eff9a0ef88f 100644 > --- a/configure.ac > +++ b/configure.ac > @@ -445,9 +445,11 @@ if test "x$GCC_ATOMIC_BUILTINS_SUPPORTED" = x1; then > AC_MSG_CHECKING(whether -latomic is needed) > AC_LINK_IFELSE([AC_LANG_SOURCE([[ > #include > -uint64_t v; > +struct { > +uint64_t* v; I wouldn't care except that you put the * with the v in the Meson case. :) Looking at this code, I would expect it to behave the same as before. Do you have an idea why this fixes it, or why the original code didn't work? I'm guessing it's about the compiler's ability to recognize that it knows the location of the variable. Also, on what platform does this occur? ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] meson: fix warnings about comparing unlike types
Quoting Caio Marcelo de Oliveira Filho (2018-03-29 11:13:51) > On Thu, Mar 29, 2018 at 09:31:13AM -0700, Dylan Baker wrote: > > In the old days (0.42.x), when mesa's meson system was written the > > recommendation for handling conditional dependencies was to define them > > as empty lists. When meson would evaluate the dependencies of a target > > it would recursively flatten all of the arguments, and empty lists would > > be removed. There are some problems with this, among them that lists and > > dependencies have different methods (namely .found()), so the > > recommendation changed to use `declare_dependency()` for such cases. > > This has the advantage of providing a .found() method, so there is no > > need to do things like `dep_foo != [] and dep_foo.found()`. > > What about using dependency('', required: false) instead? > > http://mesonbuild.com/Reference-manual.html#dependency > > If dependency_name is '', the dependency is always not found. So > with required: false, this always returns a dependency object for > which the found() method returns false, and which can be passed > like any other dependency to the dependencies: keyword argument of > a build_target. This can be used to implement a dependency which > is sometimes not required e.g. in some branches of a conditional. > > Maybe even having a variable named notfound (or optional, or something > similar) with such dependency, and assign it to the other variables. > > From reading the docs for declare_dependency() it doesn't really > guarantee that found() would be false, while the approach above is > documented as what we want. > > > Thanks, > Caio I'll look at it again, I seem to remember that being added relatively recently (I would like to bump the dependency to 0.44 anyway, so maybe that's as good of a reason). I'll check and see what it says. Dylan signature.asc Description: signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] intel/compiler: fix return statement warning in brw_regs_negative_equal()
I have a theory about this warning... I'll either have a patch or an R-b today. On 03/28/2018 06:48 PM, Brian Paul wrote: > Silence a gcc warning about missing return value in non-void function. > For some reason, gcc 5.4.0 (at least) can't deduce that all else/if > cases return a value. > --- > src/intel/compiler/brw_reg.h | 2 ++ > 1 file changed, 2 insertions(+) > > diff --git a/src/intel/compiler/brw_reg.h b/src/intel/compiler/brw_reg.h > index 68158cc..0d2900a 100644 > --- a/src/intel/compiler/brw_reg.h > +++ b/src/intel/compiler/brw_reg.h > @@ -302,6 +302,8 @@ brw_regs_negative_equal(const struct brw_reg *a, const > struct brw_reg *b) > >return brw_regs_equal(, b); > } > + > + return false; /* silence compiler warning */ > } > > struct brw_indirect { > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] meson: don't use compiler.has_header
This should be nominated for stable Quoting Dylan Baker (2018-03-12 11:23:23) > Meson's compiler.has_header is completely useless, it only checks that a > header exists, not whether it's usable. This creates problems if a > header contains a conditional #error declaration, like so: > > > #if __x86_64__ > > # error "Doesn't work with x86_64!" > > #endif > > Compiler.has_header will return true in this case, even when compiling > for x86_64. This is useless. > > Instead, we'll do a compile check so that any #error declarations will > be treated as errors, and compilation will work. > > Fixes compilation on x32 architecture. > > Gentoo Bugzilla: https://bugs.gentoo.org/show_bug.cgi?id=649746 > meson bug: https://github.com/mesonbuild/meson/issues/2246 > CC: Matt Turner> Signed-off-by: Dylan Baker > --- > meson.build | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/meson.build b/meson.build > index 3c63f384381..51b470253f5 100644 > --- a/meson.build > +++ b/meson.build > @@ -912,7 +912,7 @@ elif cc.has_header_symbol('sys/mkdev.h', 'major') > endif > > foreach h : ['xlocale.h', 'sys/sysctl.h', 'linux/futex.h'] > - if cc.has_header(h) > + if cc.compiles('#include <@0@>'.format(h), name : '@0@ works'.format(h)) > pre_args += '-DHAVE_@0@'.format(h.to_upper().underscorify()) >endif > endforeach > -- > 2.16.2 > signature.asc Description: signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] meson: fix warnings about comparing unlike types
On Thu, Mar 29, 2018 at 09:31:13AM -0700, Dylan Baker wrote: > In the old days (0.42.x), when mesa's meson system was written the > recommendation for handling conditional dependencies was to define them > as empty lists. When meson would evaluate the dependencies of a target > it would recursively flatten all of the arguments, and empty lists would > be removed. There are some problems with this, among them that lists and > dependencies have different methods (namely .found()), so the > recommendation changed to use `declare_dependency()` for such cases. > This has the advantage of providing a .found() method, so there is no > need to do things like `dep_foo != [] and dep_foo.found()`. What about using dependency('', required: false) instead? http://mesonbuild.com/Reference-manual.html#dependency If dependency_name is '', the dependency is always not found. So with required: false, this always returns a dependency object for which the found() method returns false, and which can be passed like any other dependency to the dependencies: keyword argument of a build_target. This can be used to implement a dependency which is sometimes not required e.g. in some branches of a conditional. Maybe even having a variable named notfound (or optional, or something similar) with such dependency, and assign it to the other variables. From reading the docs for declare_dependency() it doesn't really guarantee that found() would be false, while the approach above is documented as what we want. Thanks, Caio ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v5 12/19] i965: Add aux_buf variable to simplify code.
In a follow up patch, we make use of clear_color_bo, which is in mt->mcs_buf or mt->hiz_buf. To avoid duplicating more code that does the same thing on both aux buffers, just use aux_buf already. v5: Add aux_buf to brw_wm_surface_state too. Signed-off-by: Rafael Antognolli--- src/mesa/drivers/dri/i965/brw_blorp.c| 19 +++ src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 15 --- 2 files changed, 15 insertions(+), 19 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c b/src/mesa/drivers/dri/i965/brw_blorp.c index 44064fc0cf3..a0977598309 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.c +++ b/src/mesa/drivers/dri/i965/brw_blorp.c @@ -155,10 +155,13 @@ blorp_surf_for_miptree(struct brw_context *brw, }; struct isl_surf *aux_surf = NULL; + struct intel_miptree_aux_buffer *aux_buf = NULL; if (mt->mcs_buf) - aux_surf = >mcs_buf->surf; + aux_buf = mt->mcs_buf; else if (mt->hiz_buf) - aux_surf = >hiz_buf->surf; + aux_buf = mt->hiz_buf; + + aux_surf = _buf->surf; if (mt->format == MESA_FORMAT_S_UINT8 && is_render_target && devinfo->gen <= 7) @@ -180,16 +183,8 @@ blorp_surf_for_miptree(struct brw_context *brw, .mocs = surf->addr.mocs, }; - if (mt->mcs_buf) { - surf->aux_addr.buffer = mt->mcs_buf->bo; - surf->aux_addr.offset = mt->mcs_buf->offset; - } else { - assert(mt->hiz_buf); - assert(surf->aux_usage == ISL_AUX_USAGE_HIZ); - - surf->aux_addr.buffer = mt->hiz_buf->bo; - surf->aux_addr.offset = mt->hiz_buf->offset; - } + surf->aux_addr.buffer = aux_buf->bo; + surf->aux_addr.offset = aux_buf->offset; } else { surf->aux_addr = (struct blorp_address) { .buffer = NULL, diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index caa92d7d878..ea855916403 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -152,22 +152,19 @@ brw_emit_surface_state(struct brw_context *brw, union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } }; - struct 
brw_bo *aux_bo; + struct brw_bo *aux_bo = NULL; struct isl_surf *aux_surf = NULL; uint64_t aux_offset = 0; + struct intel_miptree_aux_buffer *aux_buf = NULL; switch (aux_usage) { case ISL_AUX_USAGE_MCS: case ISL_AUX_USAGE_CCS_D: case ISL_AUX_USAGE_CCS_E: - aux_surf = >mcs_buf->surf; - aux_bo = mt->mcs_buf->bo; - aux_offset = mt->mcs_buf->offset; + aux_buf = mt->mcs_buf; break; case ISL_AUX_USAGE_HIZ: - aux_surf = >hiz_buf->surf; - aux_bo = mt->hiz_buf->bo; - aux_offset = 0; + aux_buf = mt->hiz_buf; break; case ISL_AUX_USAGE_NONE: @@ -175,6 +172,10 @@ brw_emit_surface_state(struct brw_context *brw, } if (aux_usage != ISL_AUX_USAGE_NONE) { + aux_surf = _buf->surf; + aux_bo = aux_buf->bo; + aux_offset = aux_buf->offset; + /* We only really need a clear color if we also have an auxiliary * surface. Without one, it does nothing. */ -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v5 19/19] intel: Remove use_clear_address flag from isl_surf_fill_state_info.
This flag was used while porting parts of the code to use the clear color address, but other parts were not ported yet. So isl had to be flexible enough to support both cases. Now that the code is using exclusively clear color address for everything Gen10+, we don't need it anymore. Signed-off-by: Rafael AntognolliReviewed-by: Jason Ekstrand Reviewed-by: Jordan Justen --- src/intel/blorp/blorp_genX_exec.h| 4 src/intel/isl/isl.h | 7 +++ src/intel/isl/isl_surface_state.c| 21 +++-- src/intel/vulkan/anv_image.c | 1 - src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 7 ++- 5 files changed, 12 insertions(+), 28 deletions(-) diff --git a/src/intel/blorp/blorp_genX_exec.h b/src/intel/blorp/blorp_genX_exec.h index 7851228d8dc..889e206b72a 100644 --- a/src/intel/blorp/blorp_genX_exec.h +++ b/src/intel/blorp/blorp_genX_exec.h @@ -1314,15 +1314,11 @@ blorp_emit_surface_state(struct blorp_batch *batch, write_disable_mask |= ISL_CHANNEL_ALPHA_BIT; } - const bool use_clear_address = - GEN_GEN >= 10 && (surface->clear_color_addr.buffer != NULL); - isl_surf_fill_state(batch->blorp->isl_dev, state, .surf = , .view = >view, .aux_surf = >aux_surf, .aux_usage = aux_usage, .mocs = surface->addr.mocs, .clear_color = surface->clear_color, - .use_clear_address = use_clear_address, .write_disables = write_disable_mask); blorp_surface_reloc(batch, state_offset + isl_dev->ss.addr_offset, diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h index c50b78d4701..d65c621a732 100644 --- a/src/intel/isl/isl.h +++ b/src/intel/isl/isl.h @@ -1308,12 +1308,11 @@ struct isl_surf_fill_state_info { union isl_color_value clear_color; /** -* Send only the clear value address +* The address of the clear color state buffer * -* If set, we only pass the clear address to the GPU and it will fetch it -* from wherever it is. +* On gen10+, we use an address to the indirect clear color, stored in a +* state buffer. 
*/ - bool use_clear_address; uint64_t clear_address; /** diff --git a/src/intel/isl/isl_surface_state.c b/src/intel/isl/isl_surface_state.c index bff9693f02d..77931f25aa3 100644 --- a/src/intel/isl/isl_surface_state.c +++ b/src/intel/isl/isl_surface_state.c @@ -637,21 +637,14 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state, #endif if (info->aux_usage != ISL_AUX_USAGE_NONE) { - if (info->use_clear_address) { #if GEN_GEN >= 10 - s.ClearValueAddressEnable = true; - s.ClearValueAddress = info->clear_address; -#else - unreachable("Gen9 and earlier do not support indirect clear colors"); -#endif - } -#if GEN_GEN >= 9 - if (!info->use_clear_address) { - s.RedClearColor = info->clear_color.u32[0]; - s.GreenClearColor = info->clear_color.u32[1]; - s.BlueClearColor = info->clear_color.u32[2]; - s.AlphaClearColor = info->clear_color.u32[3]; - } + s.ClearValueAddressEnable = true; + s.ClearValueAddress = info->clear_address; +#elif GEN_GEN >= 9 + s.RedClearColor = info->clear_color.u32[0]; + s.GreenClearColor = info->clear_color.u32[1]; + s.BlueClearColor = info->clear_color.u32[2]; + s.AlphaClearColor = info->clear_color.u32[3]; #elif GEN_GEN >= 7 /* Prior to Sky Lake, we only have one bit for the clear color which * gives us 0 or 1 in whatever the surface's format happens to be. 
diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index a941559eb3a..7f16b3dd5f2 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -1162,7 +1162,6 @@ anv_image_fill_surface_state(struct anv_device *device, .aux_usage = aux_usage, .aux_address = aux_address, .clear_address = clear_address.offset, - .use_clear_address = clear_address.bo != NULL, .mocs = device->default_mocs, .x_offset_sa = tile_x_sa, .y_offset_sa = tile_y_sa); diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 3fb101bf68b..d20d2b44e53 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -187,11 +187,9 @@ brw_emit_surface_state(struct brw_context *brw, brw->isl_dev.ss.align, surf_offset); - bool use_clear_address = devinfo->gen >= 10 && aux_surf; - struct brw_bo *clear_bo = NULL;
[Mesa-dev] [PATCH v5 17/19] anv: Use clear address for HiZ fast clears too.
Store the default clear address for HiZ fast clears on a global bo, and point to it when needed. Signed-off-by: Rafael AntognolliReviewed-by: Jason Ekstrand Reviewed-by: Jordan Justen --- src/intel/vulkan/anv_device.c | 19 +++ src/intel/vulkan/anv_image.c | 10 +++--- src/intel/vulkan/anv_private.h | 1 + 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index d400a1328b4..7522b7865c2 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -1422,6 +1422,20 @@ vk_priority_to_gen(int priority) } } +static void +anv_device_init_hiz_clear_batch(struct anv_device *device) +{ + anv_bo_init_new(>hiz_clear_bo, device, 4096); + uint32_t *map = anv_gem_mmap(device, device->hiz_clear_bo.gem_handle, +0, 4096, 0); + + union isl_color_value hiz_clear = { .u32 = { 0, } }; + hiz_clear.f32[0] = ANV_HZ_FC_VAL; + + memcpy(map, hiz_clear.u32, sizeof(hiz_clear.u32)); + anv_gem_munmap(map, device->hiz_clear_bo.size); +} + VkResult anv_CreateDevice( VkPhysicalDevicephysicalDevice, const VkDeviceCreateInfo* pCreateInfo, @@ -1602,6 +1616,9 @@ VkResult anv_CreateDevice( anv_device_init_trivial_batch(device); + if (device->info.gen >= 10) + anv_device_init_hiz_clear_batch(device); + anv_scratch_pool_init(device, >scratch_pool); anv_queue_init(device, >queue); @@ -1695,6 +1712,8 @@ void anv_DestroyDevice( anv_gem_close(device, device->workaround_bo.gem_handle); anv_gem_close(device, device->trivial_batch_bo.gem_handle); + if (device->info.gen >= 10) + anv_gem_close(device, device->hiz_clear_bo.gem_handle); anv_state_pool_finish(>surface_state_pool); anv_state_pool_finish(>instruction_state_pool); diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index da4601ce20e..a941559eb3a 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -1061,9 +1061,13 @@ anv_image_fill_surface_state(struct anv_device *device, struct anv_address clear_address = { .bo = 
NULL }; state_inout->clear_address = 0; - if (device->info.gen >= 10 && aux_usage != ISL_AUX_USAGE_NONE && - aux_usage != ISL_AUX_USAGE_HIZ) { - clear_address = anv_image_get_clear_color_addr(device, image, aspect); + + if (device->info.gen >= 10 && aux_usage != ISL_AUX_USAGE_NONE) { + if (aux_usage == ISL_AUX_USAGE_HIZ) { + clear_address = (struct anv_address) { .bo = >hiz_clear_bo }; + } else { + clear_address = anv_image_get_clear_color_addr(device, image, aspect); + } } if (view_usage == ISL_SURF_USAGE_STORAGE_BIT && diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 1e352576e22..3d0adfda558 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -895,6 +895,7 @@ struct anv_device { struct anv_bo workaround_bo; struct anv_bo trivial_batch_bo; +struct anv_bo hiz_clear_bo; struct anv_pipeline_cache blorp_shader_cache; struct blorp_contextblorp; -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v5 07/19] intel/blorp: Add support for fast clear address.
On gen10+, if surface->clear_color_addr is present, use it directly instead of copying it to the surface state. v4: Remove redundant #if clause for GEN <= 10 (Jason) v5: Move flush after the reloc, and keep lower bits (Topi). Signed-off-by: Rafael Antognolli Reviewed-by: Jason Ekstrand Reviewed-by: Jordan Justen --- src/intel/blorp/blorp_genX_exec.h | 18 +- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/intel/blorp/blorp_genX_exec.h b/src/intel/blorp/blorp_genX_exec.h index f3a96fbd58c..65c06c0ed5e 100644 --- a/src/intel/blorp/blorp_genX_exec.h +++ b/src/intel/blorp/blorp_genX_exec.h @@ -1313,11 +1313,15 @@ blorp_emit_surface_state(struct blorp_batch *batch, write_disable_mask |= ISL_CHANNEL_ALPHA_BIT; } + const bool use_clear_address = + GEN_GEN >= 10 && (surface->clear_color_addr.buffer != NULL); + isl_surf_fill_state(batch->blorp->isl_dev, state, .surf = , .view = >view, .aux_surf = >aux_surf, .aux_usage = aux_usage, .mocs = surface->addr.mocs, .clear_color = surface->clear_color, + .use_clear_address = use_clear_address, .write_disables = write_disable_mask); blorp_surface_reloc(batch, state_offset + isl_dev->ss.addr_offset, @@ -1334,12 +1338,14 @@ blorp_emit_surface_state(struct blorp_batch *batch, surface->aux_addr, *aux_addr); } - blorp_flush_range(batch, state, GENX(RENDER_SURFACE_STATE_length) * 4); - if (surface->clear_color_addr.buffer) { -#if GEN_GEN > 10 - unreachable("Implement indirect clear support on gen11+"); -#elif GEN_GEN >= 7 && GEN_GEN <= 10 +#if GEN_GEN >= 10 + assert((surface->clear_color_addr.offset & 0x3f) == 0); + uint32_t *clear_addr = state + isl_dev->ss.clear_color_state_offset; + blorp_surface_reloc(batch, state_offset + + isl_dev->ss.clear_color_state_offset, + surface->clear_color_addr, *clear_addr); +#elif GEN_GEN >= 7 struct blorp_address dst_addr = blorp_get_surface_base_address(batch); dst_addr.offset += state_offset + isl_dev->ss.clear_value_offset; blorp_emit_memcpy(batch, dst_addr, surface->clear_color_addr, 
@@ -1348,6 +1354,8 @@ blorp_emit_surface_state(struct blorp_batch *batch, unreachable("Fast clears are only supported on gen7+"); #endif } + + blorp_flush_range(batch, state, GENX(RENDER_SURFACE_STATE_length) * 4); } static void -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v5 14/19] i965/surface_state: Emit the clear color address instead of value.
On Gen10, when emitting the surface state, use the value stored in the clear color entry buffer by using a clear color address in the surface state. v4: Use the clear color offset from the clear_color_bo, when available. Signed-off-by: Rafael AntognolliReviewed-by: Jason Ekstrand Reviewed-by: Jordan Justen --- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 22 ++ 1 file changed, 22 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index ea855916403..3fb101bf68b 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -187,6 +187,15 @@ brw_emit_surface_state(struct brw_context *brw, brw->isl_dev.ss.align, surf_offset); + bool use_clear_address = devinfo->gen >= 10 && aux_surf; + + struct brw_bo *clear_bo = NULL; + uint32_t clear_offset = 0; + if (use_clear_address) { + clear_bo = aux_buf->clear_color_bo; + clear_offset = aux_buf->clear_color_offset; + } + isl_surf_fill_state(>isl_dev, state, .surf = , .view = , .address = brw_state_reloc(>batch, *surf_offset + brw->isl_dev.ss.addr_offset, @@ -195,6 +204,8 @@ brw_emit_surface_state(struct brw_context *brw, .aux_address = aux_offset, .mocs = brw_get_bo_mocs(devinfo, mt->bo), .clear_color = clear_color, + .use_clear_address = use_clear_address, + .clear_address = clear_offset, .x_offset_sa = tile_x, .y_offset_sa = tile_y); if (aux_surf) { /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the @@ -224,6 +235,17 @@ brw_emit_surface_state(struct brw_context *brw, } } + + if (use_clear_address) { + /* Make sure the offset is aligned with a cacheline. 
*/ + assert((clear_offset & 0x3f) == 0); + uint32_t *clear_address = +state + brw->isl_dev.ss.clear_color_state_offset; + *clear_address = brw_state_reloc(>batch, + *surf_offset + + brw->isl_dev.ss.clear_color_state_offset, + clear_bo, *clear_address, reloc_flags); + } } static uint32_t -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v5 15/19] anv: Add a helper to extract clear color from the attachment.
Extract the code from color_attachment_compute_aux_usage, so we can later reuse it to update the clear color state buffer. Signed-off-by: Rafael AntognolliReviewed-by: Jason Ekstrand Reviewed-by: Jordan Justen --- src/intel/vulkan/anv_private.h | 20 src/intel/vulkan/genX_cmd_buffer.c | 14 +- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 08e4362b028..fcb1f8b54ef 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -2894,6 +2894,26 @@ void anv_fill_buffer_surface_state(struct anv_device *device, uint32_t offset, uint32_t range, uint32_t stride); +static inline void +anv_clear_color_from_att_state(union isl_color_value *clear_color, + const struct anv_attachment_state *att_state, + const struct anv_image_view *iview) +{ + const struct isl_format_layout *view_fmtl = + isl_format_get_layout(iview->planes[0].isl.format); + +#define COPY_CLEAR_COLOR_CHANNEL(c, i) \ + if (view_fmtl->channels.c.bits) \ + clear_color->u32[i] = att_state->clear_value.color.uint32[i] + + COPY_CLEAR_COLOR_CHANNEL(r, 0); + COPY_CLEAR_COLOR_CHANNEL(g, 1); + COPY_CLEAR_COLOR_CHANNEL(b, 2); + COPY_CLEAR_COLOR_CHANNEL(a, 3); + +#undef COPY_CLEAR_COLOR_CHANNEL +} + struct anv_ycbcr_conversion { const struct anv_format *format; diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 9411854b7e5..3bf71b073bf 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -272,20 +272,8 @@ color_attachment_compute_aux_usage(struct anv_device * device, assert(iview->image->planes[0].aux_surface.isl.usage & (ISL_SURF_USAGE_CCS_BIT | ISL_SURF_USAGE_MCS_BIT)); - const struct isl_format_layout *view_fmtl = - isl_format_get_layout(iview->planes[0].isl.format); union isl_color_value clear_color = {}; - -#define COPY_CLEAR_COLOR_CHANNEL(c, i) \ - if (view_fmtl->channels.c.bits) \ - clear_color.u32[i] = 
att_state->clear_value.color.uint32[i] - - COPY_CLEAR_COLOR_CHANNEL(r, 0); - COPY_CLEAR_COLOR_CHANNEL(g, 1); - COPY_CLEAR_COLOR_CHANNEL(b, 2); - COPY_CLEAR_COLOR_CHANNEL(a, 3); - -#undef COPY_CLEAR_COLOR_CHANNEL + anv_clear_color_from_att_state(_color, att_state, iview); att_state->clear_color_is_zero_one = isl_color_value_is_zero_one(clear_color, iview->planes[0].isl.format); -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v5 18/19] anv: Make blorp update the clear color.
Instead of updating the clear color in anv before a resolve, just let blorp handle that for us during fast clears. v5: Update comment about HiZ clear color (Jordan). Signed-off-by: Rafael AntognolliReviewed-by: Jason Ekstrand Reviewed-by: Jordan Justen --- src/intel/vulkan/anv_blorp.c | 69 +++--- src/intel/vulkan/anv_private.h | 6 ++-- src/intel/vulkan/genX_cmd_buffer.c | 54 +++-- 3 files changed, 66 insertions(+), 63 deletions(-) diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c index 8f29bc8398f..68e2ed65c29 100644 --- a/src/intel/vulkan/anv_blorp.c +++ b/src/intel/vulkan/anv_blorp.c @@ -222,6 +222,28 @@ get_blorp_surf_for_anv_image(const struct anv_device *device, .mocs = device->default_mocs, }; blorp_surf->aux_usage = aux_usage; + + /* If we're doing a partial resolve, then we need the indirect clear + * color. If we are doing a fast clear and want to store/update the + * clear color, we also pass the address to blorp, otherwise it will only + * stomp the CCS to a particular value and won't care about format or + * clear value + */ + if (aspect & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) { + const struct anv_address clear_color_addr = +anv_image_get_clear_color_addr(device, image, aspect); + blorp_surf->clear_color_addr = anv_to_blorp_address(clear_color_addr); + } else if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT + && device->info.gen >= 10) { + /* Vulkan always clears to 1.0. On gen < 10, we set that directly in + * the state packet. For gen >= 10, must provide the clear value in a + * buffer. We have a single global buffer that stores the 1.0 value. 
+ */ + const struct anv_address clear_color_addr = (struct anv_address) { +.bo = (struct anv_bo *)>hiz_clear_bo + }; + blorp_surf->clear_color_addr = anv_to_blorp_address(clear_color_addr); + } } } @@ -1594,7 +1616,8 @@ anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer, const struct anv_image *image, VkImageAspectFlagBits aspect, uint32_t base_layer, uint32_t layer_count, - enum isl_aux_op mcs_op, bool predicate) + enum isl_aux_op mcs_op, union isl_color_value *clear_value, + bool predicate) { assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT); assert(image->samples > 1); @@ -1612,14 +1635,18 @@ anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer, ANV_IMAGE_LAYOUT_EXPLICIT_AUX, ISL_AUX_USAGE_MCS, ); - if (mcs_op == ISL_AUX_OP_PARTIAL_RESOLVE) { - /* If we're doing a partial resolve, then we need the indirect clear - * color. The clear operation just stomps the CCS to a particular value - * and don't care about format or clear value. - */ - const struct anv_address clear_color_addr = - anv_image_get_clear_color_addr(cmd_buffer->device, image, aspect); - surf.clear_color_addr = anv_to_blorp_address(clear_color_addr); + /* Blorp will store the clear color for us if we provide the clear color +* address and we are doing a fast clear. So we save the clear value into +* the blorp surface. However, in some situations we want to do a fast clear +* without changing the clear value stored in the state buffer. For those +* cases, we set the clear color address pointer to NULL, so blorp will not +* try to store a garbage color. +*/ + if (mcs_op == ISL_AUX_OP_FAST_CLEAR) { + if (clear_value) + surf.clear_color = *clear_value; + else + surf.clear_color_addr.buffer = NULL; } /* From the Sky Lake PRM Vol. 
7, "Render Target Fast Clear": @@ -1667,7 +1694,8 @@ anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer, const struct anv_image *image, VkImageAspectFlagBits aspect, uint32_t level, uint32_t base_layer, uint32_t layer_count, - enum isl_aux_op ccs_op, bool predicate) + enum isl_aux_op ccs_op, union isl_color_value *clear_value, + bool predicate) { assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV); assert(image->samples == 1); @@ -1693,15 +1721,18 @@ anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer, fast_clear_aux_usage(image, aspect), ); - if (ccs_op == ISL_AUX_OP_FULL_RESOLVE || - ccs_op == ISL_AUX_OP_PARTIAL_RESOLVE) { - /* If we're doing a resolve operation, then we need the indirect clear - * color. The clear and ambiguate operations just stomp the CCS to a - * particular value and don't care about format or
[Mesa-dev] [PATCH v5 10/19] i965/miptree: Add space to store the clear value in the aux surface.
Similarly to vulkan where we store the clear value in the aux surface, we can do the same in GL. v2: Remove unneeded extra function. v3: Use clear_value_state_size instead of clear_value_size. v4: - rename to clear_color_state_size - store clear_color_bo and clear_color_offset in the aux buf struct v5: Unreference clear color bo (Jordan) Signed-off-by: Rafael AntognolliReviewed-by: Jason Ekstrand Reviewed-by: Jordan Justen --- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 17 + src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 16 2 files changed, 33 insertions(+) diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 23cb40f3226..d11ae65243f 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -1211,6 +1211,7 @@ intel_miptree_aux_buffer_free(struct intel_miptree_aux_buffer *aux_buf) return; brw_bo_unreference(aux_buf->bo); + brw_bo_unreference(aux_buf->clear_color_bo); free(aux_buf); } @@ -1678,6 +1679,17 @@ intel_alloc_aux_buffer(struct brw_context *brw, return false; buf->size = aux_surf->size; + + const struct gen_device_info *devinfo = >screen->devinfo; + if (devinfo->gen >= 10) { + /* On CNL, instead of setting the clear color in the SURFACE_STATE, we + * will set a pointer to a dword somewhere that contains the color. So, + * allocate the space for the clear color value here on the aux buffer. 
+ */ + buf->clear_color_offset = buf->size; + buf->size += brw->isl_dev.ss.clear_color_state_size; + } + buf->pitch = aux_surf->row_pitch; buf->qpitch = isl_surf_get_array_pitch_sa_rows(aux_surf); @@ -1692,6 +1704,11 @@ intel_alloc_aux_buffer(struct brw_context *brw, return NULL; } + if (devinfo->gen >= 10) { + buf->clear_color_bo = buf->bo; + brw_bo_reference(buf->clear_color_bo); + } + buf->surf = *aux_surf; return buf; diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h index 07c85807e80..54d36400757 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h @@ -180,6 +180,22 @@ struct intel_miptree_aux_buffer * @see 3DSTATE_HIER_DEPTH_BUFFER.SurfaceQPitch */ uint32_t qpitch; + + /** +* Buffer object containing the indirect clear color. +* +* @see create_ccs_buf_for_image +* @see RENDER_SURFACE_STATE.ClearValueAddress +*/ + struct brw_bo *clear_color_bo; + + /** +* Offset into bo where the clear color can be found. +* +* @see create_ccs_buf_for_image +* @see RENDER_SURFACE_STATE.ClearValueAddress +*/ + uint32_t clear_color_offset; }; struct intel_mipmap_tree -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v5 11/19] i965/miptree: Add new clear color BO for winsys aux buffers
Add an extra BO to store clear color when we receive the aux buffer from the window system. Since we have no control over the aux buffer size in this case, we need the new BO to store only the clear color. v5: - Better subject (Jordan). - Drop alignment from brw_bo_alloc(). Signed-off-by: Rafael AntognolliReviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 17 + 1 file changed, 17 insertions(+) diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index d11ae65243f..89074a64930 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -969,6 +969,23 @@ create_ccs_buf_for_image(struct brw_context *brw, return false; } + /* On gen10+ we start using an extra space in the aux buffer to store the +* indirect clear color. However, if we imported an image from the window +* system with CCS, we don't have the extra space at the end of the aux +* buffer. So create a new bo here that will store that clear color. +*/ + const struct gen_device_info *devinfo = >screen->devinfo; + if (devinfo->gen >= 10) { + mt->mcs_buf->clear_color_bo = + brw_bo_alloc(brw->bufmgr, "clear_color_bo", + brw->isl_dev.ss.clear_color_state_size); + if (!mt->mcs_buf->clear_color_bo) { + free(mt->mcs_buf); + mt->mcs_buf = NULL; + return false; + } + } + mt->mcs_buf->bo = image->bo; brw_bo_reference(image->bo); -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v5 13/19] i965/blorp: Update the fast clear value buffer.
On Gen10, whenever we do a fast clear, blorp will update the clear color state buffer for us, as long as we set the clear color address correctly. However, on a hiz clear, if the surface is already on the fast clear state we skip the actual fast clear operation and, before gen10, only updated the miptree. On gen10+ we need to update the clear value state buffer too, since blorp will not be doing a fast clear and updating it for us. v4: - do not use clear_value_size in the for loop - Get the address of the clear color from the aux buffer or the clear_color_bo, depending on which one is available. - let core blorp update the clear color, but also update it when we skip a fast clear depth. v5: Better subject (Jordan). Signed-off-by: Rafael Antognolli--- src/mesa/drivers/dri/i965/brw_blorp.c | 11 +++ src/mesa/drivers/dri/i965/brw_clear.c | 22 ++ 2 files changed, 33 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c b/src/mesa/drivers/dri/i965/brw_blorp.c index a0977598309..e2287cbad3b 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.c +++ b/src/mesa/drivers/dri/i965/brw_blorp.c @@ -185,6 +185,17 @@ blorp_surf_for_miptree(struct brw_context *brw, surf->aux_addr.buffer = aux_buf->bo; surf->aux_addr.offset = aux_buf->offset; + + if (devinfo->gen >= 10) { + /* If we have a CCS surface and clear_color_bo set, use that bo as + * storage for the indirect clear color. Otherwise, use the extra + * space at the end of the aux_buffer. 
+ */ + surf->clear_color_addr = (struct blorp_address) { +.buffer = aux_buf->clear_color_bo, +.offset = aux_buf->clear_color_offset, + }; + } } else { surf->aux_addr = (struct blorp_address) { .buffer = NULL, diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c index 8aa83722ee9..63c0b241898 100644 --- a/src/mesa/drivers/dri/i965/brw_clear.c +++ b/src/mesa/drivers/dri/i965/brw_clear.c @@ -108,6 +108,7 @@ brw_fast_clear_depth(struct gl_context *ctx) struct intel_mipmap_tree *mt = depth_irb->mt; struct gl_renderbuffer_attachment *depth_att = >Attachment[BUFFER_DEPTH]; const struct gen_device_info *devinfo = >screen->devinfo; + bool same_clear_value = true; if (devinfo->gen < 6) return false; @@ -213,6 +214,7 @@ brw_fast_clear_depth(struct gl_context *ctx) } intel_miptree_set_depth_clear_value(ctx, mt, clear_value); + same_clear_value = false; } bool need_clear = false; @@ -232,6 +234,26 @@ brw_fast_clear_depth(struct gl_context *ctx) * state then simply updating the miptree fast clear value is sufficient * to change their clear value. */ + if (devinfo->gen >= 10 && !same_clear_value) { + /* Before gen10, it was enough to just update the clear value in the + * miptree. But on gen10+, we let blorp update the clear value state + * buffer when doing a fast clear. Since we are skipping the fast + * clear here, we need to update the clear color ourselves. + */ + uint32_t clear_offset = mt->hiz_buf->clear_color_offset; + union isl_color_value clear_color = { .f32 = { clear_value, } }; + + /* We can't update the clear color while the hardware is still using + * the previous one for a resolve or sampling from it. So make sure + * that there's no pending commands at this point. 
+ */ + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL); + for (int i = 0; i < 4; i++) { +brw_store_data_imm32(brw, mt->hiz_buf->clear_color_bo, + clear_offset + i * 4, clear_color.u32[i]); + } + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE); + } return true; } -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v5 04/19] intel/genxml: Add Clear Color struct to gen10+.
v5: Split genxml changes into its own commit (Jason). Signed-off-by: Rafael Antognolli--- src/intel/genxml/gen10.xml | 8 src/intel/genxml/gen11.xml | 10 ++ 2 files changed, 18 insertions(+) diff --git a/src/intel/genxml/gen10.xml b/src/intel/genxml/gen10.xml index 4ad6f36ce1e..f6c5f86aed0 100644 --- a/src/intel/genxml/gen10.xml +++ b/src/intel/genxml/gen10.xml @@ -584,6 +584,14 @@ + + + + + + + + diff --git a/src/intel/genxml/gen11.xml b/src/intel/genxml/gen11.xml index 51f73ba80fa..2232132e0ff 100644 --- a/src/intel/genxml/gen11.xml +++ b/src/intel/genxml/gen11.xml @@ -586,6 +586,16 @@ + + + + + + + + + + -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v5 16/19] anv: Emit the fast clear color address, instead of value.
On Gen10+, instead of copying the clear color from the state buffer to the surface state, just use the address of the state buffer in the surface state directly. This way we can avoid the copy from state buffer to surface state. v4: - Remove use_clear_address from anv code. (Jason) - Use the helper to extract clear color from attachment (Jason) Signed-off-by: Rafael AntognolliReviewed-by: Jason Ekstrand Reviewed-by: Jordan Justen --- src/intel/vulkan/anv_image.c | 17 + src/intel/vulkan/anv_private.h | 5 src/intel/vulkan/genX_cmd_buffer.c | 52 +++--- 3 files changed, 70 insertions(+), 4 deletions(-) diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index d9b5d266020..da4601ce20e 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -1059,6 +1059,13 @@ anv_image_fill_surface_state(struct anv_device *device, const uint64_t aux_address = aux_usage == ISL_AUX_USAGE_NONE ? 0 : (image->planes[plane].bo_offset + aux_surface->offset); + struct anv_address clear_address = { .bo = NULL }; + state_inout->clear_address = 0; + if (device->info.gen >= 10 && aux_usage != ISL_AUX_USAGE_NONE && + aux_usage != ISL_AUX_USAGE_HIZ) { + clear_address = anv_image_get_clear_color_addr(device, image, aspect); + } + if (view_usage == ISL_SURF_USAGE_STORAGE_BIT && !(flags & ANV_IMAGE_VIEW_STATE_STORAGE_WRITE_ONLY) && !isl_has_matching_typed_storage_image_format(>info, @@ -1076,6 +1083,7 @@ anv_image_fill_surface_state(struct anv_device *device, .mocs = device->default_mocs); state_inout->address = address, state_inout->aux_address = 0; + state_inout->clear_address = 0; } else { if (view_usage == ISL_SURF_USAGE_STORAGE_BIT && !(flags & ANV_IMAGE_VIEW_STATE_STORAGE_WRITE_ONLY)) { @@ -1149,6 +1157,8 @@ anv_image_fill_surface_state(struct anv_device *device, .aux_surf = _surface->isl, .aux_usage = aux_usage, .aux_address = aux_address, + .clear_address = clear_address.offset, + .use_clear_address = clear_address.bo != NULL, .mocs = 
device->default_mocs, .x_offset_sa = tile_x_sa, .y_offset_sa = tile_y_sa); @@ -1163,6 +1173,13 @@ anv_image_fill_surface_state(struct anv_device *device, assert((aux_address & 0xfff) == 0); assert(aux_address == (*aux_addr_dw & 0xf000)); state_inout->aux_address = *aux_addr_dw; + + if (device->info.gen >= 10 && clear_address.bo) { + uint32_t *clear_addr_dw = state_inout->state.map + + device->isl_dev.ss.clear_color_state_offset; + assert((clear_address.offset & 0x3f) == 0); + state_inout->clear_address = *clear_addr_dw; + } } anv_state_flush(device, state_inout->state); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index fcb1f8b54ef..1e352576e22 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1704,6 +1704,11 @@ struct anv_surface_state { * gen8, the bottom 12 bits of this address include extra aux information. */ uint64_t aux_address; + /* Address of the clear color, if any +* +* This address is relative to the start of the BO. 
+*/ + uint64_t clear_address; }; /** diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 3bf71b073bf..483c0651a75 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -200,6 +200,17 @@ add_image_view_relocs(struct anv_cmd_buffer *cmd_buffer, if (result != VK_SUCCESS) anv_batch_set_error(_buffer->batch, result); } + + if (state.clear_address) { + VkResult result = + anv_reloc_list_add(_buffer->surface_relocs, +_buffer->pool->alloc, +state.state.offset + +isl_dev->ss.clear_color_state_offset, +image->planes[image_plane].bo, state.clear_address); + if (result != VK_SUCCESS) + anv_batch_set_error(_buffer->batch, result); + } } static void @@ -1124,6 +1135,34 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer, ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT; } +static void +update_fast_clear_color(struct anv_cmd_buffer *cmd_buffer, +const struct anv_attachment_state *att_state, +const struct anv_image_view *iview) +{ + assert(GEN_GEN >= 10); + assert(iview->image->aspects == VK_IMAGE_ASPECT_COLOR_BIT); + + struct anv_address
[Mesa-dev] [PATCH v5 06/19] intel/isl: Add support to emit clear value address.
gen10 can emit the clear color by setting it on a buffer somewhere, and then adding only the address to the surface state. This commit adds support for that on isl_surf_fill_state, and if that is requested, skip setting the clear value itself. v2: Add assert to make sure we are at least on gen10. Signed-off-by: Rafael Antognolli Reviewed-by: Jordan Justen --- src/intel/isl/isl.h | 9 + src/intel/isl/isl_surface_state.c | 18 ++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h index 2edf0522e32..c50b78d4701 100644 --- a/src/intel/isl/isl.h +++ b/src/intel/isl/isl.h @@ -1307,6 +1307,15 @@ struct isl_surf_fill_state_info { */ union isl_color_value clear_color; + /** +* Send only the clear value address +* +* If set, we only pass the clear address to the GPU and it will fetch it +* from wherever it is. +*/ + bool use_clear_address; + uint64_t clear_address; + /** * Surface write disables for gen4-5 */ diff --git a/src/intel/isl/isl_surface_state.c b/src/intel/isl/isl_surface_state.c index 32a5429f2bf..bff9693f02d 100644 --- a/src/intel/isl/isl_surface_state.c +++ b/src/intel/isl/isl_surface_state.c @@ -637,11 +637,21 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state, #endif if (info->aux_usage != ISL_AUX_USAGE_NONE) { + if (info->use_clear_address) { +#if GEN_GEN >= 10 + s.ClearValueAddressEnable = true; + s.ClearValueAddress = info->clear_address; +#else + unreachable("Gen9 and earlier do not support indirect clear colors"); +#endif + } #if GEN_GEN >= 9 - s.RedClearColor = info->clear_color.u32[0]; - s.GreenClearColor = info->clear_color.u32[1]; - s.BlueClearColor = info->clear_color.u32[2]; - s.AlphaClearColor = info->clear_color.u32[3]; + if (!info->use_clear_address) { + s.RedClearColor = info->clear_color.u32[0]; + s.GreenClearColor = info->clear_color.u32[1]; + s.BlueClearColor = info->clear_color.u32[2]; + s.AlphaClearColor = info->clear_color.u32[3]; + } #elif GEN_GEN >= 7 /* Prior to 
Sky Lake, we only have one bit for the clear color which * gives us 0 or 1 in whatever the surface's format happens to be. -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v5 08/19] intel/blorp: Only copy clear color when doing a resolve.
We only need to copy the clear color from the state buffer to the inlined surface state when doing a resolve. Signed-off-by: Rafael AntognolliReviewed-by: Jason Ekstrand Reviewed-by: Jordan Justen --- src/intel/blorp/blorp_genX_exec.h | 13 + 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/intel/blorp/blorp_genX_exec.h b/src/intel/blorp/blorp_genX_exec.h index 65c06c0ed5e..eb64eaff0c8 100644 --- a/src/intel/blorp/blorp_genX_exec.h +++ b/src/intel/blorp/blorp_genX_exec.h @@ -1283,6 +1283,7 @@ blorp_emit_memcpy(struct blorp_batch *batch, static void blorp_emit_surface_state(struct blorp_batch *batch, const struct brw_blorp_surface_info *surface, + enum isl_aux_op op, void *state, uint32_t state_offset, const bool color_write_disables[4], bool is_render_target) @@ -1346,10 +1347,12 @@ blorp_emit_surface_state(struct blorp_batch *batch, isl_dev->ss.clear_color_state_offset, surface->clear_color_addr, *clear_addr); #elif GEN_GEN >= 7 - struct blorp_address dst_addr = blorp_get_surface_base_address(batch); - dst_addr.offset += state_offset + isl_dev->ss.clear_value_offset; - blorp_emit_memcpy(batch, dst_addr, surface->clear_color_addr, -isl_dev->ss.clear_value_size); + if (op == ISL_AUX_OP_FULL_RESOLVE || op == ISL_AUX_OP_PARTIAL_RESOLVE) { + struct blorp_address dst_addr = blorp_get_surface_base_address(batch); + dst_addr.offset += state_offset + isl_dev->ss.clear_value_offset; + blorp_emit_memcpy(batch, dst_addr, surface->clear_color_addr, + isl_dev->ss.clear_value_size); + } #else unreachable("Fast clears are only supported on gen7+"); #endif @@ -1411,6 +1414,7 @@ blorp_emit_surface_states(struct blorp_batch *batch, if (params->dst.enabled) { blorp_emit_surface_state(batch, >dst, + params->fast_clear_op, surface_maps[BLORP_RENDERBUFFER_BT_INDEX], surface_offsets[BLORP_RENDERBUFFER_BT_INDEX], params->color_write_disable, true); @@ -1426,6 +1430,7 @@ blorp_emit_surface_states(struct blorp_batch *batch, if (params->src.enabled) { 
blorp_emit_surface_state(batch, >src, + params->fast_clear_op, surface_maps[BLORP_TEXTURE_BT_INDEX], surface_offsets[BLORP_TEXTURE_BT_INDEX], NULL, false); -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v5 05/19] intel: Use Clear Color struct size.
The size of the clear color struct (expected by the hardware) is 8 dwords (isl_dev.ss.clear_value_state_size here). But we still need to track the size of the clear color, used when memcopying it to/from the state buffer. For that we keep isl_dev.ss.clear_value_size. v4: - Add struct to gen11 too (Jason, Jordan) - Add field for Converted Clear Color to gen11 (Jason) - Add clear_color_state_offset to differentiate from clear_value_offset. - Fix all the places where clear_value_size was used. v5 (Jason): - Split genxml changes to another commit. - Remove unnecessary gen checks. - Bring back missing offset increment to init_fast_clear_color(). [jordan.l.jus...@intel.com: isl_device_init changes] Signed-off-by: Rafael AntognolliSigned-off-by: Jordan Justen --- src/intel/blorp/blorp_genX_exec.h | 5 +++-- src/intel/isl/isl.c| 4 src/intel/isl/isl.h| 6 ++ src/intel/vulkan/anv_image.c | 6 +- src/intel/vulkan/anv_private.h | 6 +- src/intel/vulkan/genX_cmd_buffer.c | 23 --- 6 files changed, 35 insertions(+), 15 deletions(-) diff --git a/src/intel/blorp/blorp_genX_exec.h b/src/intel/blorp/blorp_genX_exec.h index 992bc9959a1..f3a96fbd58c 100644 --- a/src/intel/blorp/blorp_genX_exec.h +++ b/src/intel/blorp/blorp_genX_exec.h @@ -310,10 +310,11 @@ blorp_emit_vertex_buffers(struct blorp_batch *batch, uint32_t num_vbs = 2; if (params->dst_clear_color_as_input) { + const unsigned clear_color_size = + GEN_GEN < 10 ? 
batch->blorp->isl_dev->ss.clear_value_size : 4 * 4; blorp_fill_vertex_buffer_state(batch, vb, num_vbs++, params->dst.clear_color_addr, - batch->blorp->isl_dev->ss.clear_value_size, - 0); + clear_color_size, 0); } const unsigned num_dwords = 1 + num_vbs * GENX(VERTEX_BUFFER_STATE_length); diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c index 1a32c028183..875c691b43e 100644 --- a/src/intel/isl/isl.c +++ b/src/intel/isl/isl.c @@ -73,6 +73,10 @@ isl_device_init(struct isl_device *dev, dev->ss.size = RENDER_SURFACE_STATE_length(info) * 4; dev->ss.align = isl_align(dev->ss.size, 32); + dev->ss.clear_color_state_size = CLEAR_COLOR_length(info) * 4; + dev->ss.clear_color_state_offset = + RENDER_SURFACE_STATE_ClearValueAddress_start(info) / 32 * 4; + dev->ss.clear_value_size = isl_align(RENDER_SURFACE_STATE_RedClearColor_bits(info) + RENDER_SURFACE_STATE_GreenClearColor_bits(info) + diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h index 0da6abb71d4..2edf0522e32 100644 --- a/src/intel/isl/isl.h +++ b/src/intel/isl/isl.h @@ -963,6 +963,12 @@ struct isl_device { uint8_t aux_addr_offset; /* Rounded up to the nearest dword to simplify GPU memcpy operations. */ + + /* size of the state buffer used to store the clear color + extra + * additional space used by the hardware */ + uint8_t clear_color_state_size; + uint8_t clear_color_state_offset; + /* size of the clear color itself - used to copy it to/from a BO */ uint8_t clear_value_size; uint8_t clear_value_offset; } ss; diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index b20d791751d..d9b5d266020 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -267,8 +267,12 @@ add_aux_state_tracking_buffer(struct anv_image *image, (image->planes[plane].offset + image->planes[plane].size)); } + const unsigned clear_color_state_size = device->info.gen >= 10 ? 
+ device->isl_dev.ss.clear_color_state_size : + device->isl_dev.ss.clear_value_size; + /* Clear color and fast clear type */ - unsigned state_size = device->isl_dev.ss.clear_value_size + 4; + unsigned state_size = clear_color_state_size + 4; /* We only need to track compression on CCS_E surfaces. */ if (image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E) { diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 3c803178c41..08e4362b028 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -2606,7 +2606,11 @@ anv_image_get_fast_clear_type_addr(const struct anv_device *device, { struct anv_address addr = anv_image_get_clear_color_addr(device, image, aspect); - addr.offset += device->isl_dev.ss.clear_value_size; + + const unsigned clear_color_state_size = device->info.gen >= 10 ? + device->isl_dev.ss.clear_color_state_size : + device->isl_dev.ss.clear_value_size; + addr.offset += clear_color_state_size; return addr; } diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index b5741fb8dc1..9411854b7e5 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c
[Mesa-dev] [PATCH v5 09/19] intel/blorp: Update clear color state buffer during fast clears.
We always want to update the fast clear color during a fast clear on i965. On anv, we are doing that before a resolve, but by adding support to blorp, we can do a similar thing and update it during a fast clear instead. The goal is to remove some code from anv that does such update, and centralize everything in blorp, hopefully removing a lot of code duplication. It also allows us to have a similar behavior on gen < 9 and gen >= 10. v5: s/we/we are/ (Jordan) Signed-off-by: Rafael AntognolliReviewed-by: Jason Ekstrand Reviewed-by: Jordan Justen --- src/intel/blorp/blorp_genX_exec.h | 48 +++ 1 file changed, 48 insertions(+) diff --git a/src/intel/blorp/blorp_genX_exec.h b/src/intel/blorp/blorp_genX_exec.h index eb64eaff0c8..7851228d8dc 100644 --- a/src/intel/blorp/blorp_genX_exec.h +++ b/src/intel/blorp/blorp_genX_exec.h @@ -1643,6 +1643,51 @@ blorp_emit_gen8_hiz_op(struct blorp_batch *batch, } #endif +static void +blorp_update_clear_color(struct blorp_batch *batch, + const struct brw_blorp_surface_info *info, + enum isl_aux_op op) +{ + if (info->clear_color_addr.buffer && op == ISL_AUX_OP_FAST_CLEAR) { +#if GEN_GEN >= 9 + for (int i = 0; i < 4; i++) { + blorp_emit(batch, GENX(MI_STORE_DATA_IMM), sdi) { +sdi.Address = info->clear_color_addr; +sdi.Address.offset += i * 4; +sdi.ImmediateData = info->clear_color.u32[i]; + } + } +#elif GEN_GEN >= 7 + blorp_emit(batch, GENX(MI_STORE_DATA_IMM), sdi) { + sdi.Address = info->clear_color_addr; + sdi.ImmediateData = ISL_CHANNEL_SELECT_RED << 25 | + ISL_CHANNEL_SELECT_GREEN << 22 | + ISL_CHANNEL_SELECT_BLUE << 19 | + ISL_CHANNEL_SELECT_ALPHA << 16; + if (isl_format_has_int_channel(info->view.format)) { +for (unsigned i = 0; i < 4; i++) { + assert(info->clear_color.u32[i] == 0 || + info->clear_color.u32[i] == 1); +} +sdi.ImmediateData |= (info->clear_color.u32[0] != 0) << 31; +sdi.ImmediateData |= (info->clear_color.u32[1] != 0) << 30; +sdi.ImmediateData |= (info->clear_color.u32[2] != 0) << 29; +sdi.ImmediateData |= 
(info->clear_color.u32[3] != 0) << 28; + } else { +for (unsigned i = 0; i < 4; i++) { + assert(info->clear_color.f32[i] == 0.0f || + info->clear_color.f32[i] == 1.0f); +} +sdi.ImmediateData |= (info->clear_color.f32[0] != 0.0f) << 31; +sdi.ImmediateData |= (info->clear_color.f32[1] != 0.0f) << 30; +sdi.ImmediateData |= (info->clear_color.f32[2] != 0.0f) << 29; +sdi.ImmediateData |= (info->clear_color.f32[3] != 0.0f) << 28; + } + } +#endif + } +} + /** * \brief Execute a blit or render pass operation. * @@ -1655,6 +1700,9 @@ blorp_emit_gen8_hiz_op(struct blorp_batch *batch, static void blorp_exec(struct blorp_batch *batch, const struct blorp_params *params) { + blorp_update_clear_color(batch, >dst, params->fast_clear_op); + blorp_update_clear_color(batch, >depth, params->hiz_op); + #if GEN_GEN >= 8 if (params->hiz_op != ISL_AUX_OP_NONE) { blorp_emit_gen8_hiz_op(batch, params); -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v5 02/19] genxml: Preserve fields that share dword space with addresses.
Some instructions contain fields that are either an address or a value of some type based on the content of other fields, such as clear color values vs address. That works fine if these fields are in the less significant dword, the lower 32 bits of the address, because they get OR'ed with the address. But if they are in the higher 32 bits, they get discarded. On Gen10 we have fields that share space with the higher 16 bits of the address too. This commit makes sure those fields don't get discarded. v5: Remove spurious whitespace (Jason). Signed-off-by: Rafael AntognolliReviewed-by: Jason Ekstrand --- src/intel/genxml/gen_pack_header.py | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/intel/genxml/gen_pack_header.py b/src/intel/genxml/gen_pack_header.py index 7dcada86fae..8989f625d31 100644 --- a/src/intel/genxml/gen_pack_header.py +++ b/src/intel/genxml/gen_pack_header.py @@ -494,8 +494,12 @@ class Group(object): v_address = "v%d_address" % index print(" const uint64_t %s =\n __gen_combine_address(data, [%d], values->%s, %s);" % (v_address, index, dw.address.name + field.dim, v)) -v = v_address - +if len(dw.fields) > address_count: +print(" dw[%d] = %s;" % (index, v_address)) +print(" dw[%d] = (%s >> 32) | (%s >> 32);" % (index + 1, v_address, v)) +continue +else: +v = v_address print(" dw[%d] = %s;" % (index, v)) print(" dw[%d] = %s >> 32;" % (index + 1, v)) -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v5 03/19] intel/genxml: Use a single field for clear color address on gen10.
genxml does not support having two address fields with different names but same position in the state struct. Both "Clear Color Address" and "Clear Depth Address Low" mean the same thing, only for different surface types. To workaround this genxml limitation, rename "Clear Color Address" to "Clear Value Address" and use it for both color and depth. Do the same for the high bits. TODO: add support for multiple addresses at the same position in the xml. v2: Combine high and low order bits into a single address field. Signed-off-by: Rafael AntognolliReviewed-by: Jason Ekstrand Reviewed-by: Jordan Justen --- src/intel/genxml/gen10.xml | 7 +++ src/intel/genxml/gen11.xml | 7 +++ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/intel/genxml/gen10.xml b/src/intel/genxml/gen10.xml index aeb99667592..4ad6f36ce1e 100644 --- a/src/intel/genxml/gen10.xml +++ b/src/intel/genxml/gen10.xml @@ -575,12 +575,11 @@ + + - - - - diff --git a/src/intel/genxml/gen11.xml b/src/intel/genxml/gen11.xml index 6ca0e785ba0..51f73ba80fa 100644 --- a/src/intel/genxml/gen11.xml +++ b/src/intel/genxml/gen11.xml @@ -576,13 +576,12 @@ + + - - - - -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v5 00/19] Use clear color address in surface state.
Another revision, hopefully with all the last suggestions included. This revision of this series can be found here: https://cgit.freedesktop.org/~rantogno/mesa/log/?h=cnl/fast_clear_address_v5 Cc: Jason Ekstrand Cc: Jordan Justen Cc: "Pohjolainen, Topi" Rafael Antognolli (19): anv/image: Do not override lower bits of dword. genxml: Preserve fields that share dword space with addresses. intel/genxml: Use a single field for clear color address on gen10. intel/genxml: Add Clear Color struct to gen10+. intel: Use Clear Color struct size. intel/isl: Add support to emit clear value address. intel/blorp: Add support for fast clear address. intel/blorp: Only copy clear color when doing a resolve. intel/blorp: Update clear color state buffer during fast clears. i965/miptree: Add space to store the clear value in the aux surface. i965/miptree: Add new clear color BO for winsys aux buffers i965: Add aux_buf variable to simplify code. i965/blorp: Update the fast clear value buffer. i965/surface_state: Emit the clear color address instead of value. anv: Add a helper to extract clear color from the attachment. anv: Emit the fast clear color address, instead of value. anv: Use clear address for HiZ fast clears too. anv: Make blorp update the clear color. intel: Remove use_clear_address flag from isl_surf_fill_state_info. 
src/intel/blorp/blorp_genX_exec.h| 80 src/intel/genxml/gen10.xml | 15 +++-- src/intel/genxml/gen11.xml | 17 +++-- src/intel/genxml/gen_pack_header.py | 8 ++- src/intel/isl/isl.c | 4 ++ src/intel/isl/isl.h | 14 + src/intel/isl/isl_surface_state.c| 5 +- src/intel/vulkan/anv_blorp.c | 69 ++-- src/intel/vulkan/anv_device.c| 19 ++ src/intel/vulkan/anv_image.c | 47 ++ src/intel/vulkan/anv_private.h | 42 +++-- src/intel/vulkan/genX_cmd_buffer.c | 73 ++--- src/mesa/drivers/dri/i965/brw_blorp.c| 26 +--- src/mesa/drivers/dri/i965/brw_clear.c| 22 +++ src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 34 +++--- src/mesa/drivers/dri/i965/intel_mipmap_tree.c| 34 ++ src/mesa/drivers/dri/i965/intel_mipmap_tree.h| 16 + 17 files changed, 414 insertions(+), 111 deletions(-) -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v5 01/19] anv/image: Do not override lower bits of dword.
The lower bits seem to have extra fields in every platform but gen8 (even though we don't use them in gen9). So just go ahead and avoid using them for the address. v4: Use Jason's suggestion for comment explaining the change. v5: Fix aux_address comment in anv_private.h (Jason) Signed-off-by: Rafael AntognolliReviewed-by: Jason Ekstrand Reviewed-by: Topi Pohjolainen --- src/intel/vulkan/anv_image.c | 23 ++- src/intel/vulkan/anv_private.h | 4 ++-- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 4d60f872c1e..b20d791751d 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -1149,19 +1149,16 @@ anv_image_fill_surface_state(struct anv_device *device, .x_offset_sa = tile_x_sa, .y_offset_sa = tile_y_sa); state_inout->address = address + offset_B; - if (device->info.gen >= 8) { - state_inout->aux_address = aux_address; - } else { - /* On gen7 and prior, the bottom 12 bits of the MCS base address are - * used to store other information. This should be ok, however, - * because surface buffer addresses are always 4K page alinged. - */ - uint32_t *aux_addr_dw = state_inout->state.map + - device->isl_dev.ss.aux_addr_offset; - assert((aux_address & 0xfff) == 0); - assert(aux_address == (*aux_addr_dw & 0xf000)); - state_inout->aux_address = *aux_addr_dw; - } + + /* With the exception of gen8, the bottom 12 bits of the MCS base address + * are used to store other information. This should be ok, however, + * because the surface buffer addresses are always 4K page aligned. 
+ */ + uint32_t *aux_addr_dw = state_inout->state.map + + device->isl_dev.ss.aux_addr_offset; + assert((aux_address & 0xfff) == 0); + assert(aux_address == (*aux_addr_dw & 0xf000)); + state_inout->aux_address = *aux_addr_dw; } anv_state_flush(device, state_inout->state); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index ee533581ab4..3c803178c41 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1700,8 +1700,8 @@ struct anv_surface_state { * * This field is 0 if and only if no aux surface exists. * -* This address is relative to the start of the BO. On gen7, the bottom 12 -* bits of this address include extra aux information. +* This address is relative to the start of the BO. With the exception of +* gen8, the bottom 12 bits of this address include extra aux information. */ uint64_t aux_address; }; -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] vbo: Use alloca for _vbo_draw_indirect.
On 03/28/2018 04:35 AM, mathias.froehl...@gmx.net wrote: From: Mathias FröhlichMarek, you mean with the below patch as the 9-th change in the series? I would like to keep that change seprarate from #3 since patch #3 just moves the already existing impelentation to the driver_functions level using the exactly identical implementation except calling into struct driver_functions instead of the vbo module draw function. Also I do not want to call just blindly into alloca with possibly large counts. So, the implementation uses an upper bound when to use malloc instead of alloca. Ok, with that? best Mathias Avoid using malloc in the draw path of mesa. Since the draw_count is a user api input, fall back to malloc if the amount of consumed stack space may get too high. Signed-off-by: Mathias Fröhlich --- src/mesa/vbo/vbo_context.c | 70 +++--- 1 file changed, 47 insertions(+), 23 deletions(-) diff --git a/src/mesa/vbo/vbo_context.c b/src/mesa/vbo/vbo_context.c index b8c28ceffb..06b8f820ee 100644 --- a/src/mesa/vbo/vbo_context.c +++ b/src/mesa/vbo/vbo_context.c @@ -233,25 +233,17 @@ _vbo_DestroyContext(struct gl_context *ctx) } -void -_vbo_draw_indirect(struct gl_context *ctx, GLuint mode, -struct gl_buffer_object *indirect_data, -GLsizeiptr indirect_offset, unsigned draw_count, -unsigned stride, -struct gl_buffer_object *indirect_draw_count_buffer, -GLsizeiptr indirect_draw_count_offset, -const struct _mesa_index_buffer *ib) +static void +draw_indirect(struct gl_context *ctx, GLuint mode, + struct gl_buffer_object *indirect_data, + GLsizeiptr indirect_offset, unsigned draw_count, + unsigned stride, + struct gl_buffer_object *indirect_draw_count_buffer, + GLsizeiptr indirect_draw_count_offset, + const struct _mesa_index_buffer *ib, + struct _mesa_prim *space) Can you just rename 'space' to 'prim' and rm the prim = space assignment below? Also, could you put a comment on this function to explain the draw_count and space/prim parameters, at least? 
Other than that, the series looks good. Reviewed-by: Brian Paul Sorry for the slow review, busy with other things. -Brian { - struct _mesa_prim *prim; - - prim = calloc(draw_count, sizeof(*prim)); - if (prim == NULL) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "gl%sDraw%sIndirect%s", - (draw_count > 1) ? "Multi" : "", - ib ? "Elements" : "Arrays", - indirect_data ? "CountARB" : ""); - return; - } + struct _mesa_prim *prim = space; prim[0].begin = 1; prim[draw_count - 1].end = 1; @@ -266,10 +258,42 @@ _vbo_draw_indirect(struct gl_context *ctx, GLuint mode, /* This should always be true at this time */ assert(indirect_data == ctx->DrawIndirectBuffer); - ctx->Driver.Draw(ctx, prim, draw_count, - ib, false, 0, ~0, - NULL, 0, - indirect_data); + ctx->Driver.Draw(ctx, prim, draw_count, ib, false, 0u, ~0u, +NULL, 0, indirect_data); +} + - free(prim); +void +_vbo_draw_indirect(struct gl_context *ctx, GLuint mode, + struct gl_buffer_object *indirect_data, + GLsizeiptr indirect_offset, unsigned draw_count, + unsigned stride, + struct gl_buffer_object *indirect_draw_count_buffer, + GLsizeiptr indirect_draw_count_offset, + const struct _mesa_index_buffer *ib) +{ + /* Use alloca for the prim space if we are somehow in bounds. */ + if (draw_count*sizeof(struct _mesa_prim) < 1024) { + struct _mesa_prim *space = alloca(draw_count*sizeof(struct _mesa_prim)); + memset(space, 0, draw_count*sizeof(struct _mesa_prim)); + + draw_indirect(ctx, mode, indirect_data, indirect_offset, draw_count, +stride, indirect_draw_count_buffer, +indirect_draw_count_offset, ib, space); + } else { + struct _mesa_prim *space = calloc(draw_count, sizeof(struct _mesa_prim)); + if (space == NULL) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "gl%sDraw%sIndirect%s", + (draw_count > 1) ? "Multi" : "", + ib ? "Elements" : "Arrays", + indirect_data ? 
"CountARB" : ""); + return; + } + + draw_indirect(ctx, mode, indirect_data, indirect_offset, draw_count, +stride, indirect_draw_count_buffer, +indirect_draw_count_offset, ib, space); + + free(space); + } } ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org
Re: [Mesa-dev] [PATCH 6/8] vbo: Readd the arrays argument to the legacy draw methods.
On 03/25/2018 12:42 PM, mathias.froehl...@gmx.net wrote: From: Mathias FröhlichThe legacy draw paths from back before 2012 contained a gl_vertex_array array for the inputs to be used for draw. So all draw methods from legacy drivers and evereything that goes through tnl are originally written "everything" -Brian for this calling convention. The same goes for tools like t_rebase or vbo_split*, that even partly still have the original calling convention with a currently unused such pointer. Back in 2012 patch 50f7e75 mesa: move gl_client_array*[] from vbo_draw_func into gl_context introduced Array._DrawArrays, which was something that was IMO aiming for a similar direction than Array._DrawVAO introduced recently. Now several tools like t_rebase and vbo_split*, which are mostly used by tnl based drivers, would need to be converted to use the internal Array._DrawVAO instead of Array._DrawArrays. The same goes for the driver backends that use any of these tools. Alternatively we can reintroduce the gl_vertex_array array in its call argument list and put these tools finally into the tnl directory. So this change reintroduces this gl_vertex_array array for the legacy draw paths that are still required for the tools t_rebase and vbo_split*. A followup will move vbo_split also into tnl. Note that none of the affected drivers use the DriverFlags.NewArray driver bit. So it should be safe to remove this also for the legacy draw path. 
Signed-off-by: Mathias Fröhlich --- src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c | 11 +++ src/mesa/tnl/t_draw.c| 7 --- src/mesa/tnl/t_rebase.c | 10 ++ src/mesa/tnl/tnl.h | 1 + src/mesa/vbo/vbo.h | 2 ++ src/mesa/vbo/vbo_split_copy.c| 8 +--- src/mesa/vbo/vbo_split_inplace.c | 8 +--- 7 files changed, 18 insertions(+), 29 deletions(-) diff --git a/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c b/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c index 10b5c15e41..4533069692 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c @@ -239,6 +239,7 @@ get_max_client_stride(struct gl_context *ctx, const struct gl_vertex_array *arra static void TAG(vbo_render_prims)(struct gl_context *ctx, + const struct gl_vertex_array *arrays, const struct _mesa_prim *prims, GLuint nr_prims, const struct _mesa_index_buffer *ib, GLboolean index_bounds_valid, @@ -476,6 +477,7 @@ vbo_draw_imm(struct gl_context *ctx, const struct gl_vertex_array *arrays, static void TAG(vbo_render_prims)(struct gl_context *ctx, + const struct gl_vertex_array *arrays, const struct _mesa_prim *prims, GLuint nr_prims, const struct _mesa_index_buffer *ib, GLboolean index_bounds_valid, @@ -485,7 +487,6 @@ TAG(vbo_render_prims)(struct gl_context *ctx, struct gl_buffer_object *indirect) { struct nouveau_render_state *render = to_render_state(ctx); - const struct gl_vertex_array *arrays = ctx->Array._DrawArrays; if (!index_bounds_valid) vbo_get_minmax_indices(ctx, prims, ib, _index, _index, @@ -514,6 +515,7 @@ TAG(vbo_render_prims)(struct gl_context *ctx, static void TAG(vbo_check_render_prims)(struct gl_context *ctx, + const struct gl_vertex_array *arrays, const struct _mesa_prim *prims, GLuint nr_prims, const struct _mesa_index_buffer *ib, GLboolean index_bounds_valid, @@ -527,12 +529,12 @@ TAG(vbo_check_render_prims)(struct gl_context *ctx, nouveau_validate_framebuffer(ctx); if (nctx->fallback == HWTNL) - TAG(vbo_render_prims)(ctx, prims, nr_prims, ib, + 
TAG(vbo_render_prims)(ctx, arrays, prims, nr_prims, ib, index_bounds_valid, min_index, max_index, tfb_vertcount, stream, indirect); if (nctx->fallback == SWTNL) - _tnl_draw_prims(ctx, prims, nr_prims, ib, + _tnl_draw_prims(ctx, arrays, prims, nr_prims, ib, index_bounds_valid, min_index, max_index, tfb_vertcount, stream, indirect); } @@ -550,7 +552,8 @@ TAG(vbo_draw)(struct gl_context *ctx, /* Borrow and update the inputs list from the tnl context */ _tnl_bind_inputs(ctx); - TAG(vbo_check_render_prims)(ctx, prims, nr_prims, ib, + TAG(vbo_check_render_prims)(ctx, ctx->Array._DrawArrays, + prims, nr_prims, ib, index_bounds_valid,
[Mesa-dev] [PATCH] meson: fix warnings about comparing unlike types
In the old days (0.42.x), when mesa's meson system was written the recommendation for handling conditional dependencies was to define them as empty lists. When meson would evaluate the dependencies of a target it would recursively flatten all of the arguments, and empty lists would be removed. There are some problems with this, among them that lists and dependencies have different methods (namely .found()), so the recommendation changed to use `declare_dependency()` for such cases. This has the advantage of providing a .found() method, so there is no need to do things like `dep_foo != [] and dep_foo.found()`. I've tested this with 0.42 (the minimum we claim to support) and 0.45. On 0.45 this removes warnings about comparing unlike types. Signed-off-by: Dylan Baker--- meson.build | 85 --- src/gallium/auxiliary/meson.build | 2 +- src/glx/apple/meson.build | 2 +- src/glx/meson.build | 2 +- 4 files changed, 46 insertions(+), 45 deletions(-) diff --git a/meson.build b/meson.build index f210eeb2530..0d195b1aba1 100644 --- a/meson.build +++ b/meson.build @@ -230,7 +230,7 @@ if with_gallium_tegra and not with_gallium_nouveau error('tegra driver requires nouveau driver') endif -dep_libdrm_intel = [] +dep_libdrm_intel = declare_dependency() if with_dri_i915 or with_gallium_i915 dep_libdrm_intel = dependency('libdrm_intel', version : '>= 2.4.75') endif @@ -427,7 +427,7 @@ elif _vdpau == 'auto' _vdpau = 'true' endif with_gallium_vdpau = _vdpau == 'true' -dep_vdpau = [] +dep_vdpau = declare_dependency() if with_gallium_vdpau dep_vdpau = dependency('vdpau', version : '>= 1.1') dep_vdpau = declare_dependency( @@ -466,7 +466,7 @@ elif _xvmc == 'auto' _xvmc = 'true' endif with_gallium_xvmc = _xvmc == 'true' -dep_xvmc = [] +dep_xvmc = declare_dependency() if with_gallium_xvmc dep_xvmc = dependency('xvmc', version : '>= 1.0.6') endif @@ -496,7 +496,8 @@ elif not (with_gallium_r600 or with_gallium_radeonsi or with_gallium_nouveau) error('OMX state tracker requires at least one of the 
following gallium drivers: r600, radeonsi, nouveau.') endif endif -dep_omx = [] +with_gallium_omx = _omx +dep_omx = declare_dependency() dep_omx_other = [] if ['auto', 'bellagio'].contains(_omx) dep_omx = dependency( @@ -584,7 +585,7 @@ elif _va == 'auto' _va = 'true' endif with_gallium_va = _va == 'true' -dep_va = [] +dep_va = declare_dependency() if with_gallium_va dep_va = dependency('libva', version : '>= 0.38.0') dep_va_headers = declare_dependency( @@ -643,7 +644,7 @@ if _opencl != 'disabled' with_gallium_opencl = true with_opencl_icd = _opencl == 'icd' else - dep_clc = [] + dep_clc = declare_dependency() with_gallium_opencl = false with_gallium_icd = false endif @@ -981,7 +982,7 @@ endif # check for dl support if cc.has_function('dlopen') - dep_dl = [] + dep_dl = declare_dependency() else dep_dl = cc.find_library('dl') endif @@ -1000,7 +1001,7 @@ endif # Determine whether or not the rt library is needed for time functions if cc.has_function('clock_gettime') - dep_clock = [] + dep_clock = declare_dependency() else dep_clock = cc.find_library('rt') endif @@ -1028,18 +1029,18 @@ if with_amd_vk or with_gallium_radeonsi or with_gallium_r600 or with_gallium_ope dep_elf = cc.find_library('elf') endif else - dep_elf = [] + dep_elf = declare_dependency() endif dep_expat = dependency('expat') # this only exists on linux so either this is linux and it will be found, or # its not linux and and wont dep_m = cc.find_library('m', required : false) -dep_libdrm_amdgpu = [] -dep_libdrm_radeon = [] -dep_libdrm_nouveau = [] -dep_libdrm_etnaviv = [] -dep_libdrm_freedreno = [] +dep_libdrm_amdgpu = declare_dependency() +dep_libdrm_radeon = declare_dependency() +dep_libdrm_nouveau = declare_dependency() +dep_libdrm_etnaviv = declare_dependency() +dep_libdrm_freedreno = declare_dependency() if with_amd_vk or with_gallium_radeonsi dep_libdrm_amdgpu = dependency('libdrm_amdgpu', version : '>= 2.4.91') endif @@ -1091,7 +1092,7 @@ elif _llvm == 'true' dep_llvm = dependency('llvm', 
version : _llvm_version, modules : llvm_modules) with_llvm = true else - dep_llvm = [] + dep_llvm = declare_dependency() with_llvm = false endif if with_llvm @@ -1121,7 +1122,7 @@ elif with_amd_vk or with_gallium_radeonsi or with_gallium_swr error('The following drivers require LLVM: Radv, RadeonSI, SWR. One of these is enabled, but LLVM is disabled.') endif -dep_glvnd = [] +dep_glvnd = declare_dependency() if with_glvnd dep_glvnd = dependency('libglvnd', version : '>= 0.2.0') pre_args += '-DUSE_LIBGLVND=1' @@ -1133,7 +1134,7 @@ if with_valgrind != 'false' pre_args += '-DHAVE_VALGRIND' endif else - dep_valgrind = [] + dep_valgrind = declare_dependency() endif # pthread stubs. Lets not and say we didn't @@
Re: [Mesa-dev] [PATCH] intel/compiler: fix return statement warning in brw_regs_negative_equal()
On 03/29/2018 03:49 AM, Emil Velikov wrote: Hi Brian, On 29 March 2018 at 02:48, Brian Paul wrote: Silence a gcc warning about missing return value in non-void function. For some reason, gcc 5.4.0 (at least) can't deduce that all else/if cases return a value. A small brain dump: Guessing that's because of the unreachable() cases at the end of the if (a->file == IMM) branch. Yeah, that's what I thought too. But if I comment out the unreachable() statement and put 'return false' in its place, I still get the warning. Ideally we'll update the macro to make things more obvious for the compiler. Otherwise we'll have to update dozens of similar instances through the code base. I don't think this particular issue is caused by unreachable(). -Brian ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 00/61] nir: Move to using instructions for derefs
On March 29, 2018 06:44:59 Rob Clark wrote: On Wed, Mar 28, 2018 at 9:41 PM, Jason Ekstrand wrote: On March 28, 2018 17:43:31 Rob Clark wrote: On Wed, Mar 28, 2018 at 8:16 PM, Jason Ekstrand wrote: On March 28, 2018 16:54:33 Rob Clark wrote: I had noticed the code to remove dead deref's in a few of the passes (at least on your wip branch), and had wondered a bit about not just requiring all the deref related lowering to happen in ssa and possibly require dce after, although admittedly hadn't thought about it *too* much yet.. Yeah. Like I said below, it should be ready enough to just have a tiny clean-up pass instead of having to run full-on dce. Maybe just running dce is the right choice; I'm not sure. I kinda expected to use the dce clean things up once we are in a deref-instruction world.. re: validation passes, could we not just allow dead deref instructions to be ok. That seems like kind of a natural thing.. Making validation ignore them is easy. The trickier bit is that they can cause problems for any pass which works on all deref instructions as opposed to working on texture instructions or intrinsics and tracing the deref chain back. The latter are ok because they'll never look at dead derefs. The former (which are likely to be more efficient if we've CSEd derefs) can run into trouble as it's not always obvious when a deref is dead. I'm sure I'll get a better feel for this whole mess as I continue to progress. Defn not trying to second guess you since you are deeper into it that I am.. But requiring dce (or a some sort of mini deref-dce) pass in various places seems reasonable.. I guess it would be nice (given the growing list of nir passes) to have some more formal way to require that some pass(es) is run prior to a whatever random pass driver wants to run would be nice. (Not sure if llvm's PassManager provides this.. if it doesn't, it should.) Yeah. We could theoretically use the metadata system for this but it seems like a bit of an abuse. 
I'll know more once I get done removing deref chains. That process is teaching me about all sorts of things I missed on the first pass. mmm, I wouldn't look at it as an abuse, as much as making the metadata system a bit more generic. Metadata is really a subset of "state of the IR". (Although perhaps that means nir_metadata_* is no longer a good name ;-)) By abuse, I mostly meant that the metadata system currently never alters the IR beyond filling in extra metadata. But extending it to run dead code is probably the safest IR-modifying thing one can do But this doesn't all have to be done at once.. merging addition of deref instructions with (nearly) immediate lowering to deref chains seems relatively safe (piglit was happy) and low impact on drivers. And would be helpful for clover / vtn pointer (ie. not having to rebase on top of two large patchsets) Yup. I would like to finish the removal (for Intel drivers) before anything gets merged just because it's been so informative when it comes to telling me about all the things I missed. Review can start at any time though. --Jason ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/7] vulkan: Add KHR_display extension to anv and radv using DRM
"Mao, David" writes: > Hi Keith, > If I read the patch correctly, the plane has been interpreted as the same as > connector, and the stackIndex is the index of connector of current device. > Is it by intentional? > If the hardware don't have underlay/overlay supported, is it better to > always report plane 0 rather than pretend to have multiple plane? Yes, you're right. I must have mis-read the specification when writing this function. Thanks for catching this. I've merged this fix into the patch and re-pushed it to my drm-lease-v2 branch. -- -keith signature.asc Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v4] i965: initialize SPIR-V capabilities
On March 29, 2018 02:45:36 Alejandro Piñeiro wrote: Needed for ARB_gl_spirv. Those are not the same that the Intel vulkan driver. From the ARB_spirv_extensions spec: "3. If a new GL extension is added that includes SPIR-V support via a new SPIR-V extension does it's SPIR-V extension also get enumerated by the SPIR_V_EXTENSIONS_ARB query?. RESOLVED. Yes. It's good to include it for consistency. Any SPIR-V functionality supported beyond the SPIR-V version that is required for the GL API version should be enumerated." So in addition to the core SPIR-V support, there is the possibility of specific GL extensions enabling specific SPIR-V extensions (so capabilities). That would mean that it is possible that OpenGL and Vulkan not having the same capabilities supported, even for the same driver. For this reason it is better to keep them separated. As an example: at the time of this patch writing Intel vulkan driver support multiview, but there isn't any OpenGL multiview GL extension supported. Note: we initialize SPIR-V capabilities at brwCreateContext instead of the usual brw_initialize_context_constants because we want to do that only if the extension is enabled. v2: * Rebase update (SpirVCapabilities not a pointer anymore) * Fill spirv capabilities for OpenGL >= 3.3 (Ian Romanick) v3: * Drop multiview support, as i965 doesn't support any multiview GL extension (Jason) * Fill spirv capabilities only if the extension is enabled (Jason) --- Minor tweak after last rebase against master. 
src/mesa/drivers/dri/i965/brw_context.c | 19 +++ 1 file changed, 19 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index fca5c8e3072..11bd68ae61e 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -78,6 +78,7 @@ #include "common/gen_defines.h" +#include "compiler/spirv/nir_spirv.h" /*** * Mesa's Driver Functions ***/ @@ -343,6 +344,20 @@ brw_init_driver_functions(struct brw_context *brw, brw_deserialize_program_binary; } +static void +brw_initialize_spirv_supported_capabilities(struct brw_context *brw) +{ + const struct gen_device_info *devinfo = >screen->devinfo; + struct gl_context *ctx = >ctx; + Not all of this is supported on all hardware so I assume this extension is gen7+ only. If so, please add a comment it an assert to that effect and you'd patch will be Reviewed-by: Jason Ekstrand + ctx->Const.SpirVCapabilities.float64 = devinfo->gen >= 8; + ctx->Const.SpirVCapabilities.int64 = devinfo->gen >= 8; + ctx->Const.SpirVCapabilities.tessellation = true; + ctx->Const.SpirVCapabilities.draw_parameters = true; + ctx->Const.SpirVCapabilities.image_write_without_format = true; + ctx->Const.SpirVCapabilities.variable_pointers = true; +} + static void brw_initialize_context_constants(struct brw_context *brw) { @@ -1063,6 +1078,10 @@ brwCreateContext(gl_api api, _mesa_override_extensions(ctx); _mesa_compute_version(ctx); + /* GL_ARB_gl_spirv */ + if (ctx->Extensions.ARB_gl_spirv) + brw_initialize_spirv_supported_capabilities(brw); + _mesa_initialize_dispatch_tables(ctx); _mesa_initialize_vbo_vtxfmt(ctx); -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] gallium/util: Don't stub u_debug_stack on Android
On Thursday, 2018-03-29 00:19:00 +0200, Stefan Schake wrote: > The fallback path for no libunwind ends up being stubs for Android. > Don't compile them in so we can provide our own implementation. > > Signed-off-by: Stefan Schake> --- > src/gallium/auxiliary/util/u_debug_stack.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/gallium/auxiliary/util/u_debug_stack.c > b/src/gallium/auxiliary/util/u_debug_stack.c > index 846f648..5cbb54f 100644 > --- a/src/gallium/auxiliary/util/u_debug_stack.c > +++ b/src/gallium/auxiliary/util/u_debug_stack.c > @@ -194,7 +194,7 @@ debug_backtrace_print(FILE *f, > } > } > > -#else /* ! HAVE_LIBUNWIND */ > +#elif !defined(ANDROID) /* ! HAVE_LIBUNWIND */ I think I would prefer this, which is easier to extend in the future: #elif defined(ANDROID) /* Not implemented here; see u_debug_stack_android.cpp */ #else /* ! HAVE_LIBUNWIND */ But other than this, the series looks good to me :) This patch (preferably with the amendment I suggested) is: Reviewed-by: Eric Engestrom The other one is: Acked-by: Eric Engestrom (I don't know the Android API enough, but it all looks reasonable) > > #if defined(PIPE_OS_WINDOWS) > #include > -- > 2.7.4 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 08/12] etnaviv: support TX performance counters
Am Sonntag, den 25.03.2018, 22:30 +0200 schrieb Christian Gmeiner: > > Signed-off-by: Christian Gmeiner> --- > src/gallium/drivers/etnaviv/etnaviv_query_pm.c | 63 > ++ > src/gallium/drivers/etnaviv/etnaviv_query_pm.h | 10 > 2 files changed, 73 insertions(+) > > diff --git a/src/gallium/drivers/etnaviv/etnaviv_query_pm.c > b/src/gallium/drivers/etnaviv/etnaviv_query_pm.c > index 3601d4086c..4c2a2ac7de 100644 > --- a/src/gallium/drivers/etnaviv/etnaviv_query_pm.c > +++ b/src/gallium/drivers/etnaviv/etnaviv_query_pm.c > @@ -276,6 +276,69 @@ static const struct etna_perfmon_config query_config[] = > { > .source = (const struct etna_perfmon_source[]) { > { "RA", "CULLED_QUAD_COUNT" } > } > + }, > + { > + .name = "tx-total-bilinear-requests", > + .type = ETNA_QUERY_TX_TOTAL_BILINEAR_REQUESTS, > + .source = (const struct etna_perfmon_source[]) { > + { "TX", "TOTAL_BILINEAR_REQUESTS" } > + } > + }, > + { > + .name = "tx-total-trilinear-requests", > + .type = ETNA_QUERY_TX_TOTAL_TRILINEAR_REQUESTS, > + .source = (const struct etna_perfmon_source[]) { > + { "TX", "TOTAL_TRILINEAR_REQUESTS" } > + } > + }, > + { > + .name = "tx-total-discarded-texutre-requests", ^ wrong spelling of texture. 
> + .type = ETNA_QUERY_TX_TOTAL_DISCARDED_TEXTURE_REQUESTS, > + .source = (const struct etna_perfmon_source[]) { > + { "TX", "TOTAL_DISCARDED_TEXTURE_REQUESTS" } > + } > + }, > + { > + .name = "tx-total-texutre-requests", again > + .type = ETNA_QUERY_TX_TOTAL_TEXTURE_REQUESTS, > + .source = (const struct etna_perfmon_source[]) { > + { "TX", "TOTAL_TEXTURE_REQUESTS" } > + } > + }, > + { > + .name = "tx-mem-read-count", > + .type = ETNA_QUERY_TX_MEM_READ_COUNT, > + .source = (const struct etna_perfmon_source[]) { > + { "TX", "MEM_READ_COUNT" } > + } > + }, > + { > + .name = "tx-mem-read-in-8b-count", > + .type = ETNA_QUERY_TX_MEM_READ_IN_8B_COUNT, > + .source = (const struct etna_perfmon_source[]) { > + { "TX", "MEM_READ_IN_8B_COUNT" } > + } > + }, > + { > + .name = "tx-cache-miss-count", > + .type = ETNA_QUERY_TX_CACHE_MISS_COUNT, > + .source = (const struct etna_perfmon_source[]) { > + { "TX", "CACHE_MISS_COUNT" } > + } > + }, > + { > + .name = "tx-cache-hit-texel-count", > + .type = ETNA_QUERY_TX_CACHE_HIT_TEXEL_COUNT, > + .source = (const struct etna_perfmon_source[]) { > + { "TX", "CACHE_HIT_TEXEL_COUNT" } > + } > + }, > + { > + .name = "tx-cache-miss-texel-count", > + .type = ETNA_QUERY_TX_CACHE_MISS_TEXEL_COUNT, > + .source = (const struct etna_perfmon_source[]) { > + { "TX", "CACHE_MISS_TEXEL_COUNT" } > + } > } > }; > > diff --git a/src/gallium/drivers/etnaviv/etnaviv_query_pm.h > b/src/gallium/drivers/etnaviv/etnaviv_query_pm.h > index 01dd135392..9cfd06e0ec 100644 > --- a/src/gallium/drivers/etnaviv/etnaviv_query_pm.h > +++ b/src/gallium/drivers/etnaviv/etnaviv_query_pm.h > @@ -73,6 +73,16 @@ struct etna_screen; > #define ETNA_QUERY_RA_PREFETCH_CACHE_MISS_COUNTER(ETNA_PM_QUERY_BASE > + 32) > #define ETNA_QUERY_RA_CULLED_QUAD_COUNT (ETNA_PM_QUERY_BASE > + 33) > > +#define ETNA_QUERY_TX_TOTAL_BILINEAR_REQUESTS(ETNA_PM_QUERY_BASE > + 34) > +#define ETNA_QUERY_TX_TOTAL_TRILINEAR_REQUESTS (ETNA_PM_QUERY_BASE > + 35) > +#define 
ETNA_QUERY_TX_TOTAL_DISCARDED_TEXTURE_REQUESTS (ETNA_PM_QUERY_BASE > + 36) > +#define ETNA_QUERY_TX_TOTAL_TEXTURE_REQUESTS (ETNA_PM_QUERY_BASE > + 37) > +#define ETNA_QUERY_TX_MEM_READ_COUNT (ETNA_PM_QUERY_BASE > + 38) > +#define ETNA_QUERY_TX_MEM_READ_IN_8B_COUNT (ETNA_PM_QUERY_BASE > + 39) > +#define ETNA_QUERY_TX_CACHE_MISS_COUNT (ETNA_PM_QUERY_BASE > + 40) > +#define ETNA_QUERY_TX_CACHE_HIT_TEXEL_COUNT (ETNA_PM_QUERY_BASE > + 41) > +#define ETNA_QUERY_TX_CACHE_MISS_TEXEL_COUNT (ETNA_PM_QUERY_BASE > + 42) > + > struct etna_pm_query { > struct etna_query base; > struct etna_perfmon_signal *signal; ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/2] etnaviv: advertise YUV formats as external only
We only support importing YUV as OES external resources. This will change in the future, but for now this fixes the advertised capabilities in eglQueryDmaBufModifiersEXT. Signed-off-by: Lucas Stach--- src/gallium/drivers/etnaviv/etnaviv_screen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c b/src/gallium/drivers/etnaviv/etnaviv_screen.c index e38e48c89436..3c2addb4aa5a 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_screen.c +++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c @@ -632,7 +632,7 @@ etna_screen_query_dmabuf_modifiers(struct pipe_screen *pscreen, if (modifiers) modifiers[num_modifiers] = supported_modifiers[i]; if (external_only) - external_only[num_modifiers] = 0; + external_only[num_modifiers] = util_format_is_yuv(format) ? 1 : 0; num_modifiers++; } -- 2.16.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] gallium/util: implement util_format_is_yuv
This adds a helper to check if a pipe format is in YUV color space. Drivers want to know about this, as YUV mostly needs special handling. Signed-off-by: Lucas Stach--- src/gallium/auxiliary/util/u_format.h | 12 1 file changed, 12 insertions(+) diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h index 88bfd72d0538..e497b4b3375a 100644 --- a/src/gallium/auxiliary/util/u_format.h +++ b/src/gallium/auxiliary/util/u_format.h @@ -557,6 +557,18 @@ util_format_is_depth_and_stencil(enum pipe_format format) util_format_has_stencil(desc); } +static inline boolean +util_format_is_yuv(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + + assert(desc); + if (!desc) { + return FALSE; + } + + return desc->colorspace == UTIL_FORMAT_COLORSPACE_YUV; +} /** * Calculates the depth format type based upon the incoming format description. -- 2.16.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2] nvc0: restore image binding on RGB10A2, remove from BGR10A2
Fixes a bunch of new CTS pbo tests that use those as an output format, which the state tracker converts into buffer image writes. No part of the driver is ready for BGR10A2. It could probably be enabled on Maxwell+, but seems unnecessary. This error was introduced when flipping the displayable bit on those formats, which accidentally also moved the image bit. Fixes: e1a70aed10d (nv50,nvc0: mark ABGR format as displayable instead of ARGB format) Signed-off-by: Ilia Mirkin--- v1 -> v2: fix up RGB10A2 as well - it needs the image binding flag. src/gallium/drivers/nouveau/nv50/nv50_formats.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_formats.c b/src/gallium/drivers/nouveau/nv50/nv50_formats.c index 0ead8ac2e1e..a55adfa59f4 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_formats.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_formats.c @@ -152,9 +152,9 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = F3(A, B4G4R4X4_UNORM, NONE, B, G, R, xx, UNORM, A4B4G4R4, T), F3(A, R9G9B9E5_FLOAT, NONE, R, G, B, xx, FLOAT, E5B9G9R9_SHAREDEXP, T), - C4(A, R10G10B10A2_UNORM, RGB10_A2_UNORM, R, G, B, A, UNORM, A2B10G10R10, TD), + C4(A, R10G10B10A2_UNORM, RGB10_A2_UNORM, R, G, B, A, UNORM, A2B10G10R10, ID), F3(A, R10G10B10X2_UNORM, RGB10_A2_UNORM, R, G, B, xx, UNORM, A2B10G10R10, T), - C4(A, B10G10R10A2_UNORM, BGR10_A2_UNORM, B, G, R, A, UNORM, A2B10G10R10, IB), + C4(A, B10G10R10A2_UNORM, BGR10_A2_UNORM, B, G, R, A, UNORM, A2B10G10R10, TB), F3(A, B10G10R10X2_UNORM, BGR10_A2_UNORM, B, G, R, xx, UNORM, A2B10G10R10, T), C4(A, R10G10B10A2_SNORM, NONE, R, G, B, A, SNORM, A2B10G10R10, T), C4(A, B10G10R10A2_SNORM, NONE, B, G, R, A, SNORM, A2B10G10R10, T), -- 2.16.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 00/61] nir: Move to using instructions for derefs
On Wed, Mar 28, 2018 at 9:41 PM, Jason Ekstrandwrote: > > On March 28, 2018 17:43:31 Rob Clark wrote: > > On Wed, Mar 28, 2018 at 8:16 PM, Jason Ekstrand > wrote: > On March 28, 2018 16:54:33 Rob Clark wrote: > > I had noticed the code to remove dead deref's in a few of the passes > (at least on your wip branch), and had wondered a bit about not just > requiring all the deref related lowering to happen in ssa and possibly > require dce after, although admittedly hadn't thought about it *too* > much yet.. > > Yeah. Like I said below, it should be ready enough to just have a tiny > clean-up pass instead of having to run full-on dce. Maybe just running dce > is the right choice; I'm not sure. > > > I kinda expected to use the dce clean things up once we are in a > deref-instruction world.. re: validation passes, could we not just > allow dead deref instructions to be ok. That seems like kind of a > natural thing.. > > Making validation ignore them is easy. The trickier bit is that they can > cause problems for any pass which works on all deref instructions as opposed > to working on texture instructions or intrinsics and tracing the deref chain > back. The later are ok because they'll never look at dead derefs. There > former (which are likely to be more efficient if we've CSEd derefs) can run > into trouble as it's not always obvious when a deref is dead. > > I'm sure I'll get a better feel for this whole mess as I continue to > progress. > > Defn not trying to second guess you since you are deeper into it that > I am.. But requiring dce (or a some sort of mini deref-dce) pass in > various places seems reasonable.. I guess it would be nice (given the > growing list of nir passes) to have some more formal way to require > that some pass(es) is run prior to a whatever random pass driver wants > to run would be nice. (Not sure if llvm's PassManager provides this.. > if it doesn't, it should.) > > Yeah. 
We could theoretically use the metadata system for this but it seems > like a bit of an abuse. I'll know more once I get done removing deref > chains. That process is teaching me about all sorts of things I missed on > the first pass. > mmm, I wouldn't look at it as an abuse, as much as making the metadata system a bit more generic. Metadata is really a subset of "state of the IR". (Although perhaps that means nir_metadata_* is no longer a good name ;-)) But this doesn't all have to be done at once.. merging addition of deref instructions with (nearly) immediate lowering to deref chains seems relatively safe (piglit was happy) and low impact on drivers. And would be helpful for clover / vtn pointer (ie. not having to rebase on top of two large patchsets) BR, -R ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/4] radv: enable out-of-order rasterization when it's safe on VI+
This can be disabled with RADV_DEBUG=nooutoforder. No CTS regressions on Polaris, and all Vulkan games I tested look good as well. Expect small performance improvements for applications where out-of-order rasterization can be enabled by the driver. Loosely based on RadeonSI. Signed-off-by: Samuel Pitoiset--- src/amd/vulkan/radv_cmd_buffer.c | 28 + src/amd/vulkan/radv_debug.h | 1 + src/amd/vulkan/radv_device.c | 7 ++ src/amd/vulkan/radv_extensions.py | 2 +- src/amd/vulkan/radv_pipeline.c| 240 +- src/amd/vulkan/radv_private.h | 4 + 6 files changed, 279 insertions(+), 3 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index cadb06f0af..c915d4142d 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -1171,10 +1171,24 @@ radv_emit_index_buffer(struct radv_cmd_buffer *cmd_buffer) void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer) { + struct radv_pipeline *pipeline = cmd_buffer->state.pipeline; + uint32_t pa_sc_mode_cntl_1 = + pipeline ? pipeline->graphics.ms.pa_sc_mode_cntl_1 : 0; uint32_t db_count_control; if(!cmd_buffer->state.active_occlusion_queries) { if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) { + if (G_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(pa_sc_mode_cntl_1) && + pipeline->graphics.disable_out_of_order_rast_for_occlusion) { + /* Re-enable out-of-order rasterization if the +* bound pipeline supports it and if it's has +* been disabled before starting occlusion +* queries. 
+*/ + radeon_set_context_reg(cmd_buffer->cs, + R_028A4C_PA_SC_MODE_CNTL_1, + pa_sc_mode_cntl_1); + } db_count_control = 0; } else { db_count_control = S_028004_ZPASS_INCREMENT_DISABLE(1); @@ -1186,6 +1200,20 @@ void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer) S_028004_ZPASS_ENABLE(1) | S_028004_SLICE_EVEN_ENABLE(1) | S_028004_SLICE_ODD_ENABLE(1); + + if (G_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(pa_sc_mode_cntl_1) && + pipeline->graphics.disable_out_of_order_rast_for_occlusion) { + /* If the bound pipeline has enabled +* out-of-order rasterization, we should +* disable it before starting occlusion +* queries. +*/ + pa_sc_mode_cntl_1 &= C_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE; + + radeon_set_context_reg(cmd_buffer->cs, + R_028A4C_PA_SC_MODE_CNTL_1, + pa_sc_mode_cntl_1); + } } else { db_count_control = S_028004_PERFECT_ZPASS_COUNTS(1) | S_028004_SAMPLE_RATE(0); /* TODO: set this to the number of samples of the current framebuffer */ diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h index 08877676b5..65bc61b1d4 100644 --- a/src/amd/vulkan/radv_debug.h +++ b/src/amd/vulkan/radv_debug.h @@ -43,6 +43,7 @@ enum { RADV_DEBUG_SYNC_SHADERS = 0x2000, RADV_DEBUG_NO_SISCHED= 0x4000, RADV_DEBUG_PREOPTIR = 0x8000, + RADV_DEBUG_NO_OUT_OF_ORDER = 0x1, }; enum { diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 4acdf3d416..9af6bf8059 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -307,6 +307,12 @@ radv_physical_device_init(struct radv_physical_device *device, device->has_scissor_bug = device->rad_info.family == CHIP_VEGA10 || device->rad_info.family == CHIP_RAVEN; + /* Out-of-order primitive rasterization. 
*/ + device->has_out_of_order_rast = device->rad_info.chip_class >= VI && + device->rad_info.max_se >= 2; + device->out_of_order_rast_allowed = device->has_out_of_order_rast && + !(device->instance->debug_flags & RADV_DEBUG_NO_OUT_OF_ORDER); + radv_physical_device_init_mem_types(device); radv_fill_device_extension_table(device, >supported_extensions); @@ -376,6 +382,7
[Mesa-dev] [PATCH 3/4] radv: change blend_enable field to use four bits per CB
Signed-off-by: Samuel Pitoiset--- src/amd/vulkan/radv_pipeline.c | 8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 4960c50b72..e259b16318 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -50,7 +50,7 @@ #include "ac_shader_util.h" struct radv_blend_state { - uint32_t blend_enable; + uint32_t blend_enable_4bit; uint32_t need_src_alpha; uint32_t cb_color_control; @@ -455,9 +455,11 @@ radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline, cf = V_028714_SPI_SHADER_ZERO; } else { struct radv_render_pass_attachment *attachment = pass->attachments + subpass->color_attachments[i].attachment; + bool blend_enable = + blend->blend_enable_4bit & (0xfu << (i * 4)); cf = si_choose_spi_color_format(attachment->format, - blend->blend_enable & (1 << i), + blend_enable, blend->need_src_alpha & (1 << i)); } @@ -655,7 +657,7 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline, } blend.cb_blend_control[i] = blend_cntl; - blend.blend_enable |= 1 << i; + blend.blend_enable_4bit |= 0xfu << (i * 4); if (srcRGB == VK_BLEND_FACTOR_SRC_ALPHA || dstRGB == VK_BLEND_FACTOR_SRC_ALPHA || -- 2.16.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/4] radv: put more fields in radv_blend_state
Some will be used for further optimizations (ie. out-of-order rast). Signed-off-by: Samuel Pitoiset--- src/amd/vulkan/radv_pipeline.c | 33 +++-- 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index af1ea395d3..e1a95d39e5 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -50,6 +50,9 @@ #include "ac_shader_util.h" struct radv_blend_state { + uint32_t blend_enable; + uint32_t need_src_alpha; + uint32_t cb_color_control; uint32_t cb_target_mask; uint32_t sx_mrt_blend_opt[8]; @@ -58,6 +61,9 @@ struct radv_blend_state { uint32_t spi_shader_col_format; uint32_t cb_shader_mask; uint32_t db_alpha_to_mask; + + bool single_cb_enable; + bool mrt0_is_dual_src; }; struct radv_tessellation_state { @@ -435,17 +441,13 @@ static unsigned si_choose_spi_color_format(VkFormat vk_format, static void radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo, - uint32_t blend_enable, - uint32_t blend_need_alpha, - bool single_cb_enable, - bool blend_mrt0_is_dual_src, struct radv_blend_state *blend) { RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass); struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass; unsigned col_format = 0; - for (unsigned i = 0; i < (single_cb_enable ? 1 : subpass->color_count); ++i) { + for (unsigned i = 0; i < (blend->single_cb_enable ? 
1 : subpass->color_count); ++i) { unsigned cf; if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED) { @@ -454,8 +456,8 @@ radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline, struct radv_render_pass_attachment *attachment = pass->attachments + subpass->color_attachments[i].attachment; cf = si_choose_spi_color_format(attachment->format, - blend_enable & (1 << i), - blend_need_alpha & (1 << i)); + blend->blend_enable & (1 << i), + blend->need_src_alpha & (1 << i)); } col_format |= cf << (4 * i); @@ -463,7 +465,7 @@ radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline, blend->cb_shader_mask = ac_get_cb_shader_mask(col_format); - if (blend_mrt0_is_dual_src) + if (blend->mrt0_is_dual_src) col_format |= (col_format & 0xf) << 4; blend->spi_shader_col_format = col_format; } @@ -534,16 +536,13 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline, const VkPipelineMultisampleStateCreateInfo *vkms = pCreateInfo->pMultisampleState; struct radv_blend_state blend = {0}; unsigned mode = V_028808_CB_NORMAL; - uint32_t blend_enable = 0, blend_need_alpha = 0; - bool blend_mrt0_is_dual_src = false; int i; - bool single_cb_enable = false; if (!vkblend) return blend; if (extra && extra->custom_blend_mode) { - single_cb_enable = true; + blend.single_cb_enable = true; mode = extra->custom_blend_mode; } blend.cb_color_control = 0; @@ -586,7 +585,7 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline, if (is_dual_src(srcRGB) || is_dual_src(dstRGB) || is_dual_src(srcA) || is_dual_src(dstA)) if (i == 0) - blend_mrt0_is_dual_src = true; + blend.mrt0_is_dual_src = true; if (eqRGB == VK_BLEND_OP_MIN || eqRGB == VK_BLEND_OP_MAX) { srcRGB = VK_BLEND_FACTOR_ONE; @@ -654,7 +653,7 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline, } blend.cb_blend_control[i] = blend_cntl; - blend_enable |= 1 << i; + blend.blend_enable |= 1 << i; if (srcRGB == VK_BLEND_FACTOR_SRC_ALPHA || dstRGB == VK_BLEND_FACTOR_SRC_ALPHA || 
@@ -662,7 +661,7 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline, dstRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE || srcRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA || dstRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA) - blend_need_alpha
[Mesa-dev] [PATCH 2/4] radv: scan which color blend attachments are enabled
With cb_target_enabled_4bit in order to have four bits per CB. Signed-off-by: Samuel Pitoiset--- src/amd/vulkan/radv_pipeline.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index e1a95d39e5..4960c50b72 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -55,6 +55,7 @@ struct radv_blend_state { uint32_t cb_color_control; uint32_t cb_target_mask; + uint32_t cb_target_enabled_4bit; uint32_t sx_mrt_blend_opt[8]; uint32_t cb_blend_control[8]; @@ -578,6 +579,7 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline, continue; blend.cb_target_mask |= (unsigned)att->colorWriteMask << (4 * i); + blend.cb_target_enabled_4bit |= 0xf << (4 * i); if (!att->blendEnable) { blend.cb_blend_control[i] = blend_cntl; continue; -- 2.16.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 105755] Mesa freezes when the GLSL shader contains a `for` loop with an uninitialized `i` index/counter variable
https://bugs.freedesktop.org/show_bug.cgi?id=105755 Sergii Romantsovchanged: What|Removed |Added Attachment #138422|0 |1 is obsolete|| Attachment #138422|Issue with some bots head 2 |sorry, wrong attachment description|| -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 105755] Mesa freezes when the GLSL shader contains a `for` loop with an uninitialized `i` index/counter variable
https://bugs.freedesktop.org/show_bug.cgi?id=105755 Sergii Romantsovchanged: What|Removed |Added Attachment #138420|0 |1 is obsolete|| Attachment #138420|Issue with some bots head 1 |sorry, wrong attachment description|| -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 105755] Mesa freezes when the GLSL shader contains a `for` loop with an uninitialized `i` index/counter variable
https://bugs.freedesktop.org/show_bug.cgi?id=105755 --- Comment #20 from Sergii Romantsov--- Created attachment 138422 --> https://bugs.freedesktop.org/attachment.cgi?id=138422=edit Issue with some bots head 2 -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 105755] Mesa freezes when the GLSL shader contains a `for` loop with an uninitialized `i` index/counter variable
https://bugs.freedesktop.org/show_bug.cgi?id=105755 --- Comment #19 from Sergii Romantsov--- Created attachment 138420 --> https://bugs.freedesktop.org/attachment.cgi?id=138420=edit Issue with some bots head 1 -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] radv: do not always disable dual quad mode when chip has RbPlus
For GFX9+ only, RadeonSI does this too. Signed-off-by: Samuel Pitoiset--- src/amd/vulkan/radv_pipeline.c | 20 +--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index af1ea395d3..6797f768fe 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -669,9 +669,23 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline, blend.sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED); } - /* disable RB+ for now */ - if (pipeline->device->physical_device->has_rbplus) - blend.cb_color_control |= S_028808_DISABLE_DUAL_QUAD(1); + if (pipeline->device->physical_device->has_rbplus) { + /* Disable RB+ blend optimizations for dual source blending. */ + if (blend_mrt0_is_dual_src) { + for (i = 0; i < 8; i++) { + blend.sx_mrt_blend_opt[i] = + S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) | + S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE); + } + } + + /* RB+ doesn't work with dual source blending, logic op and +* RESOLVE. +*/ + if (blend_mrt0_is_dual_src || vkblend->logicOpEnable || + mode == V_028808_CB_RESOLVE) + blend.cb_color_control |= S_028808_DISABLE_DUAL_QUAD(1); + } if (blend.cb_target_mask) blend.cb_color_control |= S_028808_MODE(mode); -- 2.16.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 105807] [Regression, bisected]: 3D Rendering not working correctly in Warhammer 40k: Dawn of War II
https://bugs.freedesktop.org/show_bug.cgi?id=105807 --- Comment #1 from b...@besd.de--- Created attachment 138419 --> https://bugs.freedesktop.org/attachment.cgi?id=138419=edit Ingame Image -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 105807] [Regression, bisected]: 3D Rendering not working correctly in Warhammer 40k: Dawn of War II
https://bugs.freedesktop.org/show_bug.cgi?id=105807 Bug ID: 105807 Summary: [Regression, bisected]: 3D Rendering not working correctly in Warhammer 40k: Dawn of War II Product: Mesa Version: git Hardware: x86-64 (AMD64) OS: Linux (All) Status: NEW Severity: normal Priority: medium Component: Mesa core Assignee: mesa-dev@lists.freedesktop.org Reporter: b...@besd.de QA Contact: mesa-dev@lists.freedesktop.org CC: asm...@feralinteractive.com, mar...@gmail.com Created attachment 138418 --> https://bugs.freedesktop.org/attachment.cgi?id=138418=edit Incorrect 3D rendering (black/white shadows) Hi all, rendering in Warhammer 40k: Dawn of War II breaks with the commit below. In the menu only black and white shadows appear instead of 3d figure (see screenshot). In the game no 3d objects are visible only colorful shadows. Works fine with 17.3.7. a0c8b49284efe736849c0a45920ad0a1bbd8d93d is the first bad commit commit a0c8b49284efe736849c0a45920ad0a1bbd8d93d Author: Marek OlšákDate: Wed Feb 14 20:13:40 2018 +0100 mesa: enable OpenGL 3.1 with ARB_compatibility Tested-by: Dieter Nützel Reviewed-by: Brian Paul :04 04 e77355f4d5979f42b5e8f55dd6c6ea62cba32ff3 24f0fe1b3e5f9d1daa801a706299f0115f9d8f2d M docs :04 04 89d6cc6fe13b05f4625fb48abbb94050f0cb5c96 3b91e5c0091caf1426b09568846f8f2b3bc64803 M src 'bisect run' erfolgreich ausgeführt -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 105775] F1 2017 crashes on GCN 1.0 cards
https://bugs.freedesktop.org/show_bug.cgi?id=105775 --- Comment #19 from Alex Smith--- Yes, same call failing there too. -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 105775] F1 2017 crashes on GCN 1.0 cards
https://bugs.freedesktop.org/show_bug.cgi?id=105775 --- Comment #18 from Jacob --- Created attachment 138416 --> https://bugs.freedesktop.org/attachment.cgi?id=138416&action=edit dmp when starting a race Just to chime in, when running everything on low at 1600x900 windowed, the game crashes for me too on a 270x with 4GB of VRAM, so if this dmp contains the same failing call, it sounds extremely unlikely that it's due to too little VRAM. -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 105775] F1 2017 crashes on GCN 1.0 cards
https://bugs.freedesktop.org/show_bug.cgi?id=105775 --- Comment #17 from Alex Smith--- Thanks. The failing call is vkEndCommandBuffer. That matches with a few other crashes we have logged on GCN 1.0 cards. RADV devs, any ideas what might cause that to happen for 1.0 cards specifically? We've never seen it on newer cards. I doubt it would be really running out of VRAM when running on lowest settings at 720p. Dave, are you able to reproduce it if you run the benchmark at high settings or something like that? -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 105775] F1 2017 crashes on GCN 1.0 cards
https://bugs.freedesktop.org/show_bug.cgi?id=105775 --- Comment #16 from Amarildo--- Created attachment 138415 --> https://bugs.freedesktop.org/attachment.cgi?id=138415=edit New dmp -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 105775] F1 2017 crashes on GCN 1.0 cards
https://bugs.freedesktop.org/show_bug.cgi?id=105775 --- Comment #15 from Amarildo --- Ah, I thought it was a RADV problem. I'll attach a new dump below. -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [ANNOUNCE] Mesa 17.3.8 release candidate
Hello list, The release candidate for Mesa 17.3.8 is now available. Currently we have: - 18 queued - 0 nominated (outstanding) - and 0 rejected patches The current queue consists of: In NIR we have a couple of patches to fix a crash when unrolling loops, as well as a fix for per_vertex_output intrinsic. GLSL receives a similar patch for the unrolling loop. Intel's i965 driver gets several fixes; among them, one that solves several CTS regressions, and another that fixes a GPU hang in Cherryview. Finally, we have other fixes in EGL/Wayland, ANV, RADV, Radeon, Dri and Nine, and Android build. Take a look at section "Mesa stable queue" for more information. Testing reports/general approval Any testing reports (or general approval of the state of the branch) will be greatly appreciated. The plan is to have 17.3.8 this Monday (2nd April), around or shortly after 11:00 GMT. If you have any questions or suggestions - be that about the current patch queue or otherwise, please go ahead. Trivial merge conflicts --- commit f8569000542c929304572eb83e7fd240421b66c5 Author: Dave Airlie radv: get correct offset into LDS for indexed vars. (cherry picked from commit f9de2d409bf4f068a99d358d592d96ab4803f7fb) commit b2603cdd528cce1cd1e69c64e50db8c54cc8a855 Author: Marek Olšák st/dri: fix OpenGL-OpenCL interop for GL_TEXTURE_BUFFER (cherry picked from commit db495b8962909f74e90b9eb0463fb37f37ac5f62) commit 9475e58334e3eb7c33f55f6a760d531f92f6bcb4 Author: Eric Engestrom meson/configure: detect endian.h instead of trying to guess when it's available (cherry picked from commit cbee1bfb34274668a05995b9d4c78ddec9e5ea4c) Cheers, J.A. Mesa stable queue - Nominated (0) == Queued (18) === Axel Davy (3): st/nine: Fix bad tracking of vs textures for NINESBT_ALL st/nine: Fixes warning about implicit conversion st/nine: Fix non inversible matrix check Caio Marcelo de Oliveira Filho (1): anv/pipeline: fail if TCS/TES compile fail Dave Airlie (1): radv: get correct offset into LDS for indexed vars. 
Derek Foreman (1): egl/wayland: Make swrast display_sync the correct queue Eric Engestrom (1): meson/configure: detect endian.h instead of trying to guess when it's available Squashed with: configure: use AC_CHECK_HEADERS to check for endian.h Ian Romanick (2): mesa: Don't write to user buffer in glGetTexParameterIuiv on error i965/vec4: Fix null destination register in 3-source instructions Jason Ekstrand (1): i965: Emit texture cache invalidates around blorp_copy Jordan Justen (2): i965: Calculate thread_count in brw_alloc_stage_scratch i965: Hard code CS scratch_ids_per_subslice for Cherryview Leo Liu (1): radeon/vce: move feedback command inside of destroy function Marek Olšák (1): st/dri: fix OpenGL-OpenCL interop for GL_TEXTURE_BUFFER Rob Clark (1): nir: fix per_vertex_output intrinsic Timothy Arceri (2): glsl: fix infinite loop caused by bug in loop unrolling pass nir: fix crash in loop unroll corner case Rejected (0) = ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mesa: Inherit texture view multi-sample information from the original texture images.
On 27 March 2018 at 18:57, Brian Paul wrote: > LGTM. I guess we probably don't have much piglit coverage for texture_view > + MSAA. > > Reviewed-by: Brian Paul > I've pushed this, thanks for the review. A piglit test should be straightforward, but I'll likely be fairly busy for at least the next few months. (WineConf preparations being one of the reasons.) Henri ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] gl.h: remove stale comment, trailing whitespace
Reviewed-by: Emil Velikov -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] intel/compiler: fix return statement warning in brw_regs_negative_equal()
Hi Brian, On 29 March 2018 at 02:48, Brian Paul wrote: > Silence a gcc warning about missing return value in non-void function. > For some reason, gcc 5.4.0 (at least) can't deduce that all else/if > cases return a value. A small brain dump: Guessing that's because of the unreachable() cases at the end of the if (a->file == IMM) branch. Ideally we'll update the macro to make things more obvious for the compiler. Otherwise we'll have to update dozens of similar instances through the code base. -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v4] i965: initialize SPIR-V capabilities
Needed for ARB_gl_spirv. Those are not the same that the Intel vulkan driver. From the ARB_spirv_extensions spec: "3. If a new GL extension is added that includes SPIR-V support via a new SPIR-V extension does it's SPIR-V extension also get enumerated by the SPIR_V_EXTENSIONS_ARB query?. RESOLVED. Yes. It's good to include it for consistency. Any SPIR-V functionality supported beyond the SPIR-V version that is required for the GL API version should be enumerated." So in addition to the core SPIR-V support, there is the possibility of specific GL extensions enabling specific SPIR-V extensions (so capabilities). That would mean that it is possible that OpenGL and Vulkan not having the same capabilities supported, even for the same driver. For this reason it is better to keep them separated. As an example: at the time of this patch writing Intel vulkan driver support multiview, but there isn't any OpenGL multiview GL extension supported. Note: we initialize SPIR-V capabilities at brwCreateContext instead of the usual brw_initialize_context_constants because we want to do that only if the extension is enabled. v2: * Rebase update (SpirVCapabilities not a pointer anymore) * Fill spirv capabilities for OpenGL >= 3.3 (Ian Romanick) v3: * Drop multiview support, as i965 doesn't support any multiview GL extension (Jason) * Fill spirv capabilities only if the extension is enabled (Jason) --- Minor tweak after last rebase against master. 
src/mesa/drivers/dri/i965/brw_context.c | 19 +++ 1 file changed, 19 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index fca5c8e3072..11bd68ae61e 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -78,6 +78,7 @@ #include "common/gen_defines.h" +#include "compiler/spirv/nir_spirv.h" /*** * Mesa's Driver Functions ***/ @@ -343,6 +344,20 @@ brw_init_driver_functions(struct brw_context *brw, brw_deserialize_program_binary; } +static void +brw_initialize_spirv_supported_capabilities(struct brw_context *brw) +{ + const struct gen_device_info *devinfo = >screen->devinfo; + struct gl_context *ctx = >ctx; + + ctx->Const.SpirVCapabilities.float64 = devinfo->gen >= 8; + ctx->Const.SpirVCapabilities.int64 = devinfo->gen >= 8; + ctx->Const.SpirVCapabilities.tessellation = true; + ctx->Const.SpirVCapabilities.draw_parameters = true; + ctx->Const.SpirVCapabilities.image_write_without_format = true; + ctx->Const.SpirVCapabilities.variable_pointers = true; +} + static void brw_initialize_context_constants(struct brw_context *brw) { @@ -1063,6 +1078,10 @@ brwCreateContext(gl_api api, _mesa_override_extensions(ctx); _mesa_compute_version(ctx); + /* GL_ARB_gl_spirv */ + if (ctx->Extensions.ARB_gl_spirv) + brw_initialize_spirv_supported_capabilities(brw); + _mesa_initialize_dispatch_tables(ctx); _mesa_initialize_vbo_vtxfmt(ctx); -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] nvc0: drop image binding from BGR10A2 format
Did a CTS run on that. Things are looking better with it. No regressions. Tested-By: Karol HerbstOn Thu, Mar 29, 2018 at 5:47 AM, Ilia Mirkin wrote: > Fixes a bunch of new CTS pbo tests that use that as an output format, > which the state tracker converts into buffer image writes. > > No part of the driver is ready for BGR10A2. It could probably be enabled > on Maxwell+, but seems unnecessary. > > Signed-off-by: Ilia Mirkin > --- > src/gallium/drivers/nouveau/nv50/nv50_formats.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/gallium/drivers/nouveau/nv50/nv50_formats.c > b/src/gallium/drivers/nouveau/nv50/nv50_formats.c > index 0ead8ac2e1e..9f8faf768dd 100644 > --- a/src/gallium/drivers/nouveau/nv50/nv50_formats.c > +++ b/src/gallium/drivers/nouveau/nv50/nv50_formats.c > @@ -154,7 +154,7 @@ const struct nv50_format > nv50_format_table[PIPE_FORMAT_COUNT] = > > C4(A, R10G10B10A2_UNORM, RGB10_A2_UNORM, R, G, B, A, UNORM, A2B10G10R10, > TD), > F3(A, R10G10B10X2_UNORM, RGB10_A2_UNORM, R, G, B, xx, UNORM, A2B10G10R10, > T), > - C4(A, B10G10R10A2_UNORM, BGR10_A2_UNORM, B, G, R, A, UNORM, A2B10G10R10, > IB), > + C4(A, B10G10R10A2_UNORM, BGR10_A2_UNORM, B, G, R, A, UNORM, A2B10G10R10, > TB), > F3(A, B10G10R10X2_UNORM, BGR10_A2_UNORM, B, G, R, xx, UNORM, A2B10G10R10, > T), > C4(A, R10G10B10A2_SNORM, NONE, R, G, B, A, SNORM, A2B10G10R10, T), > C4(A, B10G10R10A2_SNORM, NONE, B, G, R, A, SNORM, A2B10G10R10, T), > -- > 2.16.1 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH, v2] CHROMIUM: configure.ac/meson.build: Fix -latomic test
And now I left the CHROMIUM tag, sorry ,-( On Thu, Mar 29, 2018 at 4:31 PM, Nicolas Boichatwrote: > From: Nicolas Boichat > > When compiling with LLVM 6.0, the test fails to detect that > -latomic is actually required, as the atomic call is inlined. > > In the code itself (src/util/disk_cache.c), we see this pattern: > p_atomic_add(cache->size, - (uint64_t)size); > where cache->size is an uint64_t *, and results in the following > link time error without -latomic: > src/util/disk_cache.c:628: error: undefined reference to > '__atomic_fetch_add_8' > > Fix the configure/meson test to replicate this pattern, which then > correctly realizes the need for -latomic. > > Signed-off-by: Nicolas Boichat > --- > > Changes since v1: > - Updated meson.build as well (untested) > > configure.ac | 6 -- > meson.build | 6 -- > 2 files changed, 8 insertions(+), 4 deletions(-) > > diff --git a/configure.ac b/configure.ac > index e874f8ebfb2..eff9a0ef88f 100644 > --- a/configure.ac > +++ b/configure.ac > @@ -445,9 +445,11 @@ if test "x$GCC_ATOMIC_BUILTINS_SUPPORTED" = x1; then > AC_MSG_CHECKING(whether -latomic is needed) > AC_LINK_IFELSE([AC_LANG_SOURCE([[ > #include > -uint64_t v; > +struct { > +uint64_t* v; > +} x; > int main() { > -return (int)__atomic_load_n(, __ATOMIC_ACQUIRE); > +return (int)__atomic_load_n(x.v, __ATOMIC_ACQUIRE); > }]])], GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC=no, > GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC=yes) > AC_MSG_RESULT($GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC) > if test "x$GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC" = xyes; then > diff --git a/meson.build b/meson.build > index f210eeb2530..cd567dc000e 100644 > --- a/meson.build > +++ b/meson.build > @@ -850,8 +850,10 @@ if cc.compiles('int main() { int n; return > __atomic_load_n(, __ATOMIC_ACQUIRE) ># as ARM. 
>if not cc.links('''#include > int main() { > - uint64_t n; > - return (int)__atomic_load_n(, __ATOMIC_ACQUIRE); > + struct { > + uint64_t *v; > + } x; > + return (int)__atomic_load_n(x.v, __ATOMIC_ACQUIRE); > }''', >name : 'GCC atomic builtins required -latomic') > dep_atomic = cc.find_library('atomic') > -- > 2.17.0.rc1.321.gba9d0f2565-goog > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH, v2] CHROMIUM: configure.ac/meson.build: Fix -latomic test
From: Nicolas BoichatWhen compiling with LLVM 6.0, the test fails to detect that -latomic is actually required, as the atomic call is inlined. In the code itself (src/util/disk_cache.c), we see this pattern: p_atomic_add(cache->size, - (uint64_t)size); where cache->size is an uint64_t *, and results in the following link time error without -latomic: src/util/disk_cache.c:628: error: undefined reference to '__atomic_fetch_add_8' Fix the configure/meson test to replicate this pattern, which then correctly realizes the need for -latomic. Signed-off-by: Nicolas Boichat --- Changes since v1: - Updated meson.build as well (untested) configure.ac | 6 -- meson.build | 6 -- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/configure.ac b/configure.ac index e874f8ebfb2..eff9a0ef88f 100644 --- a/configure.ac +++ b/configure.ac @@ -445,9 +445,11 @@ if test "x$GCC_ATOMIC_BUILTINS_SUPPORTED" = x1; then AC_MSG_CHECKING(whether -latomic is needed) AC_LINK_IFELSE([AC_LANG_SOURCE([[ #include -uint64_t v; +struct { +uint64_t* v; +} x; int main() { -return (int)__atomic_load_n(, __ATOMIC_ACQUIRE); +return (int)__atomic_load_n(x.v, __ATOMIC_ACQUIRE); }]])], GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC=no, GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC=yes) AC_MSG_RESULT($GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC) if test "x$GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC" = xyes; then diff --git a/meson.build b/meson.build index f210eeb2530..cd567dc000e 100644 --- a/meson.build +++ b/meson.build @@ -850,8 +850,10 @@ if cc.compiles('int main() { int n; return __atomic_load_n(, __ATOMIC_ACQUIRE) # as ARM. 
if not cc.links('''#include int main() { - uint64_t n; - return (int)__atomic_load_n(, __ATOMIC_ACQUIRE); + struct { + uint64_t *v; + } x; + return (int)__atomic_load_n(x.v, __ATOMIC_ACQUIRE); }''', name : 'GCC atomic builtins required -latomic') dep_atomic = cc.find_library('atomic') -- 2.17.0.rc1.321.gba9d0f2565-goog ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] configure.ac: Fix -latomic test
Scrap this, I guess I should update meson.build, too. Will send v2 in a few minutes. On Thu, Mar 29, 2018 at 4:26 PM, Nicolas Boichatwrote: > From: Nicolas Boichat > > When compiling with LLVM 6.0, the test fails to detect that > -latomic is actually required, as the atomic call is inlined. > > In the code itself (src/util/disk_cache.c), we see this pattern: > p_atomic_add(cache->size, - (uint64_t)size); > where cache->size is an uint64_t *, and results in the following > link time error without -latomic: > src/util/disk_cache.c:628: error: undefined reference to > '__atomic_fetch_add_8' > > Fix the configure test to replicate this pattern, which then > correctly realizes the need for -latomic. > > Signed-off-by: Nicolas Boichat > --- > configure.ac | 6 -- > 1 file changed, 4 insertions(+), 2 deletions(-) > > diff --git a/configure.ac b/configure.ac > index e874f8ebfb2..eff9a0ef88f 100644 > --- a/configure.ac > +++ b/configure.ac > @@ -445,9 +445,11 @@ if test "x$GCC_ATOMIC_BUILTINS_SUPPORTED" = x1; then > AC_MSG_CHECKING(whether -latomic is needed) > AC_LINK_IFELSE([AC_LANG_SOURCE([[ > #include > -uint64_t v; > +struct { > +uint64_t* v; > +} x; > int main() { > -return (int)__atomic_load_n(, __ATOMIC_ACQUIRE); > +return (int)__atomic_load_n(x.v, __ATOMIC_ACQUIRE); > }]])], GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC=no, > GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC=yes) > AC_MSG_RESULT($GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC) > if test "x$GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC" = xyes; then > -- > 2.17.0.rc1.321.gba9d0f2565-goog > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] configure.ac: Fix -latomic test
From: Nicolas BoichatWhen compiling with LLVM 6.0, the test fails to detect that -latomic is actually required, as the atomic call is inlined. In the code itself (src/util/disk_cache.c), we see this pattern: p_atomic_add(cache->size, - (uint64_t)size); where cache->size is an uint64_t *, and results in the following link time error without -latomic: src/util/disk_cache.c:628: error: undefined reference to '__atomic_fetch_add_8' Fix the configure test to replicate this pattern, which then correctly realizes the need for -latomic. Signed-off-by: Nicolas Boichat --- configure.ac | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index e874f8ebfb2..eff9a0ef88f 100644 --- a/configure.ac +++ b/configure.ac @@ -445,9 +445,11 @@ if test "x$GCC_ATOMIC_BUILTINS_SUPPORTED" = x1; then AC_MSG_CHECKING(whether -latomic is needed) AC_LINK_IFELSE([AC_LANG_SOURCE([[ #include -uint64_t v; +struct { +uint64_t* v; +} x; int main() { -return (int)__atomic_load_n(, __ATOMIC_ACQUIRE); +return (int)__atomic_load_n(x.v, __ATOMIC_ACQUIRE); }]])], GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC=no, GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC=yes) AC_MSG_RESULT($GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC) if test "x$GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC" = xyes; then -- 2.17.0.rc1.321.gba9d0f2565-goog ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 105775] F1 2017 crashes on GCN 1.0 cards
https://bugs.freedesktop.org/show_bug.cgi?id=105775 --- Comment #14 from Alex Smith --- The crash is in the game code so I don't think a Mesa backtrace would help. It looks like the dump file hasn't been attached properly - could you re-attach it so I can look at what call is failing? -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 105320] Storage texel buffer access produces wrong results (RX Vega)
https://bugs.freedesktop.org/show_bug.cgi?id=105320 --- Comment #3 from Samuel Pitoiset --- This should be fixed with https://cgit.freedesktop.org/mesa/mesa/commit/?id=4503ff760c794c3bb15b978a47c530037d56498e Note that it's a workaround because we are waiting for a proper LLVM fix (Bas has one but it hasn't been pushed yet). Thanks for the report Jozef. -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 105320] Storage texel buffer access produces wrong results (RX Vega)
https://bugs.freedesktop.org/show_bug.cgi?id=105320 Samuel Pitoiset changed: What|Removed |Added Resolution|--- |FIXED Status|NEW |RESOLVED -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 105738] commit f7ffa504a065dc2631fd38cc5fe885b277f4e7e7 causes artifacting in radv
https://bugs.freedesktop.org/show_bug.cgi?id=105738 Samuel Pitoiset changed: What|Removed |Added Status|NEW |RESOLVED Resolution|--- |FIXED --- Comment #6 from Samuel Pitoiset --- Should be fixed with https://cgit.freedesktop.org/mesa/mesa/commit/?id=4f96747530be799e3ccd84ccf48df6d7fdbd0a03 -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/7] vulkan: Add KHR_display extension to anv and radv using DRM
Hi Keith, If I read the patch correctly, the plane has been interpreted as the same as connector, and the stackIndex is the index of connector of current device. Is it by intentional? If the hardware don't have underlay/overlay supported, is it better to always report plane 0 rather than pretend to have multiple plane? Thanks. Best Regards, David -Original Message- From: dri-devel [mailto:dri-devel-boun...@lists.freedesktop.org] On Behalf Of Keith Packard Sent: Saturday, February 10, 2018 12:45 PM To: mesa-dev@lists.freedesktop.org Cc: Keith Packard; dri-de...@lists.freedesktop.org Subject: [PATCH 1/7] vulkan: Add KHR_display extension to anv and radv using DRM This adds support for the KHR_display extension to the anv and radv Vulkan drivers. The drivers now attempt to open the master DRM node when the KHR_display extension is requested so that the common winsys code can perform the necessary operations. Signed-off-by: Keith Packard --- configure.ac |1 + meson.build|4 +- src/amd/vulkan/Makefile.am |8 + src/amd/vulkan/Makefile.sources|3 + src/amd/vulkan/meson.build |7 + src/amd/vulkan/radv_device.c | 28 +- src/amd/vulkan/radv_extensions.py |7 +- src/amd/vulkan/radv_private.h |2 + src/amd/vulkan/radv_wsi.c |3 +- src/amd/vulkan/radv_wsi_display.c | 143 src/intel/Makefile.sources |3 + src/intel/Makefile.vulkan.am |7 + src/intel/vulkan/anv_device.c | 18 +- src/intel/vulkan/anv_extensions.py |1 + src/intel/vulkan/anv_extensions_gen.py |5 +- src/intel/vulkan/anv_wsi.c |3 +- src/intel/vulkan/anv_wsi_display.c | 129 +++ src/intel/vulkan/meson.build |7 + src/vulkan/Makefile.am |7 + src/vulkan/Makefile.sources|4 + src/vulkan/wsi/meson.build | 10 + src/vulkan/wsi/wsi_common.c| 19 +- src/vulkan/wsi/wsi_common.h|5 +- src/vulkan/wsi/wsi_common_display.c| 1368 src/vulkan/wsi/wsi_common_display.h| 72 ++ src/vulkan/wsi/wsi_common_private.h| 10 + 26 files changed, 1858 insertions(+), 16 deletions(-) create mode 100644 src/amd/vulkan/radv_wsi_display.c create mode 100644 
src/intel/vulkan/anv_wsi_display.c create mode 100644 src/vulkan/wsi/wsi_common_display.c create mode 100644 src/vulkan/wsi/wsi_common_display.h diff --git a/configure.ac b/configure.ac index 8ed606c7694..46318365603 100644 --- a/configure.ac +++ b/configure.ac @@ -1849,6 +1849,7 @@ fi AM_CONDITIONAL(HAVE_PLATFORM_X11, echo "$platforms" | grep -q 'x11') AM_CONDITIONAL(HAVE_PLATFORM_WAYLAND, echo "$platforms" | grep -q 'wayland') AM_CONDITIONAL(HAVE_PLATFORM_DRM, echo "$platforms" | grep -q 'drm') +AM_CONDITIONAL(HAVE_PLATFORM_DISPLAY, echo "$platforms" | grep -q 'drm') AM_CONDITIONAL(HAVE_PLATFORM_SURFACELESS, echo "$platforms" | grep -q 'surfaceless') AM_CONDITIONAL(HAVE_PLATFORM_ANDROID, echo "$platforms" | grep -q 'android') diff --git a/meson.build b/meson.build index b39e2f8ab96..aeb7f5e2917 100644 --- a/meson.build +++ b/meson.build @@ -239,11 +239,12 @@ with_platform_wayland = false with_platform_x11 = false with_platform_drm = false with_platform_surfaceless = false +with_platform_display = false egl_native_platform = '' _platforms = get_option('platforms') if _platforms == 'auto' if system_has_kms_drm -_platforms = 'x11,wayland,drm,surfaceless' +_platforms = 'x11,wayland,drm,surfaceless,display' elif ['darwin', 'windows', 'cygwin'].contains(host_machine.system()) _platforms = 'x11,surfaceless' else @@ -257,6 +258,7 @@ if _platforms != '' with_platform_wayland = _split.contains('wayland') with_platform_drm = _split.contains('drm') with_platform_surfaceless = _split.contains('surfaceless') + with_platform_display = _split.contains('display') egl_native_platform = _split[0] endif diff --git a/src/amd/vulkan/Makefile.am b/src/amd/vulkan/Makefile.am index 61025968942..061b8144b88 100644 --- a/src/amd/vulkan/Makefile.am +++ b/src/amd/vulkan/Makefile.am @@ -76,6 +76,14 @@ VULKAN_LIB_DEPS = \ $(DLOPEN_LIBS) \ -lm +if HAVE_PLATFORM_DISPLAY +AM_CPPFLAGS += \ + -DVK_USE_PLATFORM_DISPLAY_KHR + +VULKAN_SOURCES += $(VULKAN_WSI_DISPLAY_FILES) + +endif + if 
HAVE_PLATFORM_X11 AM_CPPFLAGS += \ $(XCB_DRI3_CFLAGS) \ diff --git a/src/amd/vulkan/Makefile.sources b/src/amd/vulkan/Makefile.sources index a510d88d965..618a6cdaed0 100644 --- a/src/amd/vulkan/Makefile.sources +++ b/src/amd/vulkan/Makefile.sources @@ -78,6 +78,9 @@ VULKAN_WSI_WAYLAND_FILES := \ VULKAN_WSI_X11_FILES := \ radv_wsi_x11.c +VULKAN_WSI_DISPLAY_FILES := \ + radv_wsi_display.c + VULKAN_GENERATED_FILES := \ radv_entrypoints.c \
[Mesa-dev] [Bug 101747] Steam-Game Turmoil, Segfault on start
https://bugs.freedesktop.org/show_bug.cgi?id=101747 Timothy Arceri changed: What|Removed |Added Depends on||105797 Referenced Bugs: https://bugs.freedesktop.org/show_bug.cgi?id=105797 [Bug 105797] Tracker for YoYo based games start-up crashes -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev