Re: [Mesa-dev] [PATCH 1/5] mesa/bufferobj: make _mesa_delete_buffer_object externally accessible
For the series: Reviewed-by: Marek Olšák -- Marek On Wed, Jan 6, 2016 at 3:53 AM, Nicolai Hähnle wrote: > From: Nicolai Hähnle > > gl_buffer_object has grown more complicated and requires cleanup. Using this > function from drivers will be more future-proof. > --- > src/mesa/main/bufferobj.c | 2 +- > src/mesa/main/bufferobj.h | 4 > 2 files changed, 5 insertions(+), 1 deletion(-) > > diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c > index 8a9f9b6..4a098ac 100644 > --- a/src/mesa/main/bufferobj.c > +++ b/src/mesa/main/bufferobj.c > @@ -447,7 +447,7 @@ _mesa_new_buffer_object(struct gl_context *ctx, GLuint > name) > * > * Default callback for the \c dd_function_table::DeleteBuffer() hook. > */ > -static void > +void > _mesa_delete_buffer_object(struct gl_context *ctx, > struct gl_buffer_object *bufObj) > { > diff --git a/src/mesa/main/bufferobj.h b/src/mesa/main/bufferobj.h > index 3eac96d..a5bfe88 100644 > --- a/src/mesa/main/bufferobj.h > +++ b/src/mesa/main/bufferobj.h > @@ -109,6 +109,10 @@ _mesa_initialize_buffer_object(struct gl_context *ctx, > GLuint name); > > extern void > +_mesa_delete_buffer_object(struct gl_context *ctx, > + struct gl_buffer_object *bufObj); > + > +extern void > _mesa_reference_buffer_object_(struct gl_context *ctx, > struct gl_buffer_object **ptr, > struct gl_buffer_object *bufObj); > -- > 2.5.0 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] glsl: fix varying slot allocation for blocks and structs with explicit locations
Previously each member was being counted as using a single slot; count_attribute_slots() fixes the count for array and struct members. Also don't assign a negative value to the unsigned expl_location variable. --- Fixes these new piglit tests: http://patchwork.freedesktop.org/patch/69531/ src/glsl/ast_to_hir.cpp | 9 + 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index 0197cdc..50d5e22 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -6408,12 +6408,13 @@ ast_process_struct_or_iface_block_members(exec_list *instructions, if (process_qualifier_constant(state, , "location", qual->location, _location)) { fields[i].location = VARYING_SLOT_VAR0 + qual_location; - expl_location = fields[i].location + 1; + expl_location = fields[i].location + + fields[i].type->count_attribute_slots(false); } } else { if (layout && layout->flags.q.explicit_location) { fields[i].location = expl_location; - expl_location = expl_location + 1; + expl_location += fields[i].type->count_attribute_slots(false); } else { fields[i].location = -1; } @@ -6570,7 +6571,7 @@ ast_struct_specifier::hir(exec_list *instructions, state->struct_specifier_depth++; - unsigned expl_location = -1; + unsigned expl_location = 0; if (layout && layout->flags.q.explicit_location) { if (!process_qualifier_constant(state, , "location", layout->location, _location)) { @@ -6763,7 +6764,7 @@ ast_interface_block::hir(exec_list *instructions, return NULL; } - unsigned expl_location = -1; + unsigned expl_location = 0; if (layout.flags.q.explicit_location) { if (!process_qualifier_constant(state, , "location", layout.location, _location)) { -- 2.4.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 93577] Total war: Attila crashes at startup
https://bugs.freedesktop.org/show_bug.cgi?id=93577 --- Comment #4 from Médéric Boquien --- Thanks for the investigation. Is there a way I can find the list of extensions they are using? That way I can keep an eye on upcoming Mesa versions and try again when everything is in place. -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/3] tgsi/scan: set which color components are read by a fragment shader
On 05.01.2016 20:46, Marek Olšák wrote: From: Marek OlšákThis will be used by radeonsi. --- src/gallium/auxiliary/tgsi/tgsi_scan.c | 30 ++ src/gallium/auxiliary/tgsi/tgsi_scan.h | 1 + 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index e3a6fb0..6ea32ee 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -187,14 +187,28 @@ tgsi_scan_shader(const struct tgsi_token *tokens, } if (procType == TGSI_PROCESSOR_FRAGMENT && - !src->Register.Indirect && - info->input_semantic_name[src->Register.Index] == - TGSI_SEMANTIC_POSITION && - (src->Register.SwizzleX == TGSI_SWIZZLE_Z || - src->Register.SwizzleY == TGSI_SWIZZLE_Z || - src->Register.SwizzleZ == TGSI_SWIZZLE_Z || - src->Register.SwizzleW == TGSI_SWIZZLE_Z)) { - info->reads_z = TRUE; + !src->Register.Indirect) { + unsigned name = +info->input_semantic_name[src->Register.Index]; + unsigned index = +info->input_semantic_index[src->Register.Index]; Move index down into the TGSI_SEMANTIC_COLOR branch? 
Either way, Reviewed-by: Nicolai Hähnle + + if (name == TGSI_SEMANTIC_POSITION && + (src->Register.SwizzleX == TGSI_SWIZZLE_Z || + src->Register.SwizzleY == TGSI_SWIZZLE_Z || + src->Register.SwizzleZ == TGSI_SWIZZLE_Z || + src->Register.SwizzleW == TGSI_SWIZZLE_Z)) +info->reads_z = TRUE; + + if (name == TGSI_SEMANTIC_COLOR) { +unsigned mask = + (1 << src->Register.SwizzleX) | + (1 << src->Register.SwizzleY) | + (1 << src->Register.SwizzleZ) | + (1 << src->Register.SwizzleW); + +info->colors_read |= mask << (index * 4); + } } } diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h index a3e4378..b0b423a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.h +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h @@ -77,6 +77,7 @@ struct tgsi_shader_info uint opcode_count[TGSI_OPCODE_LAST]; /**< opcode histogram */ + ubyte colors_read; /**< which color components are read by the FS */ ubyte colors_written; boolean reads_position; /**< does fragment shader read position? */ boolean reads_z; /**< does fragment shader read depth? */ ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/3] tgsi/scan: fix tgsi_shader_info::reads_z
Patches 1 & 2 are Reviewed-by: Nicolai Hähnle -- On 05.01.2016 20:46, Marek Olšák wrote: From: Marek Olšák This has no users in Mesa. --- src/gallium/auxiliary/tgsi/tgsi_scan.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index e3feed9..e3a6fb0 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -187,8 +187,9 @@ tgsi_scan_shader(const struct tgsi_token *tokens, } if (procType == TGSI_PROCESSOR_FRAGMENT && - info->reads_position && - src->Register.Index == 0 && + !src->Register.Indirect && + info->input_semantic_name[src->Register.Index] == + TGSI_SEMANTIC_POSITION && (src->Register.SwizzleX == TGSI_SWIZZLE_Z || src->Register.SwizzleY == TGSI_SWIZZLE_Z || src->Register.SwizzleZ == TGSI_SWIZZLE_Z || ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 93577] Total war: Attila crashes at startup
https://bugs.freedesktop.org/show_bug.cgi?id=93577 Jose Fonseca changed: What | Removed | Added -- Status | NEW | RESOLVED -- Resolution | --- | NOTABUG --- Comment #3 from Jose Fonseca --- (In reply to Médéric Boquien from comment #0) > Note that the producers of Total War: Attila explicitly state that they only > support the NVidia binary driver at the exclusion of everything else, but > they do not state what's missing/buggy in the Mesa drivers. It seems they require at least OpenGL 4.3 core profile support. But given they make no attempt to check for errors it seems a lost cause. I wouldn't be surprised if they used proprietary NVIDIA extensions without checking. I don't think there's a bug here: if they specifically state they only support the NVIDIA blob, there's nothing we can do short of emulating the NVIDIA blob (extensions etc.) -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 07/23] radeonsi: export "undef" values for undefined PS outputs
From: Marek Olšák--- src/gallium/drivers/radeonsi/si_shader.c | 19 ++- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index e08a076..73a34ac 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1323,11 +1323,12 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base, LLVMInt32TypeInContext(base->gallivm->context), pack_args, 2, LLVMReadNoneAttribute | LLVMNoUnwindAttribute); - args[chan + 7] = args[chan + 5] = + args[chan + 5] = LLVMBuildBitCast(base->gallivm->builder, packed, LLVMFloatTypeInContext(base->gallivm->context), ""); + args[chan + 7] = base->undef; } } else memcpy([5], values, sizeof(values[0]) * 4); @@ -2119,10 +2120,10 @@ static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base, args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRTZ); args[4] = uint->zero; /* COMP flag */ - args[5] = base->zero; /* R, depth */ - args[6] = base->zero; /* G, stencil test value[0:7], stencil op value[8:15] */ - args[7] = base->zero; /* B, sample mask */ - args[8] = base->zero; /* A, alpha to mask */ + args[5] = base->undef; /* R, depth */ + args[6] = base->undef; /* G, stencil test value[0:7], stencil op value[8:15] */ + args[7] = base->undef; /* B, sample mask */ + args[8] = base->undef; /* A, alpha to mask */ if (depth) { args[5] = depth; @@ -2173,10 +2174,10 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base) args[2] = uint->one; /* DONE bit */ args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRT); args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */ - args[5] = uint->zero; /* R */ - args[6] = uint->zero; /* G */ - args[7] = uint->zero; /* B */ - args[8] = uint->zero; /* A */ + args[5] = uint->undef; /* R */ + args[6] = uint->undef; /* G */ + args[7] = uint->undef; /* B */ + args[8] = uint->undef; /* A */ 
lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export", LLVMVoidTypeInContext(base->gallivm->context), -- 2.1.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 12/23] radeonsi: add struct si_shader_config
From: Marek OlšákThere will be 1 config per variant, which will be a union of configs from {prolog, main, epilog}. For now, just add the structure. --- src/gallium/drivers/radeonsi/si_compute.c | 24 ++-- src/gallium/drivers/radeonsi/si_shader.c| 31 +++ src/gallium/drivers/radeonsi/si_shader.h| 23 +++- src/gallium/drivers/radeonsi/si_state_draw.c| 4 +- src/gallium/drivers/radeonsi/si_state_shaders.c | 50 - 5 files changed, 68 insertions(+), 64 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 1c4d6b3..8edf4ad 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -68,7 +68,7 @@ static void init_scratch_buffer(struct si_context *sctx, struct si_compute *prog unsigned scratch_bytes_needed; si_shader_binary_read_config(>shader, offset); - scratch_bytes_needed = program->shader.scratch_bytes_per_wave; + scratch_bytes_needed = program->shader.config.scratch_bytes_per_wave; scratch_bytes = MAX2(scratch_bytes, scratch_bytes_needed); } @@ -86,7 +86,7 @@ static void init_scratch_buffer(struct si_context *sctx, struct si_compute *prog * to the maximum bytes needed, so it can compute the stride * correctly. */ - program->shader.scratch_bytes_per_wave = scratch_bytes; + program->shader.config.scratch_bytes_per_wave = scratch_bytes; /* Patch the shader with the scratch buffer address. 
*/ si_shader_apply_scratch_relocs(sctx, @@ -281,12 +281,12 @@ static void si_launch_grid( memcpy(kernel_args + (num_work_size_bytes / 4), input, program->input_size); - if (shader->scratch_bytes_per_wave > 0) { + if (shader->config.scratch_bytes_per_wave > 0) { COMPUTE_DBG(sctx->screen, "Waves: %u; Scratch per wave: %u bytes; " "Total Scratch: %u bytes\n", num_waves_for_scratch, - shader->scratch_bytes_per_wave, - shader->scratch_bytes_per_wave * + shader->config.scratch_bytes_per_wave, + shader->config.scratch_bytes_per_wave * num_waves_for_scratch); radeon_add_to_buffer_list(>b, >b.gfx, @@ -313,7 +313,7 @@ static void si_launch_grid( si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 8, scratch_buffer_va); si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 12, S_008F04_BASE_ADDRESS_HI(scratch_buffer_va >> 32) - | S_008F04_STRIDE(shader->scratch_bytes_per_wave / 64)); + | S_008F04_STRIDE(shader->config.scratch_bytes_per_wave / 64)); si_pm4_set_reg(pm4, R_00B810_COMPUTE_START_X, 0); si_pm4_set_reg(pm4, R_00B814_COMPUTE_START_Y, 0); @@ -361,9 +361,9 @@ static void si_launch_grid( si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, shader_va >> 8); si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40); - si_pm4_set_reg(pm4, R_00B848_COMPUTE_PGM_RSRC1, shader->rsrc1); + si_pm4_set_reg(pm4, R_00B848_COMPUTE_PGM_RSRC1, shader->config.rsrc1); - lds_blocks = shader->lds_size; + lds_blocks = shader->config.lds_size; /* XXX: We are over allocating LDS. 
For SI, the shader reports LDS in * blocks of 256 bytes, so if there are 4 bytes lds allocated in * the shader and 4 bytes allocated by the state tracker, then @@ -377,10 +377,10 @@ static void si_launch_grid( assert(lds_blocks <= 0xFF); - shader->rsrc2 &= C_00B84C_LDS_SIZE; - shader->rsrc2 |= S_00B84C_LDS_SIZE(lds_blocks); + shader->config.rsrc2 &= C_00B84C_LDS_SIZE; + shader->config.rsrc2 |= S_00B84C_LDS_SIZE(lds_blocks); - si_pm4_set_reg(pm4, R_00B84C_COMPUTE_PGM_RSRC2, shader->rsrc2); + si_pm4_set_reg(pm4, R_00B84C_COMPUTE_PGM_RSRC2, shader->config.rsrc2); si_pm4_set_reg(pm4, R_00B854_COMPUTE_RESOURCE_LIMITS, 0); si_pm4_set_reg(pm4, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, @@ -402,7 +402,7 @@ static void si_launch_grid( * COMPUTE_PGM_RSRC2.SCRATCH_EN is enabled. */ S_00B860_WAVES(num_waves_for_scratch) - | S_00B860_WAVESIZE(shader->scratch_bytes_per_wave >> 10)) + | S_00B860_WAVESIZE(shader->config.scratch_bytes_per_wave >> 10)) ; si_pm4_cmd_begin(pm4, PKT3_DISPATCH_DIRECT); diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index f6a5051..c468ee3 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -3728,25 +3728,25 @@ void si_shader_binary_read_config(struct si_shader *shader,
[Mesa-dev] [PATCH 15/23] radeonsi: always keep shader code, rodata, and relocs in memory
From: Marek OlšákWe won't compile shaders in draw calls, but we will concatenate shader binaries according to states in draw calls, so keep the binaries. --- src/gallium/drivers/radeonsi/si_shader.c | 10 +++--- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index de117d9..abc1652 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -3908,14 +3908,9 @@ int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader, return r; FREE(shader->binary.config); - FREE(shader->binary.rodata); FREE(shader->binary.global_symbol_offsets); - if (shader->config.scratch_bytes_per_wave == 0) { - FREE(shader->binary.code); - FREE(shader->binary.relocs); - memset(>binary, 0, - offsetof(struct radeon_shader_binary, disasm_string)); - } + shader->binary.config = NULL; + shader->binary.global_symbol_offsets = NULL; return r; } @@ -4228,6 +4223,7 @@ void si_shader_destroy(struct si_shader *shader) r600_resource_reference(>bo, NULL); FREE(shader->binary.code); + FREE(shader->binary.rodata); FREE(shader->binary.relocs); FREE(shader->binary.disasm_string); } -- 2.1.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 06/23] radeonsi: move MRTZ export into a separate function
From: Marek Olšák--- src/gallium/drivers/radeonsi/si_shader.c | 113 +-- 1 file changed, 62 insertions(+), 51 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 8441fb4..e08a076 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2100,6 +2100,59 @@ static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context * bld_base) FREE(outputs); } +static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base, + LLVMValueRef depth, LLVMValueRef stencil, + LLVMValueRef samplemask) +{ + struct si_screen *sscreen = si_shader_context(bld_base)->screen; + struct lp_build_context *base = _base->base; + struct lp_build_context *uint = _base->uint_bld; + LLVMValueRef args[9]; + unsigned mask = 0; + + assert(depth || stencil || samplemask); + + args[1] = uint->one; /* whether the EXEC mask is valid */ + args[2] = uint->one; /* DONE bit */ + + /* Specify the target we are exporting */ + args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRTZ); + + args[4] = uint->zero; /* COMP flag */ + args[5] = base->zero; /* R, depth */ + args[6] = base->zero; /* G, stencil test value[0:7], stencil op value[8:15] */ + args[7] = base->zero; /* B, sample mask */ + args[8] = base->zero; /* A, alpha to mask */ + + if (depth) { + args[5] = depth; + mask |= 0x1; + } + + if (stencil) { + args[6] = stencil; + mask |= 0x2; + } + + if (samplemask) { + args[7] = samplemask; + mask |= 0x4; + } + + /* SI (except OLAND) has a bug that it only looks +* at the X writemask component. 
*/ + if (sscreen->b.chip_class == SI && + sscreen->b.family != CHIP_OLAND) + mask |= 0x1; + + /* Specify which components to enable */ + args[0] = lp_build_const_int32(base->gallivm, mask); + + lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export", + LLVMVoidTypeInContext(base->gallivm->context), + args, 9, 0); +} + static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base) { struct si_shader_context * si_shader_ctx = si_shader_context(bld_base); @@ -2109,7 +2162,7 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base) struct tgsi_shader_info *info = >selector->info; LLVMBuilderRef builder = base->gallivm->builder; LLVMValueRef args[9]; - int depth_index = -1, stencil_index = -1, samplemask_index = -1; + LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL; int last_color_export = -1; int i; @@ -2148,13 +2201,16 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base) /* Select the correct target */ switch (semantic_name) { case TGSI_SEMANTIC_POSITION: - depth_index = i; + depth = LLVMBuildLoad(builder, + si_shader_ctx->radeon_bld.soa.outputs[i][2], ""); continue; case TGSI_SEMANTIC_STENCIL: - stencil_index = i; + stencil = LLVMBuildLoad(builder, + si_shader_ctx->radeon_bld.soa.outputs[i][1], ""); continue; case TGSI_SEMANTIC_SAMPLEMASK: - samplemask_index = i; + samplemask = LLVMBuildLoad(builder, + si_shader_ctx->radeon_bld.soa.outputs[i][0], ""); continue; case TGSI_SEMANTIC_COLOR: target = V_008DFC_SQ_EXP_MRT + semantic_index; @@ -2214,53 +2270,8 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base) args, 9, 0); } - if (depth_index >= 0 || stencil_index >= 0 || samplemask_index >= 0) { - LLVMValueRef out_ptr; - unsigned mask = 0; - - args[1] = uint->one; /* whether the EXEC mask is valid */ - args[2] = uint->one; /* DONE bit */ - - /* Specify the target we are exporting */ - args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRTZ); - - 
args[4] = uint->zero; /* COMP flag */ - args[5] = base->zero; /* R, depth */ - args[6] = base->zero; /* G, stencil test value[0:7], stencil op value[8:15] */ - args[7] = base->zero; /* B, sample mask */ - args[8] = base->zero; /*
[Mesa-dev] [PATCH 18/23] radeonsi: add si_shader_destroy_binary
From: Marek Olšák--- src/gallium/drivers/radeonsi/si_shader.c | 14 +- src/gallium/drivers/radeonsi/si_shader.h | 1 + 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 770f5b7..0773fff 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -4219,6 +4219,14 @@ out: return r; } +void si_shader_destroy_binary(struct radeon_shader_binary *binary) +{ + FREE(binary->code); + FREE(binary->rodata); + FREE(binary->relocs); + FREE(binary->disasm_string); +} + void si_shader_destroy(struct si_shader *shader) { if (shader->gs_copy_shader) { @@ -4230,9 +4238,5 @@ void si_shader_destroy(struct si_shader *shader) r600_resource_reference(>scratch_bo, NULL); r600_resource_reference(>bo, NULL); - - FREE(shader->binary.code); - FREE(shader->binary.rodata); - FREE(shader->binary.relocs); - FREE(shader->binary.disasm_string); + si_shader_destroy_binary(>binary); } diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 2220fc7..780383c 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -341,6 +341,7 @@ int si_compile_llvm(struct si_screen *sscreen, struct pipe_debug_callback *debug, unsigned processor); void si_shader_destroy(struct si_shader *shader); +void si_shader_destroy_binary(struct radeon_shader_binary *binary); unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index); int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader); void si_shader_binary_read(struct si_screen *sscreen, -- 2.1.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 11/23] radeonsi: move NULL exporting into a separate function
From: Marek Olšák--- src/gallium/drivers/radeonsi/si_shader.c | 37 +++- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index f60b560..f6a5051 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2186,34 +2186,41 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base, args, 9, 0); } +static void si_export_null(struct lp_build_tgsi_context *bld_base) +{ + struct lp_build_context *base = _base->base; + struct lp_build_context *uint = _base->uint_bld; + LLVMValueRef args[9]; + + args[0] = lp_build_const_int32(base->gallivm, 0x0); /* enabled channels */ + args[1] = uint->one; /* whether the EXEC mask is valid */ + args[2] = uint->one; /* DONE bit */ + args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_NULL); + args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */ + args[5] = uint->undef; /* R */ + args[6] = uint->undef; /* G */ + args[7] = uint->undef; /* B */ + args[8] = uint->undef; /* A */ + + lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export", + LLVMVoidTypeInContext(base->gallivm->context), + args, 9, 0); +} + static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base) { struct si_shader_context * si_shader_ctx = si_shader_context(bld_base); struct si_shader * shader = si_shader_ctx->shader; struct lp_build_context * base = _base->base; - struct lp_build_context * uint = _base->uint_bld; struct tgsi_shader_info *info = >selector->info; LLVMBuilderRef builder = base->gallivm->builder; - LLVMValueRef args[9]; LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL; int last_color_export = -1; int i; /* If there are no outputs, add a dummy export. 
*/ if (!info->num_outputs) { - args[0] = lp_build_const_int32(base->gallivm, 0x0); /* enabled channels */ - args[1] = uint->one; /* whether the EXEC mask is valid */ - args[2] = uint->one; /* DONE bit */ - args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_NULL); - args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */ - args[5] = uint->undef; /* R */ - args[6] = uint->undef; /* G */ - args[7] = uint->undef; /* B */ - args[8] = uint->undef; /* A */ - - lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export", - LLVMVoidTypeInContext(base->gallivm->context), - args, 9, 0); + si_export_null(bld_base); return; } -- 2.1.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 16/23] radeonsi: move si_shader_binary_upload out of si_compile_llvm
From: Marek Olšák--- src/gallium/drivers/radeonsi/si_compute.c | 1 + src/gallium/drivers/radeonsi/si_shader.c | 12 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index a543c55..aedea8e 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -124,6 +124,7 @@ static void *si_create_compute_state( code, header->num_bytes); si_compile_llvm(sctx->screen, >kernels[i], sctx->tm, mod, >b.debug, TGSI_PROCESSOR_COMPUTE); + si_shader_binary_upload(sctx->screen, >kernels[i]); LLVMDisposeModule(mod); } } diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index abc1652..91473a7 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -3903,10 +3903,6 @@ int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader, si_shader_binary_read(sscreen, >binary, >config, debug, processor); - r = si_shader_binary_upload(sscreen, shader); - if (r) - return r; - FREE(shader->binary.config); FREE(shader->binary.global_symbol_offsets); shader->binary.config = NULL; @@ -3987,6 +3983,8 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen, r = si_compile_llvm(sscreen, si_shader_ctx->shader, si_shader_ctx->tm, bld_base->base.gallivm->module, debug, TGSI_PROCESSOR_GEOMETRY); + if (!r) + r = si_shader_binary_upload(sscreen, si_shader_ctx->shader); radeon_llvm_dispose(_shader_ctx->radeon_bld); @@ -4187,6 +4185,12 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, goto out; } + r = si_shader_binary_upload(sscreen, shader); + if (r) { + fprintf(stderr, "LLVM failed to upload shader\n"); + goto out; + } + radeon_llvm_dispose(_shader_ctx.radeon_bld); if (si_shader_ctx.type == TGSI_PROCESSOR_GEOMETRY) { -- 2.1.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org 
http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 09/23] radeonsi: use EXP_NULL for pixel shaders without outputs
From: Marek OlšákThis never happens currently. --- src/gallium/drivers/radeonsi/si_shader.c| 2 +- src/gallium/drivers/radeonsi/si_state_shaders.c | 7 ++- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 13e5140..4204db0 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2153,7 +2153,7 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base) args[0] = lp_build_const_int32(base->gallivm, 0x0); /* enabled channels */ args[1] = uint->one; /* whether the EXEC mask is valid */ args[2] = uint->one; /* DONE bit */ - args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRT); + args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_NULL); args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */ args[5] = uint->undef; /* R */ args[6] = uint->undef; /* G */ diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 68ba7ec..af21f3e 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -428,11 +428,8 @@ static void si_shader_ps(struct si_shader *shader) colors_written = info->colors_written; export_16bpc = shader->key.ps.export_16bpc; - if (!info->num_outputs) { - colors_written = 0x1; /* dummy export */ - export_16bpc = 0; - } else if (info->colors_written == 0x1 && - info->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]) { + if (info->colors_written == 0x1 && + info->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]) { colors_written |= (1 << (shader->key.ps.last_cbuf + 1)) - 1; } -- 2.1.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 13/23] radeonsi: don't pass si_shader to si_shader_binary_read_config
From: Marek Olšák--- src/gallium/drivers/radeonsi/si_compute.c | 5 +++-- src/gallium/drivers/radeonsi/si_shader.c | 28 ++-- src/gallium/drivers/radeonsi/si_shader.h | 3 ++- 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 8edf4ad..7aedd39 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -67,7 +67,8 @@ static void init_scratch_buffer(struct si_context *sctx, struct si_compute *prog program->shader.binary.global_symbol_offsets[i]; unsigned scratch_bytes_needed; - si_shader_binary_read_config(>shader, offset); + si_shader_binary_read_config(>shader.binary, +>shader.config, offset); scratch_bytes_needed = program->shader.config.scratch_bytes_per_wave; scratch_bytes = MAX2(scratch_bytes, scratch_bytes_needed); } @@ -260,7 +261,7 @@ static void si_launch_grid( #if HAVE_LLVM >= 0x0306 /* Read the config information */ - si_shader_binary_read_config(shader, pc); + si_shader_binary_read_config(>binary, >config, pc); #endif /* Upload the kernel arguments */ diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index c468ee3..f9e61a2 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -3708,19 +3708,19 @@ static void preload_ring_buffers(struct si_shader_context *si_shader_ctx) } } -void si_shader_binary_read_config(struct si_shader *shader, +void si_shader_binary_read_config(struct radeon_shader_binary *binary, + struct si_shader_config *conf, unsigned symbol_offset) { unsigned i; const unsigned char *config = - radeon_shader_binary_config_start(>binary, - symbol_offset); + radeon_shader_binary_config_start(binary, symbol_offset); /* XXX: We may be able to emit some of these values directly rather than * extracting fields to be emitted later. 
*/ - for (i = 0; i < shader->binary.config_size_per_symbol; i+= 8) { + for (i = 0; i < binary->config_size_per_symbol; i+= 8) { unsigned reg = util_le32_to_cpu(*(uint32_t*)(config + i)); unsigned value = util_le32_to_cpu(*(uint32_t*)(config + i + 4)); switch (reg) { @@ -3728,25 +3728,25 @@ void si_shader_binary_read_config(struct si_shader *shader, case R_00B128_SPI_SHADER_PGM_RSRC1_VS: case R_00B228_SPI_SHADER_PGM_RSRC1_GS: case R_00B848_COMPUTE_PGM_RSRC1: - shader->config.num_sgprs = MAX2(shader->config.num_sgprs, (G_00B028_SGPRS(value) + 1) * 8); - shader->config.num_vgprs = MAX2(shader->config.num_vgprs, (G_00B028_VGPRS(value) + 1) * 4); - shader->config.float_mode = G_00B028_FLOAT_MODE(value); - shader->config.rsrc1 = value; + conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8); + conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4); + conf->float_mode = G_00B028_FLOAT_MODE(value); + conf->rsrc1 = value; break; case R_00B02C_SPI_SHADER_PGM_RSRC2_PS: - shader->config.lds_size = MAX2(shader->config.lds_size, G_00B02C_EXTRA_LDS_SIZE(value)); + conf->lds_size = MAX2(conf->lds_size, G_00B02C_EXTRA_LDS_SIZE(value)); break; case R_00B84C_COMPUTE_PGM_RSRC2: - shader->config.lds_size = MAX2(shader->config.lds_size, G_00B84C_LDS_SIZE(value)); - shader->config.rsrc2 = value; + conf->lds_size = MAX2(conf->lds_size, G_00B84C_LDS_SIZE(value)); + conf->rsrc2 = value; break; case R_0286CC_SPI_PS_INPUT_ENA: - shader->config.spi_ps_input_ena = value; + conf->spi_ps_input_ena = value; break; case R_0286E8_SPI_TMPRING_SIZE: case R_00B860_COMPUTE_TMPRING_SIZE: /* WAVESIZE is in units of 256 dwords. */ - shader->config.scratch_bytes_per_wave = + conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(value) * 256 * 4 * 1; break; default: @@ -3858,7 +3858,7 @@ void
[Mesa-dev] [PATCH 08/23] radeonsi: only use LLVMBuildLoad once when updating color outputs at the end
From: Marek Olšákwithout LLVMBuildStore. So: - do LLVMBuildLoad - update the values as necessary - export --- src/gallium/drivers/radeonsi/si_shader.c | 67 ++-- 1 file changed, 20 insertions(+), 47 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 73a34ac..13e5140 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1334,24 +1334,8 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base, memcpy([5], values, sizeof(values[0]) * 4); } -/* Load from output pointers and initialize arguments for the shader export intrinsic */ -static void si_llvm_init_export_args_load(struct lp_build_tgsi_context *bld_base, - LLVMValueRef *out_ptr, - unsigned target, - LLVMValueRef *args) -{ - struct gallivm_state *gallivm = bld_base->base.gallivm; - LLVMValueRef values[4]; - int i; - - for (i = 0; i < 4; i++) - values[i] = LLVMBuildLoad(gallivm->builder, out_ptr[i], ""); - - si_llvm_init_export_args(bld_base, values, target, args); -} - static void si_alpha_test(struct lp_build_tgsi_context *bld_base, - LLVMValueRef alpha_ptr) + LLVMValueRef alpha) { struct si_shader_context *si_shader_ctx = si_shader_context(bld_base); struct gallivm_state *gallivm = bld_base->base.gallivm; @@ -1363,8 +1347,7 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base, LLVMValueRef alpha_pass = lp_build_cmp(_base->base, si_shader_ctx->shader->key.ps.alpha_func, -LLVMBuildLoad(gallivm->builder, alpha_ptr, ""), -alpha_ref); +alpha, alpha_ref); LLVMValueRef arg = lp_build_select(_base->base, alpha_pass, @@ -1383,12 +1366,12 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base, } } -static void si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *bld_base, - LLVMValueRef alpha_ptr) +static LLVMValueRef si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *bld_base, + LLVMValueRef alpha) { struct si_shader_context *si_shader_ctx = 
si_shader_context(bld_base); struct gallivm_state *gallivm = bld_base->base.gallivm; - LLVMValueRef coverage, alpha; + LLVMValueRef coverage; /* alpha = alpha * popcount(coverage) / SI_NUM_SMOOTH_AA_SAMPLES */ coverage = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, @@ -1406,9 +1389,7 @@ static void si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *bld_base lp_build_const_float(gallivm, 1.0 / SI_NUM_SMOOTH_AA_SAMPLES), ""); - alpha = LLVMBuildLoad(gallivm->builder, alpha_ptr, ""); - alpha = LLVMBuildFMul(gallivm->builder, alpha, coverage, ""); - LLVMBuildStore(gallivm->builder, alpha, alpha_ptr); + return LLVMBuildFMul(gallivm->builder, alpha, coverage, ""); } static void si_llvm_emit_clipvertex(struct lp_build_tgsi_context * bld_base, @@ -2196,8 +2177,8 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base) for (i = 0; i < info->num_outputs; i++) { unsigned semantic_name = info->output_semantic_name[i]; unsigned semantic_index = info->output_semantic_index[i]; - unsigned target; - LLVMValueRef alpha_ptr; + unsigned target, j; + LLVMValueRef color[4] = {}; /* Select the correct target */ switch (semantic_name) { @@ -2215,29 +2196,24 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base) continue; case TGSI_SEMANTIC_COLOR: target = V_008DFC_SQ_EXP_MRT + semantic_index; - alpha_ptr = si_shader_ctx->radeon_bld.soa.outputs[i][3]; - if (si_shader_ctx->shader->key.ps.clamp_color) { - for (int j = 0; j < 4; j++) { - LLVMValueRef ptr = si_shader_ctx->radeon_bld.soa.outputs[i][j]; - LLVMValueRef result = LLVMBuildLoad(builder, ptr, ""); + for (j = 0; j < 4; j++) + color[j] = LLVMBuildLoad(builder, +
[Mesa-dev] [PATCH 10/23] radeonsi: move MRT color exporting into a separate function
From: Marek OlšákThis will be used by a fragment shader epilog. --- src/gallium/drivers/radeonsi/si_shader.c | 93 +++- 1 file changed, 55 insertions(+), 38 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 4204db0..f60b560 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2135,6 +2135,57 @@ static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base, args, 9, 0); } +static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base, + LLVMValueRef *color, unsigned index, + bool is_last) +{ + struct si_shader_context *si_shader_ctx = si_shader_context(bld_base); + struct lp_build_context *base = _base->base; + LLVMValueRef args[9]; + int i; + + /* Clamp color */ + if (si_shader_ctx->shader->key.ps.clamp_color) + for (i = 0; i < 4; i++) + color[i] = radeon_llvm_saturate(bld_base, color[i]); + + /* Alpha to one */ + if (si_shader_ctx->shader->key.ps.alpha_to_one) + color[3] = base->one; + + /* Alpha test */ + if (index == 0 && + si_shader_ctx->shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS) + si_alpha_test(bld_base, color[3]); + + /* Line & polygon smoothing */ + if (si_shader_ctx->shader->key.ps.poly_line_smoothing) + color[3] = si_scale_alpha_by_sample_mask(bld_base, color[3]); + + /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. 
*/ + if (index == 0 && + si_shader_ctx->shader->key.ps.last_cbuf > 0) { + for (int c = 1; c <= si_shader_ctx->shader->key.ps.last_cbuf; c++) { + si_llvm_init_export_args(bld_base, color, +V_008DFC_SQ_EXP_MRT + c, args); + lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export", + LLVMVoidTypeInContext(base->gallivm->context), + args, 9, 0); + } + } + + /* Export */ + si_llvm_init_export_args(bld_base, color, V_008DFC_SQ_EXP_MRT + index, +args); + if (is_last) { + args[1] = bld_base->uint_bld.one; /* whether the EXEC mask is valid */ + args[2] = bld_base->uint_bld.one; /* DONE bit */ + } + lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export", + LLVMVoidTypeInContext(base->gallivm->context), + args, 9, 0); +} + static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base) { struct si_shader_context * si_shader_ctx = si_shader_context(bld_base); @@ -2177,7 +2228,7 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base) for (i = 0; i < info->num_outputs; i++) { unsigned semantic_name = info->output_semantic_name[i]; unsigned semantic_index = info->output_semantic_index[i]; - unsigned target, j; + unsigned j; LLVMValueRef color[4] = {}; /* Select the correct target */ @@ -2195,53 +2246,19 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base) si_shader_ctx->radeon_bld.soa.outputs[i][0], ""); continue; case TGSI_SEMANTIC_COLOR: - target = V_008DFC_SQ_EXP_MRT + semantic_index; - for (j = 0; j < 4; j++) color[j] = LLVMBuildLoad(builder, si_shader_ctx->radeon_bld.soa.outputs[i][j], ""); - if (si_shader_ctx->shader->key.ps.clamp_color) - for (j = 0; j < 4; j++) - color[j] = radeon_llvm_saturate(bld_base, color[j]); - - if (si_shader_ctx->shader->key.ps.alpha_to_one) - color[3] = base->one; - - if (semantic_index == 0 && - si_shader_ctx->shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS) - si_alpha_test(bld_base, color[3]); - - if (si_shader_ctx->shader->key.ps.poly_line_smoothing) - color[3] = 
si_scale_alpha_by_sample_mask(bld_base, color[3]); - break; + si_export_mrt_color(bld_base, color, semantic_index, + last_color_export == i); + continue; default:
[Mesa-dev] [PATCH 00/23] RadeonSI: Restructuring shader code generation part 2
Hi, These boring patches focus on restructuring pixel shader output handling and code around si_compile_llvm (config, dumping, etc.). They are mostly code movements and dividing functions into smaller ones, so that they can be re-used by pixel shader epilog compilation code. Please review. Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 04/23] radeonsi: set SPI color formats and CB_SHADER_MASK outside of compilation
From: Marek Olšák--- src/gallium/drivers/radeonsi/si_shader.c| 12 +- src/gallium/drivers/radeonsi/si_shader.h| 2 -- src/gallium/drivers/radeonsi/si_state_shaders.c | 30 ++--- 3 files changed, 28 insertions(+), 16 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index f322c4e..85113c0 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1302,18 +1302,8 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base, if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) { int cbuf = target - V_008DFC_SQ_EXP_MRT; - if (cbuf >= 0 && cbuf < 8) { + if (cbuf >= 0 && cbuf < 8) compressed = (si_shader_ctx->shader->key.ps.export_16bpc >> cbuf) & 0x1; - - if (compressed) - si_shader_ctx->shader->spi_shader_col_format |= - V_028714_SPI_SHADER_FP16_ABGR << (4 * cbuf); - else - si_shader_ctx->shader->spi_shader_col_format |= - V_028714_SPI_SHADER_32_ABGR << (4 * cbuf); - - si_shader_ctx->shader->cb_shader_mask |= 0xf << (4 * cbuf); - } } /* Set COMPR flag */ diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 3d14c79..b89d3b2 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -277,8 +277,6 @@ struct si_shader { unsignedspi_ps_input_ena; unsignedfloat_mode; unsignedscratch_bytes_per_wave; - unsignedspi_shader_col_format; - unsignedcb_shader_mask; union si_shader_key key; unsignednparam; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 4b007ec..b08b035f 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -387,6 +387,8 @@ static void si_shader_ps(struct si_shader *shader) struct tgsi_shader_info *info = >selector->info; struct si_pm4_state *pm4; unsigned i, spi_ps_in_control; + unsigned spi_shader_col_format = 0, 
cb_shader_mask = 0; + unsigned colors_written, export_16bpc; unsigned num_sgprs, num_user_sgprs; unsigned spi_baryc_cntl = 0; uint64_t va; @@ -422,12 +424,35 @@ static void si_shader_ps(struct si_shader *shader) } } + /* Find out what SPI_SHADER_COL_FORMAT and CB_SHADER_MASK should be. */ + colors_written = info->colors_written; + export_16bpc = shader->key.ps.export_16bpc; + + if (info->colors_written == 0x0) { + colors_written = 0x1; /* dummy export */ + export_16bpc = 0; + } else if (info->colors_written == 0x1 && + info->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]) { + colors_written |= (1 << (shader->key.ps.last_cbuf + 1)) - 1; + } + + while (colors_written) { + i = u_bit_scan(_written); + if (export_16bpc & (1 << i)) + spi_shader_col_format |= V_028714_SPI_SHADER_FP16_ABGR << (4 * i); + else + spi_shader_col_format |= V_028714_SPI_SHADER_32_ABGR << (4 * i); + cb_shader_mask |= 0xf << (4 * i); + } + + /* Set interpolation controls. */ has_centroid = G_0286CC_PERSP_CENTROID_ENA(shader->spi_ps_input_ena) || G_0286CC_LINEAR_CENTROID_ENA(shader->spi_ps_input_ena); spi_ps_in_control = S_0286D8_NUM_INTERP(shader->nparam) | S_0286D8_BC_OPTIMIZE_DISABLE(has_centroid); + /* Set registers. */ si_pm4_set_reg(pm4, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl); si_pm4_set_reg(pm4, R_0286D8_SPI_PS_IN_CONTROL, spi_ps_in_control); @@ -437,9 +462,8 @@ static void si_shader_ps(struct si_shader *shader) info->writes_z ? V_028710_SPI_SHADER_32_R : V_028710_SPI_SHADER_ZERO); - si_pm4_set_reg(pm4, R_028714_SPI_SHADER_COL_FORMAT, - shader->spi_shader_col_format); - si_pm4_set_reg(pm4, R_02823C_CB_SHADER_MASK, shader->cb_shader_mask); + si_pm4_set_reg(pm4, R_028714_SPI_SHADER_COL_FORMAT, spi_shader_col_format); + si_pm4_set_reg(pm4, R_02823C_CB_SHADER_MASK, cb_shader_mask); va = shader->bo->gpu_address; si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER); -- 2.1.4
[Mesa-dev] [PATCH 01/23] radeonsi: determine DB_SHADER_CONTROL outside of shader compilation
From: Marek Olšákbecause the API pixel shader binary will not emulate alpha test one day, so the KILL_ENABLE bit must be determined elsewhere. --- src/gallium/drivers/radeonsi/si_shader.c| 20 src/gallium/drivers/radeonsi/si_shader.h| 5 ++- src/gallium/drivers/radeonsi/si_state_shaders.c | 43 + 3 files changed, 40 insertions(+), 28 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 426f40f..4b49f9d 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1390,8 +1390,6 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base, LLVMVoidTypeInContext(gallivm->context), NULL, 0, 0); } - - si_shader_ctx->shader->db_shader_control |= S_02880C_KILL_ENABLE(1); } static void si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *bld_base, @@ -2229,22 +2227,18 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base) out_ptr = si_shader_ctx->radeon_bld.soa.outputs[depth_index][2]; args[5] = LLVMBuildLoad(base->gallivm->builder, out_ptr, ""); mask |= 0x1; - si_shader_ctx->shader->db_shader_control |= S_02880C_Z_EXPORT_ENABLE(1); } if (stencil_index >= 0) { out_ptr = si_shader_ctx->radeon_bld.soa.outputs[stencil_index][1]; args[6] = LLVMBuildLoad(base->gallivm->builder, out_ptr, ""); mask |= 0x2; - si_shader_ctx->shader->db_shader_control |= - S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(1); } if (samplemask_index >= 0) { out_ptr = si_shader_ctx->radeon_bld.soa.outputs[samplemask_index][0]; args[7] = LLVMBuildLoad(base->gallivm->builder, out_ptr, ""); mask |= 0x4; - si_shader_ctx->shader->db_shader_control |= S_02880C_MASK_EXPORT_ENABLE(1); } /* SI (except OLAND) has a bug that it only looks @@ -4113,9 +4107,6 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, if (sel->type != PIPE_SHADER_COMPUTE) shader->dx10_clamp_mode = true; - if (sel->info.uses_kill) - shader->db_shader_control |= 
S_02880C_KILL_ENABLE(1); - shader->uses_instanceid = sel->info.uses_instanceid; bld_base->info = poly_stipple ? _shader_info : >info; bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant; @@ -4190,17 +4181,6 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, case TGSI_PROCESSOR_FRAGMENT: si_shader_ctx.radeon_bld.load_input = declare_input_fs; bld_base->emit_epilogue = si_llvm_emit_fs_epilogue; - - switch (sel->info.properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT]) { - case TGSI_FS_DEPTH_LAYOUT_GREATER: - shader->db_shader_control |= - S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_GREATER_THAN_Z); - break; - case TGSI_FS_DEPTH_LAYOUT_LESS: - shader->db_shader_control |= - S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_LESS_THAN_Z); - break; - } break; default: assert(!"Unsupported shader type"); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index d377a2a..067704f 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -201,6 +201,7 @@ struct si_shader_selector { boolforces_persample_interp_for_persp; boolforces_persample_interp_for_linear; + /* GS parameters. */ unsignedesgs_itemsize; unsignedgs_input_verts_per_prim; unsignedgs_output_prim; @@ -210,6 +211,9 @@ struct si_shader_selector { unsignedgsvs_vertex_size; unsignedmax_gsvs_emit_size; + /* PS parameters. */ + unsigneddb_shader_control; + /* masks of "get_unique_index" bits */ uint64_toutputs_written; uint32_tpatch_outputs_written; @@ -275,7 +279,6 @@ struct si_shader { unsignedscratch_bytes_per_wave; unsignedspi_shader_col_format; unsignedspi_shader_z_format; - unsigneddb_shader_control; unsignedcb_shader_mask;
[Mesa-dev] [PATCH 02/23] radeonsi: determine SPI_SHADER_Z_FORMAT outside of shader compilation
From: Marek Olšák--- src/gallium/drivers/radeonsi/si_shader.c| 7 --- src/gallium/drivers/radeonsi/si_shader.h| 1 - src/gallium/drivers/radeonsi/si_state_shaders.c | 7 ++- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 4b49f9d..b7c44b9 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2247,13 +2247,6 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base) si_shader_ctx->screen->b.family != CHIP_OLAND) mask |= 0x1; - if (samplemask_index >= 0) - si_shader_ctx->shader->spi_shader_z_format = V_028710_SPI_SHADER_32_ABGR; - else if (stencil_index >= 0) - si_shader_ctx->shader->spi_shader_z_format = V_028710_SPI_SHADER_32_GR; - else - si_shader_ctx->shader->spi_shader_z_format = V_028710_SPI_SHADER_32_R; - /* Specify which components to enable */ args[0] = lp_build_const_int32(base->gallivm, mask); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 067704f..3d14c79 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -278,7 +278,6 @@ struct si_shader { unsignedfloat_mode; unsignedscratch_bytes_per_wave; unsignedspi_shader_col_format; - unsignedspi_shader_z_format; unsignedcb_shader_mask; union si_shader_key key; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 41e331b..61db8ef 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -431,7 +431,12 @@ static void si_shader_ps(struct si_shader *shader) si_pm4_set_reg(pm4, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl); si_pm4_set_reg(pm4, R_0286D8_SPI_PS_IN_CONTROL, spi_ps_in_control); - si_pm4_set_reg(pm4, R_028710_SPI_SHADER_Z_FORMAT, shader->spi_shader_z_format); + si_pm4_set_reg(pm4, R_028710_SPI_SHADER_Z_FORMAT, + 
info->writes_samplemask ? V_028710_SPI_SHADER_32_ABGR : + info->writes_stencil ? V_028710_SPI_SHADER_32_GR : + info->writes_z ? V_028710_SPI_SHADER_32_R : + V_028710_SPI_SHADER_ZERO); + si_pm4_set_reg(pm4, R_028714_SPI_SHADER_COL_FORMAT, shader->spi_shader_col_format); si_pm4_set_reg(pm4, R_02823C_CB_SHADER_MASK, shader->cb_shader_mask); -- 2.1.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 03/23] radeonsi: write all MRTs only if there is exactly one output
From: Marek OlšákThis doesn't fix a known bug, but better safe than sorry. Also, simplify the expression in si_shader.c. --- src/gallium/drivers/radeonsi/si_shader.c| 5 ++--- src/gallium/drivers/radeonsi/si_state_shaders.c | 4 +++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index b7c44b9..f322c4e 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2189,9 +2189,8 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base) /* This instruction will be emitted at the end of the shader. */ memcpy(last_args, args, sizeof(args)); - /* Handle FS_COLOR0_WRITES_ALL_CBUFS. */ - if (shader->selector->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] && - semantic_index == 0 && + /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */ + if (semantic_index == 0 && si_shader_ctx->shader->key.ps.last_cbuf > 0) { for (int c = 1; c <= si_shader_ctx->shader->key.ps.last_cbuf; c++) { si_llvm_init_export_args_load(bld_base, diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 61db8ef..4b007ec 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -552,8 +552,10 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, case PIPE_SHADER_FRAGMENT: { struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; - if (sel->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]) + if (sel->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] && + sel->info.colors_written == 0x1) key->ps.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1; + key->ps.export_16bpc = sctx->framebuffer.export_16bpc; if (rs) { -- 2.1.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 93577] Total war: Attila crashes at startup
https://bugs.freedesktop.org/show_bug.cgi?id=93577 --- Comment #2 from Jose Fonseca --- (In reply to Michel Dänzer from comment #1) > Reassigning to Mesa core since it happens with the i965 driver as well, but > it looks like it might be a game bug. > > BTW, an apitrace is only useful if replaying it reproduces the problem. Actually the apitrace shows the problem: 2347 glXCreateContextAttribsARB(dpy = 0x6c00ec0, config = 0x6d081a0, share_context = NULL, direct = True, attrib_list = {GLX_CONTEXT_MAJOR_VERSION_ARB, 4, GLX_CONTEXT_MINOR_VERSION_ARB, 3, GLX_CONTEXT_PROFILE_MASK_ARB, GLX_CONTEXT_CORE_PROFILE_BIT_ARB, 0}) = NULL 2348 glXMakeCurrent(dpy = 0x6c00ec0, drawable = 0, ctx = NULL) = True 2350 glGenTextures(n = 1, textures = &0) ... 2595 glMapBufferRange(target = GL_PIXEL_PACK_BUFFER, offset = 0, length = 0, access = GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_FLUSH_EXPLICIT_BIT) = NULL The game requests a 4.3 context, it doesn't get one, but happily proceeds using a null context as if nothing happened ... until it gets a NULL glMapBufferRange and segfaults. glretrace skips gl calls with a NULL context (because on Windows the OPENGL32.DLL silently drops them so quite a few Windows apps inadvertently do it when shutting down, so glretrace needs to ignore them when replaying on Linux to prevent crashes) -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 14/23] radeonsi: don't pass si_shader to si_shader_binary_read
From: Marek Olšák--- src/gallium/drivers/radeonsi/si_compute.c | 3 ++- src/gallium/drivers/radeonsi/si_shader.c | 23 --- src/gallium/drivers/radeonsi/si_shader.h | 7 +-- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 7aedd39..a543c55 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -136,7 +136,8 @@ static void *si_create_compute_state( * the shader code to the GPU. */ init_scratch_buffer(sctx, program); - si_shader_binary_read(sctx->screen, >shader, >b.debug, + si_shader_binary_read(sctx->screen, >shader.binary, + >shader.config, >b.debug, TGSI_PROCESSOR_COMPUTE); si_shader_binary_upload(sctx->screen, >shader); diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index f9e61a2..de117d9 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -3853,12 +3853,13 @@ static void si_shader_dump_disassembly(const struct radeon_shader_binary *binary } } -void si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader, - struct pipe_debug_callback *debug, unsigned processor) +void si_shader_binary_read(struct si_screen *sscreen, + struct radeon_shader_binary *binary, + struct si_shader_config *conf, + struct pipe_debug_callback *debug, + unsigned processor) { - const struct radeon_shader_binary *binary = >binary; - - si_shader_binary_read_config(>binary, >config, 0); + si_shader_binary_read_config(binary, conf, 0); if (r600_can_dump_shader(>b, processor)) { if (!(sscreen->b.debug_flags & DBG_NO_ASM)) @@ -3867,15 +3868,14 @@ void si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader, fprintf(stderr, "*** SHADER STATS ***\n" "SGPRS: %d\nVGPRS: %d\nCode Size: %d bytes\nLDS: %d blocks\n" "Scratch: %d bytes per wave\n\n", - shader->config.num_sgprs, shader->config.num_vgprs, 
binary->code_size, - shader->config.lds_size, shader->config.scratch_bytes_per_wave); + conf->num_sgprs, conf->num_vgprs, binary->code_size, + conf->lds_size, conf->scratch_bytes_per_wave); } pipe_debug_message(debug, SHADER_INFO, "Shader Stats: SGPRS: %d VGPRS: %d Code Size: %d LDS: %d Scratch: %d", - shader->config.num_sgprs, shader->config.num_vgprs, - binary->code_size, shader->config.lds_size, - shader->config.scratch_bytes_per_wave); + conf->num_sgprs, conf->num_vgprs, binary->code_size, + conf->lds_size, conf->scratch_bytes_per_wave); } int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader, @@ -3900,7 +3900,8 @@ int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader, return r; } - si_shader_binary_read(sscreen, shader, debug, processor); + si_shader_binary_read(sscreen, >binary, >config, + debug, processor); r = si_shader_binary_upload(sscreen, shader); if (r) diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 93d5af6..b0abacc 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -339,8 +339,11 @@ int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader, void si_shader_destroy(struct si_shader *shader); unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index); int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader); -void si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader, - struct pipe_debug_callback *debug, unsigned processor); +void si_shader_binary_read(struct si_screen *sscreen, + struct radeon_shader_binary *binary, + struct si_shader_config *conf, + struct pipe_debug_callback *debug, + unsigned processor); void si_shader_apply_scratch_relocs(struct si_context *sctx, struct si_shader *shader, uint64_t scratch_va); -- 2.1.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 22/23] radeonsi: move si_shader_dump call out of si_compile_llvm
From: Marek Olšák--- src/gallium/drivers/radeonsi/si_compute.c | 3 +++ src/gallium/drivers/radeonsi/si_shader.c | 10 -- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 2380242..ffac656 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -125,6 +125,9 @@ static void *si_create_compute_state( si_compile_llvm(sctx->screen, >kernels[i].binary, >kernels[i].config, sctx->tm, mod, >b.debug, TGSI_PROCESSOR_COMPUTE); + si_shader_dump(sctx->screen, >kernels[i].binary, + >kernels[i].config, + >b.debug, TGSI_PROCESSOR_COMPUTE); si_shader_binary_upload(sctx->screen, >kernels[i]); LLVMDisposeModule(mod); } diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index fea5b14..58d16cf 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -3913,7 +3913,6 @@ int si_compile_llvm(struct si_screen *sscreen, } si_shader_binary_read_config(binary, conf, 0); - si_shader_dump(sscreen, binary, conf, debug, processor); FREE(binary->config); FREE(binary->global_symbol_offsets); @@ -3996,8 +3995,12 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen, _shader_ctx->shader->config, si_shader_ctx->tm, bld_base->base.gallivm->module, debug, TGSI_PROCESSOR_GEOMETRY); - if (!r) + if (!r) { + si_shader_dump(sscreen, _shader_ctx->shader->binary, + _shader_ctx->shader->config, debug, + TGSI_PROCESSOR_GEOMETRY); r = si_shader_binary_upload(sscreen, si_shader_ctx->shader); + } radeon_llvm_dispose(_shader_ctx->radeon_bld); @@ -4199,6 +4202,9 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, goto out; } + si_shader_dump(sscreen, >binary, >config, + debug, si_shader_ctx.type); + r = si_shader_binary_upload(sscreen, shader); if (r) { fprintf(stderr, "LLVM failed to upload shader\n"); -- 2.1.4 ___ mesa-dev mailing list 
mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 17/23] radeonsi: don't pass si_shader to si_compile_llvm
From: Marek Olšák--- src/gallium/drivers/radeonsi/si_compute.c | 3 ++- src/gallium/drivers/radeonsi/si_shader.c | 33 ++- src/gallium/drivers/radeonsi/si_shader.h | 10 +++--- 3 files changed, 28 insertions(+), 18 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index aedea8e..3562bd8 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -122,7 +122,8 @@ static void *si_create_compute_state( for (i = 0; i < program->num_kernels; i++) { LLVMModuleRef mod = radeon_llvm_get_kernel_module(program->llvm_ctx, i, code, header->num_bytes); - si_compile_llvm(sctx->screen, >kernels[i], sctx->tm, + si_compile_llvm(sctx->screen, >kernels[i].binary, + >kernels[i].config, sctx->tm, mod, >b.debug, TGSI_PROCESSOR_COMPUTE); si_shader_binary_upload(sctx->screen, >kernels[i]); LLVMDisposeModule(mod); diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 91473a7..770f5b7 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -3878,9 +3878,13 @@ void si_shader_binary_read(struct si_screen *sscreen, conf->lds_size, conf->scratch_bytes_per_wave); } -int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader, - LLVMTargetMachineRef tm, LLVMModuleRef mod, - struct pipe_debug_callback *debug, unsigned processor) +int si_compile_llvm(struct si_screen *sscreen, + struct radeon_shader_binary *binary, + struct si_shader_config *conf, + LLVMTargetMachineRef tm, + LLVMModuleRef mod, + struct pipe_debug_callback *debug, + unsigned processor) { int r = 0; unsigned count = p_atomic_inc_return(>b.num_compilations); @@ -3892,21 +3896,20 @@ int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader, LLVMDumpModule(mod); } - if (!si_replace_shader(count, >binary)) { - r = radeon_llvm_compile(mod, >binary, + if (!si_replace_shader(count, binary)) { + r = 
radeon_llvm_compile(mod, binary, r600_get_llvm_processor_name(sscreen->b.family), tm, debug); if (r) return r; } - si_shader_binary_read(sscreen, >binary, >config, - debug, processor); + si_shader_binary_read(sscreen, binary, conf, debug, processor); - FREE(shader->binary.config); - FREE(shader->binary.global_symbol_offsets); - shader->binary.config = NULL; - shader->binary.global_symbol_offsets = NULL; + FREE(binary->config); + FREE(binary->global_symbol_offsets); + binary->config = NULL; + binary->global_symbol_offsets = NULL; return r; } @@ -3980,8 +3983,9 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen, if (dump) fprintf(stderr, "Copy Vertex Shader for Geometry Shader:\n\n"); - r = si_compile_llvm(sscreen, si_shader_ctx->shader, - si_shader_ctx->tm, bld_base->base.gallivm->module, + r = si_compile_llvm(sscreen, _shader_ctx->shader->binary, + _shader_ctx->shader->config, si_shader_ctx->tm, + bld_base->base.gallivm->module, debug, TGSI_PROCESSOR_GEOMETRY); if (!r) r = si_shader_binary_upload(sscreen, si_shader_ctx->shader); @@ -4179,7 +4183,8 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, radeon_llvm_finalize_module(_shader_ctx.radeon_bld); mod = bld_base->base.gallivm->module; - r = si_compile_llvm(sscreen, shader, tm, mod, debug, si_shader_ctx.type); + r = si_compile_llvm(sscreen, >binary, >config, tm, + mod, debug, si_shader_ctx.type); if (r) { fprintf(stderr, "LLVM failed to compile shader\n"); goto out; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index b0abacc..2220fc7 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -333,9 +333,13 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, struct si_shader *shader, struct pipe_debug_callback *debug); void si_dump_shader_key(unsigned shader,
[Mesa-dev] [PATCH 23/23] radeonsi: adjust the parameters of si_shader_dump
From: Marek Olšák The function will be extended to dump all binaries shaders will consist of, so si_shader* makes sense here. --- src/gallium/drivers/radeonsi/si_compute.c | 6 ++ src/gallium/drivers/radeonsi/si_shader.c | 18 +++--- src/gallium/drivers/radeonsi/si_shader.h | 7 ++- 3 files changed, 11 insertions(+), 20 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index ffac656..5a08cbf 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -125,8 +125,7 @@ static void *si_create_compute_state( si_compile_llvm(sctx->screen, >kernels[i].binary, >kernels[i].config, sctx->tm, mod, >b.debug, TGSI_PROCESSOR_COMPUTE); - si_shader_dump(sctx->screen, >kernels[i].binary, - >kernels[i].config, + si_shader_dump(sctx->screen, >kernels[i], >b.debug, TGSI_PROCESSOR_COMPUTE); si_shader_binary_upload(sctx->screen, >kernels[i]); LLVMDisposeModule(mod); @@ -143,8 +142,7 @@ static void *si_create_compute_state( init_scratch_buffer(sctx, program); si_shader_binary_read_config(>shader.binary, >shader.config, 0); - si_shader_dump(sctx->screen, >shader.binary, - >shader.config, >b.debug, + si_shader_dump(sctx->screen, >shader, >b.debug, TGSI_PROCESSOR_COMPUTE); si_shader_binary_upload(sctx->screen, >shader); diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 58d16cf..b1a9a1e 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -3873,17 +3873,15 @@ static void si_shader_dump_stats(struct si_screen *sscreen, conf->lds_size, conf->scratch_bytes_per_wave); } -void si_shader_dump(struct si_screen *sscreen, - struct radeon_shader_binary *binary, - struct si_shader_config *conf, - struct pipe_debug_callback *debug, - unsigned processor) +void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader, + struct pipe_debug_callback *debug, unsigned processor) { if
(r600_can_dump_shader(>b, processor)) if (!(sscreen->b.debug_flags & DBG_NO_ASM)) - si_shader_dump_disassembly(binary, debug); + si_shader_dump_disassembly(>binary, debug); - si_shader_dump_stats(sscreen, conf, binary->code_size, debug, processor); + si_shader_dump_stats(sscreen, >config, +shader->binary.code_size, debug, processor); } int si_compile_llvm(struct si_screen *sscreen, @@ -3996,8 +3994,7 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen, bld_base->base.gallivm->module, debug, TGSI_PROCESSOR_GEOMETRY); if (!r) { - si_shader_dump(sscreen, _shader_ctx->shader->binary, - _shader_ctx->shader->config, debug, + si_shader_dump(sscreen, si_shader_ctx->shader, debug, TGSI_PROCESSOR_GEOMETRY); r = si_shader_binary_upload(sscreen, si_shader_ctx->shader); } @@ -4202,8 +4199,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, goto out; } - si_shader_dump(sscreen, >binary, >config, - debug, si_shader_ctx.type); + si_shader_dump(sscreen, shader, debug, si_shader_ctx.type); r = si_shader_binary_upload(sscreen, shader); if (r) { diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 712bcd9..1635358 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -344,11 +344,8 @@ void si_shader_destroy(struct si_shader *shader); void si_shader_destroy_binary(struct radeon_shader_binary *binary); unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index); int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader); -void si_shader_dump(struct si_screen *sscreen, - struct radeon_shader_binary *binary, - struct si_shader_config *conf, - struct pipe_debug_callback *debug, - unsigned processor); +void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader, + struct pipe_debug_callback *debug, unsigned processor); void si_shader_apply_scratch_relocs(struct si_context
[Mesa-dev] [PATCH 21/23] radeonsi: inline si_shader_binary_read
From: Marek Olšák--- src/gallium/drivers/radeonsi/si_compute.c | 4 ++-- src/gallium/drivers/radeonsi/si_shader.c | 8 +--- src/gallium/drivers/radeonsi/si_shader.h | 2 -- 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index ffa941b..2380242 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -138,8 +138,8 @@ static void *si_create_compute_state( * the shader code to the GPU. */ init_scratch_buffer(sctx, program); - si_shader_binary_read(>shader.binary, - >shader.config); + si_shader_binary_read_config(>shader.binary, +>shader.config, 0); si_shader_dump(sctx->screen, >shader.binary, >shader.config, >b.debug, TGSI_PROCESSOR_COMPUTE); diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 1f334af..fea5b14 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -3886,12 +3886,6 @@ void si_shader_dump(struct si_screen *sscreen, si_shader_dump_stats(sscreen, conf, binary->code_size, debug, processor); } -void si_shader_binary_read(struct radeon_shader_binary *binary, - struct si_shader_config *conf) -{ - si_shader_binary_read_config(binary, conf, 0); -} - int si_compile_llvm(struct si_screen *sscreen, struct radeon_shader_binary *binary, struct si_shader_config *conf, @@ -3918,7 +3912,7 @@ int si_compile_llvm(struct si_screen *sscreen, return r; } - si_shader_binary_read(binary, conf); + si_shader_binary_read_config(binary, conf, 0); si_shader_dump(sscreen, binary, conf, debug, processor); FREE(binary->config); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 51dfcd0..712bcd9 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -344,8 +344,6 @@ void si_shader_destroy(struct si_shader *shader); void 
si_shader_destroy_binary(struct radeon_shader_binary *binary); unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index); int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader); -void si_shader_binary_read(struct radeon_shader_binary *binary, - struct si_shader_config *conf); void si_shader_dump(struct si_screen *sscreen, struct radeon_shader_binary *binary, struct si_shader_config *conf, -- 2.1.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 05/23] radeonsi: simplify setting the DONE bit for PS exports
From: Marek OlšákFirst find out what the last export is and simply set the DONE bit there. --- src/gallium/drivers/radeonsi/si_shader.c| 126 ++-- src/gallium/drivers/radeonsi/si_state_shaders.c | 2 +- 2 files changed, 55 insertions(+), 73 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 85113c0..8441fb4 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2109,10 +2109,36 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base) struct tgsi_shader_info *info = >selector->info; LLVMBuilderRef builder = base->gallivm->builder; LLVMValueRef args[9]; - LLVMValueRef last_args[9] = { 0 }; int depth_index = -1, stencil_index = -1, samplemask_index = -1; + int last_color_export = -1; int i; + /* If there are no outputs, add a dummy export. */ + if (!info->num_outputs) { + args[0] = lp_build_const_int32(base->gallivm, 0x0); /* enabled channels */ + args[1] = uint->one; /* whether the EXEC mask is valid */ + args[2] = uint->one; /* DONE bit */ + args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRT); + args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */ + args[5] = uint->zero; /* R */ + args[6] = uint->zero; /* G */ + args[7] = uint->zero; /* B */ + args[8] = uint->zero; /* A */ + + lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export", + LLVMVoidTypeInContext(base->gallivm->context), + args, 9, 0); + return; + } + + /* Determine the last export. If MRTZ is present, it's always last. +* Otherwise, find the last color export. 
+*/ + if (!info->writes_z && !info->writes_stencil && !info->writes_samplemask) + for (i = 0; i < info->num_outputs; i++) + if (info->output_semantic_name[i] == TGSI_SEMANTIC_COLOR) + last_color_export = i; + for (i = 0; i < info->num_outputs; i++) { unsigned semantic_name = info->output_semantic_name[i]; unsigned semantic_index = info->output_semantic_index[i]; @@ -2157,56 +2183,48 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base) break; default: - target = 0; fprintf(stderr, "Warning: SI unhandled fs output type:%d\n", semantic_name); + continue; } - si_llvm_init_export_args_load(bld_base, - si_shader_ctx->radeon_bld.soa.outputs[i], - target, args); - - if (semantic_name == TGSI_SEMANTIC_COLOR) { - /* If there is an export instruction waiting to be emitted, do so now. */ - if (last_args[0]) { - lp_build_intrinsic(base->gallivm->builder, - "llvm.SI.export", + /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */ + if (semantic_index == 0 && + si_shader_ctx->shader->key.ps.last_cbuf > 0) { + for (int c = 1; c <= si_shader_ctx->shader->key.ps.last_cbuf; c++) { + si_llvm_init_export_args_load(bld_base, + si_shader_ctx->radeon_bld.soa.outputs[i], + V_008DFC_SQ_EXP_MRT + c, args); + lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export", LLVMVoidTypeInContext(base->gallivm->context), - last_args, 9, 0); + args, 9, 0); } + } - /* This instruction will be emitted at the end of the shader. */ - memcpy(last_args, args, sizeof(args)); - - /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */ - if (semantic_index == 0 && - si_shader_ctx->shader->key.ps.last_cbuf > 0) { - for (int c = 1; c <= si_shader_ctx->shader->key.ps.last_cbuf; c++) { - si_llvm_init_export_args_load(bld_base, -
[Mesa-dev] [PATCH 20/23] radeonsi: move si_shader_dump call out of si_shader_binary_read
From: Marek Olšák--- src/gallium/drivers/radeonsi/si_compute.c | 8 +--- src/gallium/drivers/radeonsi/si_shader.c | 21 + src/gallium/drivers/radeonsi/si_shader.h | 12 +++- 3 files changed, 21 insertions(+), 20 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 3562bd8..ffa941b 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -138,9 +138,11 @@ static void *si_create_compute_state( * the shader code to the GPU. */ init_scratch_buffer(sctx, program); - si_shader_binary_read(sctx->screen, >shader.binary, - >shader.config, >b.debug, - TGSI_PROCESSOR_COMPUTE); + si_shader_binary_read(>shader.binary, + >shader.config); + si_shader_dump(sctx->screen, >shader.binary, + >shader.config, >b.debug, + TGSI_PROCESSOR_COMPUTE); si_shader_binary_upload(sctx->screen, >shader); #endif diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 95cdf8a..1f334af 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -3873,11 +3873,11 @@ static void si_shader_dump_stats(struct si_screen *sscreen, conf->lds_size, conf->scratch_bytes_per_wave); } -static void si_shader_dump(struct si_screen *sscreen, - struct radeon_shader_binary *binary, - struct si_shader_config *conf, - struct pipe_debug_callback *debug, - unsigned processor) +void si_shader_dump(struct si_screen *sscreen, + struct radeon_shader_binary *binary, + struct si_shader_config *conf, + struct pipe_debug_callback *debug, + unsigned processor) { if (r600_can_dump_shader(>b, processor)) if (!(sscreen->b.debug_flags & DBG_NO_ASM)) @@ -3886,14 +3886,10 @@ static void si_shader_dump(struct si_screen *sscreen, si_shader_dump_stats(sscreen, conf, binary->code_size, debug, processor); } -void si_shader_binary_read(struct si_screen *sscreen, - struct radeon_shader_binary *binary, - struct si_shader_config *conf, - struct 
pipe_debug_callback *debug, - unsigned processor) +void si_shader_binary_read(struct radeon_shader_binary *binary, + struct si_shader_config *conf) { si_shader_binary_read_config(binary, conf, 0); - si_shader_dump(sscreen, binary, conf, debug, processor); } int si_compile_llvm(struct si_screen *sscreen, @@ -3922,7 +3918,8 @@ int si_compile_llvm(struct si_screen *sscreen, return r; } - si_shader_binary_read(sscreen, binary, conf, debug, processor); + si_shader_binary_read(binary, conf); + si_shader_dump(sscreen, binary, conf, debug, processor); FREE(binary->config); FREE(binary->global_symbol_offsets); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 780383c..51dfcd0 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -344,11 +344,13 @@ void si_shader_destroy(struct si_shader *shader); void si_shader_destroy_binary(struct radeon_shader_binary *binary); unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index); int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader); -void si_shader_binary_read(struct si_screen *sscreen, - struct radeon_shader_binary *binary, - struct si_shader_config *conf, - struct pipe_debug_callback *debug, - unsigned processor); +void si_shader_binary_read(struct radeon_shader_binary *binary, + struct si_shader_config *conf); +void si_shader_dump(struct si_screen *sscreen, + struct radeon_shader_binary *binary, + struct si_shader_config *conf, + struct pipe_debug_callback *debug, + unsigned processor); void si_shader_apply_scratch_relocs(struct si_context *sctx, struct si_shader *shader, uint64_t scratch_va); -- 2.1.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 19/23] radeonsi: separate shader dumping code to si_shader_dump and *_dump_stats
From: Marek Olšák Eventually, I'd like to dump stats for several combined binaries, which is why you don't see a binary parameter in si_shader_dump_stats. --- src/gallium/drivers/radeonsi/si_shader.c | 42 +++- 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 0773fff..95cdf8a 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -3853,31 +3853,49 @@ static void si_shader_dump_disassembly(const struct radeon_shader_binary *binary } } -void si_shader_binary_read(struct si_screen *sscreen, - struct radeon_shader_binary *binary, - struct si_shader_config *conf, - struct pipe_debug_callback *debug, - unsigned processor) +static void si_shader_dump_stats(struct si_screen *sscreen, +struct si_shader_config *conf, +unsigned code_size, +struct pipe_debug_callback *debug, +unsigned processor) { - si_shader_binary_read_config(binary, conf, 0); - if (r600_can_dump_shader(>b, processor)) { - if (!(sscreen->b.debug_flags & DBG_NO_ASM)) - si_shader_dump_disassembly(binary, debug); - fprintf(stderr, "*** SHADER STATS ***\n" "SGPRS: %d\nVGPRS: %d\nCode Size: %d bytes\nLDS: %d blocks\n" "Scratch: %d bytes per wave\n\n", - conf->num_sgprs, conf->num_vgprs, binary->code_size, + conf->num_sgprs, conf->num_vgprs, code_size, conf->lds_size, conf->scratch_bytes_per_wave); } pipe_debug_message(debug, SHADER_INFO, "Shader Stats: SGPRS: %d VGPRS: %d Code Size: %d LDS: %d Scratch: %d", - conf->num_sgprs, conf->num_vgprs, binary->code_size, + conf->num_sgprs, conf->num_vgprs, code_size, conf->lds_size, conf->scratch_bytes_per_wave); } +static void si_shader_dump(struct si_screen *sscreen, + struct radeon_shader_binary *binary, + struct si_shader_config *conf, + struct pipe_debug_callback *debug, + unsigned processor) +{ + if (r600_can_dump_shader(>b, processor)) + if (!(sscreen->b.debug_flags & DBG_NO_ASM)) + si_shader_dump_disassembly(binary,
debug); + + si_shader_dump_stats(sscreen, conf, binary->code_size, debug, processor); +} + +void si_shader_binary_read(struct si_screen *sscreen, + struct radeon_shader_binary *binary, + struct si_shader_config *conf, + struct pipe_debug_callback *debug, + unsigned processor) +{ + si_shader_binary_read_config(binary, conf, 0); + si_shader_dump(sscreen, binary, conf, debug, processor); +} + int si_compile_llvm(struct si_screen *sscreen, struct radeon_shader_binary *binary, struct si_shader_config *conf, -- 2.1.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] llvmpipe: scale up bounding box planes to subpixel precision
Intricate stuff. As long as testing was successful, looks good to me. For both, Reviewed-by: Brian Paul On 01/05/2016 05:06 PM, srol...@vmware.com wrote: From: Roland Scheidegger Otherwise some planes we get in rasterization have subpixel precision, others not. Doesn't matter so far, but will soon. (OpenGL actually supports viewports with subpixel accuracy, so could even do bounding box calcs with that). --- src/gallium/drivers/llvmpipe/lp_setup_line.c | 20 ++-- src/gallium/drivers/llvmpipe/lp_setup_point.c | 20 ++-- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 20 ++-- 3 files changed, 30 insertions(+), 30 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c index a0de599..f425825 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_line.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -713,24 +713,24 @@ try_setup_line( struct lp_setup_context *setup, const struct u_rect *scissor = >scissors[viewport_index]; - plane[4].dcdx = -1; + plane[4].dcdx = -1 << 8; plane[4].dcdy = 0; - plane[4].c = 1-scissor->x0; - plane[4].eo = 1; + plane[4].c = (1-scissor->x0) << 8; + plane[4].eo = 1 << 8; - plane[5].dcdx = 1; + plane[5].dcdx = 1 << 8; plane[5].dcdy = 0; - plane[5].c = scissor->x1+1; + plane[5].c = (scissor->x1+1) << 8; plane[5].eo = 0; plane[6].dcdx = 0; - plane[6].dcdy = 1; - plane[6].c = 1-scissor->y0; - plane[6].eo = 1; + plane[6].dcdy = 1 << 8; + plane[6].c = (1-scissor->y0) << 8; + plane[6].eo = 1 << 8; plane[7].dcdx = 0; - plane[7].dcdy = -1; - plane[7].c = scissor->y1+1; + plane[7].dcdy = -1 << 8; + plane[7].c = (scissor->y1+1) << 8; plane[7].eo = 0; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c index 14c389f..ddb6f0e 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_point.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c @@ -492,24 +492,24 @@ try_setup_point( struct lp_setup_context *setup, { struct lp_rast_plane *plane =
GET_PLANES(point); - plane[0].dcdx = -1; + plane[0].dcdx = -1 << 8; plane[0].dcdy = 0; - plane[0].c = 1-bbox.x0; - plane[0].eo = 1; + plane[0].c = (1-bbox.x0) << 8; + plane[0].eo = 1 << 8; - plane[1].dcdx = 1; + plane[1].dcdx = 1 << 8; plane[1].dcdy = 0; - plane[1].c = bbox.x1+1; + plane[1].c = (bbox.x1+1) << 8; plane[1].eo = 0; plane[2].dcdx = 0; - plane[2].dcdy = 1; - plane[2].c = 1-bbox.y0; - plane[2].eo = 1; + plane[2].dcdy = 1 << 8; + plane[2].c = (1-bbox.y0) << 8; + plane[2].eo = 1 << 8; plane[3].dcdx = 0; - plane[3].dcdy = -1; - plane[3].c = bbox.y1+1; + plane[3].dcdy = -1 << 8; + plane[3].c = (bbox.y1+1) << 8; plane[3].eo = 0; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index fefd1c1..5ad4ac1 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -576,24 +576,24 @@ do_triangle_ccw(struct lp_setup_context *setup, if (nr_planes == 7) { const struct u_rect *scissor = >scissors[viewport_index]; - plane[3].dcdx = -1; + plane[3].dcdx = -1 << 8; plane[3].dcdy = 0; - plane[3].c = 1-scissor->x0; - plane[3].eo = 1; + plane[3].c = (1-scissor->x0) << 8; + plane[3].eo = 1 << 8; - plane[4].dcdx = 1; + plane[4].dcdx = 1 << 8; plane[4].dcdy = 0; - plane[4].c = scissor->x1+1; + plane[4].c = (scissor->x1+1) << 8; plane[4].eo = 0; plane[5].dcdx = 0; - plane[5].dcdy = 1; - plane[5].c = 1-scissor->y0; - plane[5].eo = 1; + plane[5].dcdy = 1 << 8; + plane[5].c = (1-scissor->y0) << 8; + plane[5].eo = 1 << 8; plane[6].dcdx = 0; - plane[6].dcdy = -1; - plane[6].c = scissor->y1+1; + plane[6].dcdy = -1 << 8; + plane[6].c = (scissor->y1+1) << 8; plane[6].eo = 0; } ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 77449] Tracker bug for all bugs related to Steam titles
https://bugs.freedesktop.org/show_bug.cgi?id=77449 Bug 77449 depends on bug 76664, which changed state. Bug 76664 Summary: Metro: Last Light segfaults very often in level 10 (swamp) on loading last checkpoint https://bugs.freedesktop.org/show_bug.cgi?id=76664 What|Removed |Added Status|ASSIGNED|RESOLVED Resolution|--- |WONTFIX -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] glsl: fix varying slot allocation for blocks and structs with explicit locations
On Wed, Jan 6, 2016 at 4:32 AM, Timothy Arceri wrote: > Previously each member was being counted as using a single slot, > count_attribute_slots() fixes the count for array and struct members. > > Also don't assign a negitive to the unsigned expl_location variable. > --- > > Fixes these new piglit tests: >http://patchwork.freedesktop.org/patch/69531/ > > src/glsl/ast_to_hir.cpp | 9 + > 1 file changed, 5 insertions(+), 4 deletions(-) > > diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp > index 0197cdc..50d5e22 100644 > --- a/src/glsl/ast_to_hir.cpp > +++ b/src/glsl/ast_to_hir.cpp > @@ -6408,12 +6408,13 @@ ast_process_struct_or_iface_block_members(exec_list > *instructions, > if (process_qualifier_constant(state, , "location", > qual->location, _location)) { > fields[i].location = VARYING_SLOT_VAR0 + qual_location; > - expl_location = fields[i].location + 1; > + expl_location = fields[i].location + > + fields[i].type->count_attribute_slots(false); > } > } else { > if (layout && layout->flags.q.explicit_location) { > fields[i].location = expl_location; > - expl_location = expl_location + 1; > + expl_location += fields[i].type->count_attribute_slots(false); > } else { > fields[i].location = -1; > } > @@ -6570,7 +6571,7 @@ ast_struct_specifier::hir(exec_list *instructions, > > state->struct_specifier_depth++; > > - unsigned expl_location = -1; > + unsigned expl_location = 0; > if (layout && layout->flags.q.explicit_location) { >if (!process_qualifier_constant(state, , "location", >layout->location, _location)) { > @@ -6763,7 +6764,7 @@ ast_interface_block::hir(exec_list *instructions, >return NULL; > } > > - unsigned expl_location = -1; > + unsigned expl_location = 0; There are a number of places that check for location != -1 as a sanity check... won't this defeat that? 
> if (layout.flags.q.explicit_location) { >if (!process_qualifier_constant(state, , "location", >layout.location, _location)) { > -- > 2.4.3 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 05/23] radeonsi: simplify setting the DONE bit for PS exports
Patches 1-5 are Reviewed-by: Nicolai Hähnle On 06.01.2016 07:41, Marek Olšák wrote: From: Marek Olšák First find out what the last export is and simply set the DONE bit there. --- src/gallium/drivers/radeonsi/si_shader.c| 126 ++-- src/gallium/drivers/radeonsi/si_state_shaders.c | 2 +- 2 files changed, 55 insertions(+), 73 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 85113c0..8441fb4 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2109,10 +2109,36 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base) struct tgsi_shader_info *info = >selector->info; LLVMBuilderRef builder = base->gallivm->builder; LLVMValueRef args[9]; - LLVMValueRef last_args[9] = { 0 }; int depth_index = -1, stencil_index = -1, samplemask_index = -1; + int last_color_export = -1; int i; + /* If there are no outputs, add a dummy export. */ + if (!info->num_outputs) { + args[0] = lp_build_const_int32(base->gallivm, 0x0); /* enabled channels */ + args[1] = uint->one; /* whether the EXEC mask is valid */ + args[2] = uint->one; /* DONE bit */ + args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRT); + args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */ + args[5] = uint->zero; /* R */ + args[6] = uint->zero; /* G */ + args[7] = uint->zero; /* B */ + args[8] = uint->zero; /* A */ + + lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export", + LLVMVoidTypeInContext(base->gallivm->context), + args, 9, 0); + return; + } + + /* Determine the last export. If MRTZ is present, it's always last. +* Otherwise, find the last color export. 
+*/ + if (!info->writes_z && !info->writes_stencil && !info->writes_samplemask) + for (i = 0; i < info->num_outputs; i++) + if (info->output_semantic_name[i] == TGSI_SEMANTIC_COLOR) + last_color_export = i; + for (i = 0; i < info->num_outputs; i++) { unsigned semantic_name = info->output_semantic_name[i]; unsigned semantic_index = info->output_semantic_index[i]; @@ -2157,56 +2183,48 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base) break; default: - target = 0; fprintf(stderr, "Warning: SI unhandled fs output type:%d\n", semantic_name); + continue; } - si_llvm_init_export_args_load(bld_base, - si_shader_ctx->radeon_bld.soa.outputs[i], - target, args); - - if (semantic_name == TGSI_SEMANTIC_COLOR) { - /* If there is an export instruction waiting to be emitted, do so now. */ - if (last_args[0]) { - lp_build_intrinsic(base->gallivm->builder, - "llvm.SI.export", + /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */ + if (semantic_index == 0 && + si_shader_ctx->shader->key.ps.last_cbuf > 0) { + for (int c = 1; c <= si_shader_ctx->shader->key.ps.last_cbuf; c++) { + si_llvm_init_export_args_load(bld_base, + si_shader_ctx->radeon_bld.soa.outputs[i], + V_008DFC_SQ_EXP_MRT + c, args); + lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export", LLVMVoidTypeInContext(base->gallivm->context), - last_args, 9, 0); + args, 9, 0); } + } - /* This instruction will be emitted at the end of the shader. */ - memcpy(last_args, args, sizeof(args)); - - /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */ - if (semantic_index == 0 && - si_shader_ctx->shader->key.ps.last_cbuf > 0) { - for (int c = 1; c <= si_shader_ctx->shader->key.ps.last_cbuf; c++) { -
Re: [Mesa-dev] [PATCH 2/2] llvmpipe: avoid most 64 bit math in rasterization
Am 06.01.2016 um 17:31 schrieb Jose Fonseca: > On 06/01/16 16:26, Jose Fonseca wrote: >> On 06/01/16 00:06, srol...@vmware.com wrote: >>> From: Roland Scheidegger>>> >>> The trick here is to recognize that in the c + n * dcdx calculations, >>> not only can the lower FIXED_ORDER bits not change (as the dcdx values >>> have those all zero) but that this means the sign bit of the >>> calculations >>> cannot be different as well, that is >>> sign(c + n*dcdx) == sign((c >> FIXED_ORDER) + n*(dcdx >> FIXED_ORDER)). >>> That shaves off more than enough bits to never require 64bit masks. >>> A shifted plane c value could still easily exceed 32 bits, however >>> since we >>> throw out planes which are trivial accept even before binning (and >>> similarly >>> don't even get to see tris for which there was a trivial reject >>> plane)) this >>> is never a problem. >>> The idea isnt't all that revolutionary, in fact something similar was >>> tried >>> ages ago (9773722c2b09d5f0615a47cecf4347859474dc56) back when the >>> values were >>> only 32 bit anyway. I believe now it didn't quite work then because the >>> adjustment needed for testing trivial reject / partial masks wasn't >>> handled >>> correctly. >>> This still keeps the separate 32/64 bit paths for now, as the 32 bit >>> one still >>> looks minimally simpler (and also because if we'd pass in dcdx/dcdy/eo >>> unscaled >>> from setup which would be a good reason to ditch the 32 bit path, we'd >>> need to >>> change the special-purpose rasterization functions for small tris). >>> >>> This passes piglit triangle-rasterization (-fbo -auto -max_size >>> -subpixelbits 8). It still fails triangle-rasterization-overdraw >>> -max_size >>> (no change, fails everything at position 2048 - interestingly for >>> softpipe, >>> nvidia maxwell 1 blob, and amd evergreen open-source drivers the test >>> fails >>> as well but at 4096 - seems like we're missing a float mantissa bit >>> somewhere!). 
>> >> I don't think that's how the test is supposed to be run. >> >> If you do an apitrace, you'll see the test creates a fbo with 1000x1000, >> a viewport with 16Kx16K, and does a readpixels of 4Kx4K... > > The problem is that the generic "-fbo" option is not useful for this, as > we can't reliably resize it after the fact. > > Take a look at tests/general/triangle-rasterization.cpp -- it has a > different option "-use_fbo" that creates its own fbo. OK I was running that the wrong way too I think. This one still passes with -max_size -use_fbo -subpixelbits 8 (takes _forever_ though - all due to convert_ubyte in readpixel path...) triangle-rasterization-overdraw with just -auto passes. The max_size parameter is a bit confusing since it won't do anything at all without -fbo as piglit_width/height will just get overwritten to window_width/height (and with fbo it will just fail badly). Increasing the window size manually to 8192/8192 won't really work neither as the size will be cut down to screen size. However, increasing this and then use -fbo actually does the right thing. And passes. Would be nice if piglit could pick up those size parameters _after_ piglit_init... 
Roland > Jose > > >> >> Jose >> >>> --- >>> src/gallium/drivers/llvmpipe/lp_rast_tri.c | 84 >>> +-- >>> src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h | 107 >>> + >>> 2 files changed, 133 insertions(+), 58 deletions(-) >>> >>> diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c >>> b/src/gallium/drivers/llvmpipe/lp_rast_tri.c >>> index c9b9221..a4dd6ef 100644 >>> --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c >>> +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c >>> @@ -64,43 +64,43 @@ block_full_16(struct lp_rasterizer_task *task, >>> } >>> >>> static inline unsigned >>> -build_mask_linear(int64_t c, int64_t dcdx, int64_t dcdy) >>> +build_mask_linear(int32_t c, int32_t dcdx, int32_t dcdy) >>> { >>> unsigned mask = 0; >>> >>> - int64_t c0 = c; >>> - int64_t c1 = c0 + dcdy; >>> - int64_t c2 = c1 + dcdy; >>> - int64_t c3 = c2 + dcdy; >>> - >>> - mask |= ((c0 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 0); >>> - mask |= ((c0 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 1); >>> - mask |= ((c0 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 2); >>> - mask |= ((c0 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 3); >>> - mask |= ((c1 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 4); >>> - mask |= ((c1 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 5); >>> - mask |= ((c1 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 6); >>> - mask |= ((c1 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 7); >>> - mask |= ((c2 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 8); >>> - mask |= ((c2 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 9); >>> - mask |= ((c2 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 10); >>> - mask |= ((c2 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 11); >>> - mask |= ((c3 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 12); >>> - mask |= ((c3 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 13); >>> - mask |= ((c3 + 2 * dcdx) >>
[Mesa-dev] [Bug 77449] Tracker bug for all bugs related to Steam titles
https://bugs.freedesktop.org/show_bug.cgi?id=77449 Ernst Sjöstrand changed: What|Removed |Added CC||ern...@gmail.com -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 77449] Tracker bug for all bugs related to Steam titles
https://bugs.freedesktop.org/show_bug.cgi?id=77449 Ernst Sjöstrand changed: What|Removed |Added Depends on||92944 -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 23/28] glsl: add pack varying to resource list for vertex input / fragment output
On 2015-12-29 06:00, Timothy Arceri wrote: > This is needed now that we pack these type of varyings when they have a > component layout qualifier. > --- > src/glsl/linker.cpp | 15 --- > 1 file changed, 8 insertions(+), 7 deletions(-) > > diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp > index 44dd7f0..52a326a 100644 > --- a/src/glsl/linker.cpp > +++ b/src/glsl/linker.cpp > @@ -3763,13 +3763,14 @@ build_program_resource_list(struct gl_shader_program > *shProg) > if (input_stage == MESA_SHADER_STAGES && output_stage == 0) >return; > > - /* Program interface needs to expose varyings in case of SSO. */ > - if (shProg->SeparateShader) { > - if (!add_packed_varyings(shProg, input_stage, GL_PROGRAM_INPUT)) > - return; > - if (!add_packed_varyings(shProg, output_stage, GL_PROGRAM_OUTPUT)) > - return; > - } > + /* Program interface needs to expose varyings in case of SSO, or in case > of > +* vertex inputs/fragement outputs that are packed unsing the component s/fragement/fragment s/unsing/using > +* layout qualifier. > +*/ > + if (!add_packed_varyings(shProg, input_stage, GL_PROGRAM_INPUT)) > + return; > + if (!add_packed_varyings(shProg, output_stage, GL_PROGRAM_OUTPUT)) > + return; > > if (!add_fragdata_arrays(shProg)) >return; ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/3] st/mesa: check state->mesa in early return check in st_validate_state()
We were checking the dirty->st flags but not the dirty->mesa flags. When we took the early return, we didn't clear the dirty->mesa flags so the next time we called st_validate_state() we'd often flush the glBitmap cache. And since st_validate_state() is called from st_Bitmap(), it meant we flushed the bitmap cache for every glBitmap() call. This change seems to recover most of the performance loss observed with the ipers demo on llvmpipe since commit 36c93a6fae27561. Cc: mesa-sta...@lists.freedesktop.org --- src/mesa/state_tracker/st_atom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/state_tracker/st_atom.c b/src/mesa/state_tracker/st_atom.c index 43dbadd..c1a9d00 100644 --- a/src/mesa/state_tracker/st_atom.c +++ b/src/mesa/state_tracker/st_atom.c @@ -188,7 +188,7 @@ void st_validate_state( struct st_context *st ) st_manager_validate_framebuffers(st); - if (state->st == 0) + if (state->st == 0 && state->mesa == 0) return; /*printf("%s %x/%x\n", __func__, state->mesa, state->st);*/ -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] gallium/r600: Replace ALIGN_DIVUP with DIV_ROUND_UP
Nicolai Hähnle writes: > On 30.12.2015 13:44, Krzysztof A. Sobiecki wrote: >> Nicolai Hähnle writes: >> >>> On 30.12.2015 08:42, Krzysztof A. Sobiecki wrote: Nicolai Hähnle writes: > On 29.12.2015 14:27, Krzysztof A. Sobiecki wrote: >> From: Krzysztof Sobiecki >> >> ALIGN_DIVUP is a driver specific(r600g) macro that duplicates >> DIV_ROUND_UP functionality. >> Replacing it with DIV_ROUND_UP eliminates this problems. > > Those macros are actually slightly different, and the assembly > generated by the ALIGN_DIVUP looks clearly better to me. > > I remember seeing a very long thread about this not so long ago - what > was the resolution there? > > Cheers, > Nicolai > I would like to remove ALIGN_DIVUP first and then debate with implementation DIV_ROUND_UP should use. btw. I prefer 1 + ((x - 1) / y) >>> >>> That produces an incorrect result when x is an unsigned type and equal >>> to 0 -- and that is something that existing code definitely relies on. >>> >>> Cheers, >>> Nicolai >>> >> Then what about (x / y) + (i % y != 0) > > Generates similar assembly to the DIV_ROUND_UP version. > > Anyway, now that I look at it again I'd say just go ahead and add my > R-b. Yes, the assembly looks slightly worse, but only slightly, and > avoiding surprises with overflows down the line seems like a good > idea. > > Cheers, > Nicolai > I don't have commit access, can you push it, sorry. -- X was an interactive protocol: alpha blending a full-screen image looked like slugs racing down the monitor. http://www.keithp.com/~keithp/talks/usenix2000/render.html ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 00/23] RadeonSI: Restructuring shader code generation part 2
On Wed, Jan 06, 2016 at 01:41:22PM +0100, Marek Olšák wrote: > Hi, > > These boring patches focus on restructuring pixel shader output handling and > code around si_compile_llvm (config, dumping, etc.). They are mostly code > movements and dividing functions into smaller ones, so that they can be > re-used by pixel shader epilog compilation code. > > Please review. These all look OK to me. -Tom > > Marek > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/4] st/mesa: use GLbitfield in st_state_flags, add comments
Use GLbitfield instead of GLuint to be consistent with other variables. --- src/mesa/state_tracker/st_context.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index 35c8932..91b0f97 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -65,8 +65,8 @@ struct u_upload_mgr; struct st_state_flags { - GLuint mesa; - uint64_t st; + GLbitfield mesa; /**< Mask of _NEW_x flags */ + uint64_t st; /**< Mask of ST_NEW_x flags */ }; struct st_tracked_state { -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/4] s/GLuint/GLbitfield/ for st_invalidate_state() parameter
To match dd_function_table::UpdateState(). --- src/mesa/state_tracker/st_context.c | 2 +- src/mesa/state_tracker/st_context.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index 31cc99d..e12c166 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -99,7 +99,7 @@ static void st_Enable(struct gl_context * ctx, GLenum cap, GLboolean state) /** * Called via ctx->Driver.UpdateState() */ -void st_invalidate_state(struct gl_context * ctx, GLuint new_state) +void st_invalidate_state(struct gl_context * ctx, GLbitfield new_state) { struct st_context *st = st_context(ctx); diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index 276fa63..35c8932 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -252,7 +252,7 @@ struct st_framebuffer extern void st_init_driver_functions(struct pipe_screen *screen, struct dd_function_table *functions); -void st_invalidate_state(struct gl_context * ctx, GLuint new_state); +void st_invalidate_state(struct gl_context * ctx, GLbitfield new_state); -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/3] st/mesa: be more careful about state validation in st_Bitmap()
If the only dirty state is mesa's _NEW_PROGRAM_CONSTANTS flag, we can skip state validation before drawing a bitmap since that state doesn't affect bitmap rendering. This further increases the performance of the ipers demo on llvmpipe to about what it was before commit 36c93a6fae27561. --- src/mesa/state_tracker/st_cb_bitmap.c | 9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index c2cbcbd..191f144 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -622,7 +622,14 @@ st_Bitmap(struct gl_context *ctx, GLint x, GLint y, if (width == 0 || height == 0) return; - st_validate_state(st); + /* We only need to validate state of the st dirty flags are set or +* any non-_NEW_PROGRAM_CONSTANTS mesa flags are set. The VS we use +* for bitmap drawing uses no constants and the FS constants are +* explicitly uploaded in the draw_bitmap_quad() function. +*/ + if ((st->dirty.mesa & ~_NEW_PROGRAM_CONSTANTS) || st->dirty.st) { + st_validate_state(st); + } if (!st->bitmap.vs) { /* create pass-through vertex shader now */ -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] llvmpipe: avoid most 64 bit math in rasterization
On 06/01/16 16:26, Jose Fonseca wrote: On 06/01/16 00:06, srol...@vmware.com wrote: From: Roland Scheidegger The trick here is to recognize that in the c + n * dcdx calculations, not only can the lower FIXED_ORDER bits not change (as the dcdx values have those all zero) but that this means the sign bit of the calculations cannot be different as well, that is sign(c + n*dcdx) == sign((c >> FIXED_ORDER) + n*(dcdx >> FIXED_ORDER)). That shaves off more than enough bits to never require 64bit masks. A shifted plane c value could still easily exceed 32 bits, however since we throw out planes which are trivial accept even before binning (and similarly don't even get to see tris for which there was a trivial reject plane) this is never a problem. The idea isn't all that revolutionary, in fact something similar was tried ages ago (9773722c2b09d5f0615a47cecf4347859474dc56) back when the values were only 32 bit anyway. I believe now it didn't quite work then because the adjustment needed for testing trivial reject / partial masks wasn't handled correctly. This still keeps the separate 32/64 bit paths for now, as the 32 bit one still looks minimally simpler (and also because if we'd pass in dcdx/dcdy/eo unscaled from setup which would be a good reason to ditch the 32 bit path, we'd need to change the special-purpose rasterization functions for small tris). This passes piglit triangle-rasterization (-fbo -auto -max_size -subpixelbits 8). It still fails triangle-rasterization-overdraw -max_size (no change, fails everything at position 2048 - interestingly for softpipe, nvidia maxwell 1 blob, and amd evergreen open-source drivers the test fails as well but at 4096 - seems like we're missing a float mantissa bit somewhere!). I don't think that's how the test is supposed to be run. If you do an apitrace, you'll see the test creates a fbo with 1000x1000, a viewport with 16Kx16K, and does a readpixels of 4Kx4K... 
The problem is that the generic "-fbo" option is not useful for this, as we can't reliably resize it after the fact. Take a look at tests/general/triangle-rasterization.cpp -- it has a different option "-use_fbo" that creates its own fbo. Jose Jose --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 84 +-- src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h | 107 + 2 files changed, 133 insertions(+), 58 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index c9b9221..a4dd6ef 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -64,43 +64,43 @@ block_full_16(struct lp_rasterizer_task *task, } static inline unsigned -build_mask_linear(int64_t c, int64_t dcdx, int64_t dcdy) +build_mask_linear(int32_t c, int32_t dcdx, int32_t dcdy) { unsigned mask = 0; - int64_t c0 = c; - int64_t c1 = c0 + dcdy; - int64_t c2 = c1 + dcdy; - int64_t c3 = c2 + dcdy; - - mask |= ((c0 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 0); - mask |= ((c0 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 1); - mask |= ((c0 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 2); - mask |= ((c0 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 3); - mask |= ((c1 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 4); - mask |= ((c1 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 5); - mask |= ((c1 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 6); - mask |= ((c1 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 7); - mask |= ((c2 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 8); - mask |= ((c2 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 9); - mask |= ((c2 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 10); - mask |= ((c2 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 11); - mask |= ((c3 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 12); - mask |= ((c3 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 13); - mask |= ((c3 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 14); - mask |= ((c3 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 15); + int32_t c0 = c; + int32_t c1 = c0 + dcdy; + int32_t c2 = c1 + dcdy; + int32_t c3 = c2 + dcdy; + + mask |= ((c0 + 0 * dcdx) >> 31) & (1 << 0); + 
mask |= ((c0 + 1 * dcdx) >> 31) & (1 << 1); + mask |= ((c0 + 2 * dcdx) >> 31) & (1 << 2); + mask |= ((c0 + 3 * dcdx) >> 31) & (1 << 3); + mask |= ((c1 + 0 * dcdx) >> 31) & (1 << 4); + mask |= ((c1 + 1 * dcdx) >> 31) & (1 << 5); + mask |= ((c1 + 2 * dcdx) >> 31) & (1 << 6); + mask |= ((c1 + 3 * dcdx) >> 31) & (1 << 7); + mask |= ((c2 + 0 * dcdx) >> 31) & (1 << 8); + mask |= ((c2 + 1 * dcdx) >> 31) & (1 << 9); + mask |= ((c2 + 2 * dcdx) >> 31) & (1 << 10); + mask |= ((c2 + 3 * dcdx) >> 31) & (1 << 11); + mask |= ((c3 + 0 * dcdx) >> 31) & (1 << 12); + mask |= ((c3 + 1 * dcdx) >> 31) & (1 << 13); + mask |= ((c3 + 2 * dcdx) >> 31) & (1 << 14); + mask |= ((c3 + 3 * dcdx) >> 31) & (1 << 15); return mask; } static inline void -build_masks(int64_t c, -int64_t cdiff, -int64_t dcdx, -int64_t dcdy, -unsigned *outmask, -
[Mesa-dev] [PATCH 2/3] st/mesa: move bitmap cache flushing out of state validation
Just do it where needed (before drawing, clearing, etc). --- src/mesa/state_tracker/st_atom.c | 4 src/mesa/state_tracker/st_cb_clear.c | 3 +++ src/mesa/state_tracker/st_cb_drawpixels.c | 5 + src/mesa/state_tracker/st_cb_drawtex.c| 3 +++ src/mesa/state_tracker/st_draw.c | 3 +++ src/mesa/state_tracker/st_draw_feedback.c | 3 +++ 6 files changed, 17 insertions(+), 4 deletions(-) diff --git a/src/mesa/state_tracker/st_atom.c b/src/mesa/state_tracker/st_atom.c index c1a9d00..337213c 100644 --- a/src/mesa/state_tracker/st_atom.c +++ b/src/mesa/state_tracker/st_atom.c @@ -33,7 +33,6 @@ #include "pipe/p_defines.h" #include "st_context.h" #include "st_atom.h" -#include "st_cb_bitmap.h" #include "st_program.h" #include "st_manager.h" @@ -181,9 +180,6 @@ void st_validate_state( struct st_context *st ) check_attrib_edgeflag(st); - if (state->mesa) - st_flush_bitmap_cache(st); - check_program_state( st ); st_manager_validate_framebuffers(st); diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c index e09f5ec..7b6d10e 100644 --- a/src/mesa/state_tracker/st_cb_clear.c +++ b/src/mesa/state_tracker/st_cb_clear.c @@ -41,6 +41,7 @@ #include "program/prog_instruction.h" #include "st_context.h" #include "st_atom.h" +#include "st_cb_bitmap.h" #include "st_cb_clear.h" #include "st_cb_fbo.h" #include "st_format.h" @@ -466,6 +467,8 @@ st_Clear(struct gl_context *ctx, GLbitfield mask) GLbitfield clear_buffers = 0x0; GLuint i; + st_flush_bitmap_cache(st); + /* This makes sure the pipe has the latest scissor, etc values */ st_validate_state( st ); diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 86e8a55..7ed52dd 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -50,6 +50,7 @@ #include "st_atom.h" #include "st_atom_constbuf.h" +#include "st_cb_bitmap.h" #include "st_cb_drawpixels.h" #include "st_cb_readpixels.h" #include "st_cb_fbo.h" @@ -1063,6 
+1064,8 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, /* Mesa state should be up to date by now */ assert(ctx->NewState == 0x0); + st_flush_bitmap_cache(st); + st_validate_state(st); /* Limit the size of the glDrawPixels to the max texture size. @@ -1422,6 +1425,8 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy, GLint readX, readY, readW, readH; struct gl_pixelstore_attrib pack = ctx->DefaultPacking; + st_flush_bitmap_cache(st); + st_validate_state(st); if (type == GL_DEPTH_STENCIL) { diff --git a/src/mesa/state_tracker/st_cb_drawtex.c b/src/mesa/state_tracker/st_cb_drawtex.c index b3e4b5b..e6ab77f 100644 --- a/src/mesa/state_tracker/st_cb_drawtex.c +++ b/src/mesa/state_tracker/st_cb_drawtex.c @@ -21,6 +21,7 @@ #include "st_context.h" #include "st_atom.h" +#include "st_cb_bitmap.h" #include "st_cb_drawtex.h" #include "pipe/p_context.h" @@ -113,6 +114,8 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, struct pipe_vertex_element velements[2 + MAX_TEXTURE_UNITS]; unsigned offset; + st_flush_bitmap_cache(st); + st_validate_state(st); /* determine if we need vertex color */ diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 63b4622..d7a9716 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -48,6 +48,7 @@ #include "st_context.h" #include "st_atom.h" +#include "st_cb_bitmap.h" #include "st_cb_bufferobjects.h" #include "st_cb_xformfb.h" #include "st_debug.h" @@ -197,6 +198,8 @@ st_draw_vbo(struct gl_context *ctx, /* Mesa core state should have been validated already */ assert(ctx->NewState == 0x0); + st_flush_bitmap_cache(st); + /* Validate state. 
*/ if (st->dirty.st || ctx->NewDriverState) { st_validate_state(st); diff --git a/src/mesa/state_tracker/st_draw_feedback.c b/src/mesa/state_tracker/st_draw_feedback.c index 88c10a8..b6e6dea 100644 --- a/src/mesa/state_tracker/st_draw_feedback.c +++ b/src/mesa/state_tracker/st_draw_feedback.c @@ -33,6 +33,7 @@ #include "st_context.h" #include "st_atom.h" +#include "st_cb_bitmap.h" #include "st_cb_bufferobjects.h" #include "st_draw.h" #include "st_program.h" @@ -137,6 +138,8 @@ st_feedback_draw_vbo(struct gl_context *ctx, assert(draw); + st_flush_bitmap_cache(st); + st_validate_state(st); if (!index_bounds_valid) -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/2] st/mesa: check texture target in allocate_full_mipmap()
Some kinds of textures never have mipmaps. 3D textures seldom have mipmaps. --- src/mesa/state_tracker/st_cb_texture.c | 14 ++ 1 file changed, 14 insertions(+) diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 867d4da..f8b3679 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -404,6 +404,16 @@ static boolean allocate_full_mipmap(const struct st_texture_object *stObj, const struct st_texture_image *stImage) { + switch (stObj->base.Target) { + case GL_TEXTURE_RECTANGLE_NV: + case GL_TEXTURE_BUFFER: + case GL_TEXTURE_EXTERNAL_OES: + case GL_TEXTURE_2D_MULTISAMPLE: + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: + /* these texture types cannot be mipmapped */ + return FALSE; + } + if (stImage->base.Level > 0 || stObj->base.GenerateMipmap) return TRUE; @@ -420,6 +430,10 @@ allocate_full_mipmap(const struct st_texture_object *stObj, /* not a mipmap minification filter */ return FALSE; + if (stObj->base.Target == GL_TEXTURE_3D) + /* 3D textures are seldom mipmapped */ + return FALSE; + return TRUE; } -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] st/mesa: move mipmap allocation check logic into a function
Better readability and easier to extend. --- src/mesa/state_tracker/st_cb_texture.c | 54 ++ 1 file changed, 42 insertions(+), 12 deletions(-) diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 62f149a..867d4da 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -388,6 +388,43 @@ guess_base_level_size(GLenum target, /** + * Try to determine whether we should allocate memory for a full texture + * mipmap. The problem is when we get a glTexImage(level=0) call, we + * can't immediately know if other mipmap levels are coming next. Here + * we try to guess whether to allocate memory for a mipmap or just the + * 0th level. + * + * If we guess incorrectly here we'll later reallocate the right amount of + * memory either in st_AllocTextureImageBuffer() or st_finalize_texture(). + * + * \param stObj the texture object we're going to allocate memory for. + * \param stImage describes the incoming image which we need to store. + */ +static boolean +allocate_full_mipmap(const struct st_texture_object *stObj, + const struct st_texture_image *stImage) +{ + if (stImage->base.Level > 0 || stObj->base.GenerateMipmap) + return TRUE; + + if (stImage->base._BaseFormat == GL_DEPTH_COMPONENT || + stImage->base._BaseFormat == GL_DEPTH_STENCIL_EXT) + /* depth/stencil textures are seldom mipmapped */ + return FALSE; + + if (stObj->base.BaseLevel == 0 && stObj->base.MaxLevel == 0) + return FALSE; + + if (stObj->base.Sampler.MinFilter == GL_NEAREST || + stObj->base.Sampler.MinFilter == GL_LINEAR) + /* not a mipmap minification filter */ + return FALSE; + + return TRUE; +} + + +/** * Try to allocate a pipe_resource object for the given st_texture_object. * * We use the given st_texture_image as a clue to determine the size of the @@ -431,22 +468,15 @@ guess_and_alloc_texture(struct st_context *st, * to re-allocating a texture buffer with space for more (or fewer) * mipmap levels later. 
*/ - if ((stObj->base.Sampler.MinFilter == GL_NEAREST || -stObj->base.Sampler.MinFilter == GL_LINEAR || -(stObj->base.BaseLevel == 0 && - stObj->base.MaxLevel == 0) || -stImage->base._BaseFormat == GL_DEPTH_COMPONENT || -stImage->base._BaseFormat == GL_DEPTH_STENCIL_EXT) && - !stObj->base.GenerateMipmap && - stImage->base.Level == 0) { - /* only alloc space for a single mipmap level */ - lastLevel = 0; - } - else { + if (allocate_full_mipmap(stObj, stImage)) { /* alloc space for a full mipmap */ lastLevel = _mesa_get_tex_max_num_levels(stObj->base.Target, width, height, depth) - 1; } + else { + /* only alloc space for a single mipmap level */ + lastLevel = 0; + } /* Save the level=0 dimensions */ stObj->width0 = width; -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] wgl: Rudimentary wglUseFontBitmaps sample.
It uses SYSTEM_FONT which actually creates some challenges when emulating wglUseFontBitmaps: in spite of what https://msdn.microsoft.com/en-us/library/windows/desktop/dd374392.aspx implies, GetGlyphOutline(GGO_BITMAP) does not seem to work with certain fonts. The only solution is to draw the font characters with a HBITMAP like the old Mesa fxwgl.c code used to do. That too, seems to be the way that opengl32.dll implements wglUseFontBitmaps. --- src/wgl/CMakeLists.txt | 2 + src/wgl/wglfont.c | 103 + 2 files changed, 105 insertions(+) create mode 100644 src/wgl/wglfont.c diff --git a/src/wgl/CMakeLists.txt b/src/wgl/CMakeLists.txt index 0229ac7..cb50cca 100644 --- a/src/wgl/CMakeLists.txt +++ b/src/wgl/CMakeLists.txt @@ -16,6 +16,7 @@ set_target_properties (wgl_sharedtex_mt PROPERTIES OUTPUT_NAME sharedtex_mt) add_executable (wglinfo wglinfo.c ${CMAKE_SOURCE_DIR}/src/xdemos/glinfo_common.c) add_executable (wglcontext wglcontext.c) add_executable (wincopy WIN32 wincopy.c wglutil.c) +add_executable (wglfont wglfont.c) install ( TARGETS @@ -23,6 +24,7 @@ install ( wgl_sharedtex_mt wglinfo wglcontext + wglfont wincopy DESTINATION wgl) diff --git a/src/wgl/wglfont.c b/src/wgl/wglfont.c new file mode 100644 index 000..86c5f88 --- /dev/null +++ b/src/wgl/wglfont.c @@ -0,0 +1,103 @@ +/* + * Copyright (C) 2015, VMware, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include + +int +main(int argc, char *argv[]) +{ + WNDCLASS wc; + HWND hwnd; + HDC hdc; + PIXELFORMATDESCRIPTOR pfd; + int iPixelFormat; + HGLRC hglrc; + + ZeroMemory(, sizeof wc); + wc.style = CS_OWNDC | CS_HREDRAW | CS_VREDRAW; + wc.lpfnWndProc = DefWindowProc; + wc.hIcon = LoadIcon(NULL, IDI_APPLICATION); + wc.hCursor = LoadCursor(NULL, IDC_ARROW); + wc.hbrBackground = (HBRUSH) (COLOR_BTNFACE + 1); + wc.lpszClassName = "wglfont"; + + if (!RegisterClass()) { + abort(); + } + + hwnd = CreateWindowEx(0, + wc.lpszClassName, + "wglfont", + WS_VISIBLE | WS_CLIPSIBLINGS | WS_CLIPCHILDREN | WS_TILEDWINDOW, + CW_USEDEFAULT, CW_USEDEFAULT, CW_USEDEFAULT, CW_USEDEFAULT, + NULL, NULL, + wc.hInstance, + NULL); + if (!hwnd) { + abort(); + } + + hdc = GetDC(hwnd); + if (!hdc) { + abort(); + } + + ZeroMemory(, sizeof pfd); + pfd.nSize = sizeof pfd; + pfd.nVersion = 1; + pfd.dwFlags = PFD_DOUBLEBUFFER | PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL; + pfd.iPixelType = PFD_TYPE_RGBA; + pfd.cColorBits = 24; + pfd.cDepthBits = 24; + pfd.iLayerType = PFD_MAIN_PLANE; + + iPixelFormat = ChoosePixelFormat(hdc, ); + if (!iPixelFormat) { + abort(); + } + + if (!SetPixelFormat(hdc, iPixelFormat, )) { + abort(); + } + + hglrc = wglCreateContext(hdc); + if (!hglrc) { + abort(); + } + + wglMakeCurrent(hdc, hglrc); + + SelectObject(hdc, GetStockObject(SYSTEM_FONT)); + + wglUseFontBitmaps(hdc, 0, 255, 1000); + + glListBase(1000); + + glCallLists(12, GL_UNSIGNED_BYTE, "Hello World!"); + + SwapBuffers(hdc); + + 
Sleep(1000); + + return 0; +} -- 2.5.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] llvmpipe: avoid most 64 bit math in rasterization
On 06/01/16 00:06, srol...@vmware.com wrote: From: Roland Scheidegger The trick here is to recognize that in the c + n * dcdx calculations, not only can the lower FIXED_ORDER bits not change (as the dcdx values have those all zero) but that this means the sign bit of the calculations cannot be different as well, that is sign(c + n*dcdx) == sign((c >> FIXED_ORDER) + n*(dcdx >> FIXED_ORDER)). That shaves off more than enough bits to never require 64bit masks. A shifted plane c value could still easily exceed 32 bits, however since we throw out planes which are trivial accept even before binning (and similarly don't even get to see tris for which there was a trivial reject plane) this is never a problem. The idea isn't all that revolutionary, in fact something similar was tried ages ago (9773722c2b09d5f0615a47cecf4347859474dc56) back when the values were only 32 bit anyway. I believe now it didn't quite work then because the adjustment needed for testing trivial reject / partial masks wasn't handled correctly. This still keeps the separate 32/64 bit paths for now, as the 32 bit one still looks minimally simpler (and also because if we'd pass in dcdx/dcdy/eo unscaled from setup which would be a good reason to ditch the 32 bit path, we'd need to change the special-purpose rasterization functions for small tris). This passes piglit triangle-rasterization (-fbo -auto -max_size -subpixelbits 8). It still fails triangle-rasterization-overdraw -max_size (no change, fails everything at position 2048 - interestingly for softpipe, nvidia maxwell 1 blob, and amd evergreen open-source drivers the test fails as well but at 4096 - seems like we're missing a float mantissa bit somewhere!). I don't think that's how the test is supposed to be run. If you do an apitrace, you'll see the test creates a fbo with 1000x1000, a viewport with 16Kx16K, and does a readpixels of 4Kx4K... 
Jose --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 84 +-- src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h | 107 + 2 files changed, 133 insertions(+), 58 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index c9b9221..a4dd6ef 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -64,43 +64,43 @@ block_full_16(struct lp_rasterizer_task *task, } static inline unsigned -build_mask_linear(int64_t c, int64_t dcdx, int64_t dcdy) +build_mask_linear(int32_t c, int32_t dcdx, int32_t dcdy) { unsigned mask = 0; - int64_t c0 = c; - int64_t c1 = c0 + dcdy; - int64_t c2 = c1 + dcdy; - int64_t c3 = c2 + dcdy; - - mask |= ((c0 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 0); - mask |= ((c0 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 1); - mask |= ((c0 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 2); - mask |= ((c0 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 3); - mask |= ((c1 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 4); - mask |= ((c1 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 5); - mask |= ((c1 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 6); - mask |= ((c1 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 7); - mask |= ((c2 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 8); - mask |= ((c2 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 9); - mask |= ((c2 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 10); - mask |= ((c2 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 11); - mask |= ((c3 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 12); - mask |= ((c3 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 13); - mask |= ((c3 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 14); - mask |= ((c3 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 15); + int32_t c0 = c; + int32_t c1 = c0 + dcdy; + int32_t c2 = c1 + dcdy; + int32_t c3 = c2 + dcdy; + + mask |= ((c0 + 0 * dcdx) >> 31) & (1 << 0); + mask |= ((c0 + 1 * dcdx) >> 31) & (1 << 1); + mask |= ((c0 + 2 * dcdx) >> 31) & (1 << 2); + mask |= ((c0 + 3 * dcdx) >> 31) & (1 << 3); + mask |= ((c1 + 0 * dcdx) >> 31) & (1 << 4); + mask |= ((c1 + 1 * dcdx) >> 31) & (1 << 5); + mask |= ((c1 
+ 2 * dcdx) >> 31) & (1 << 6); + mask |= ((c1 + 3 * dcdx) >> 31) & (1 << 7); + mask |= ((c2 + 0 * dcdx) >> 31) & (1 << 8); + mask |= ((c2 + 1 * dcdx) >> 31) & (1 << 9); + mask |= ((c2 + 2 * dcdx) >> 31) & (1 << 10); + mask |= ((c2 + 3 * dcdx) >> 31) & (1 << 11); + mask |= ((c3 + 0 * dcdx) >> 31) & (1 << 12); + mask |= ((c3 + 1 * dcdx) >> 31) & (1 << 13); + mask |= ((c3 + 2 * dcdx) >> 31) & (1 << 14); + mask |= ((c3 + 3 * dcdx) >> 31) & (1 << 15); return mask; } static inline void -build_masks(int64_t c, -int64_t cdiff, -int64_t dcdx, -int64_t dcdy, - unsigned *outmask, - unsigned *partmask) +build_masks(int32_t c, +int32_t cdiff, +int32_t dcdx, +int32_t dcdy, +unsigned *outmask, +unsigned *partmask) { *outmask |= build_mask_linear(c, dcdx, dcdy); *partmask |=
Re: [Mesa-dev] [PATCH] wgl: Rudimentary wglUseFontBitmaps sample.
The copyright line could be bumped to 2016. Reviewed-by: Brian Paul On 01/06/2016 09:21 AM, Jose Fonseca wrote: It uses SYSTEM_FONT which actually creates some challenges when emulating wglUseFontBitmaps: in spite of what https://msdn.microsoft.com/en-us/library/windows/desktop/dd374392.aspx implies, GetGlyphOutline(GGO_BITMAP) does not seem to work with certain fonts. The only solution is to draw the font characters with a HBITMAP like the old Mesa fxwgl.c code used to do. That too, seems to be the way that opengl32.dll implements wglUseFontBitmaps. --- src/wgl/CMakeLists.txt | 2 + src/wgl/wglfont.c | 103 + 2 files changed, 105 insertions(+) create mode 100644 src/wgl/wglfont.c diff --git a/src/wgl/CMakeLists.txt b/src/wgl/CMakeLists.txt index 0229ac7..cb50cca 100644 --- a/src/wgl/CMakeLists.txt +++ b/src/wgl/CMakeLists.txt @@ -16,6 +16,7 @@ set_target_properties (wgl_sharedtex_mt PROPERTIES OUTPUT_NAME sharedtex_mt) add_executable (wglinfo wglinfo.c ${CMAKE_SOURCE_DIR}/src/xdemos/glinfo_common.c) add_executable (wglcontext wglcontext.c) add_executable (wincopy WIN32 wincopy.c wglutil.c) +add_executable (wglfont wglfont.c) install ( TARGETS @@ -23,6 +24,7 @@ install ( wgl_sharedtex_mt wglinfo wglcontext + wglfont wincopy DESTINATION wgl) diff --git a/src/wgl/wglfont.c b/src/wgl/wglfont.c new file mode 100644 index 000..86c5f88 --- /dev/null +++ b/src/wgl/wglfont.c @@ -0,0 +1,103 @@ +/* + * Copyright (C) 2015, VMware, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include + +int +main(int argc, char *argv[]) +{ + WNDCLASS wc; + HWND hwnd; + HDC hdc; + PIXELFORMATDESCRIPTOR pfd; + int iPixelFormat; + HGLRC hglrc; + + ZeroMemory(, sizeof wc); + wc.style = CS_OWNDC | CS_HREDRAW | CS_VREDRAW; + wc.lpfnWndProc = DefWindowProc; + wc.hIcon = LoadIcon(NULL, IDI_APPLICATION); + wc.hCursor = LoadCursor(NULL, IDC_ARROW); + wc.hbrBackground = (HBRUSH) (COLOR_BTNFACE + 1); + wc.lpszClassName = "wglfont"; + + if (!RegisterClass()) { + abort(); + } + + hwnd = CreateWindowEx(0, + wc.lpszClassName, + "wglfont", + WS_VISIBLE | WS_CLIPSIBLINGS | WS_CLIPCHILDREN | WS_TILEDWINDOW, + CW_USEDEFAULT, CW_USEDEFAULT, CW_USEDEFAULT, CW_USEDEFAULT, + NULL, NULL, + wc.hInstance, + NULL); + if (!hwnd) { + abort(); + } + + hdc = GetDC(hwnd); + if (!hdc) { + abort(); + } + + ZeroMemory(, sizeof pfd); + pfd.nSize = sizeof pfd; + pfd.nVersion = 1; + pfd.dwFlags = PFD_DOUBLEBUFFER | PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL; + pfd.iPixelType = PFD_TYPE_RGBA; + pfd.cColorBits = 24; + pfd.cDepthBits = 24; + pfd.iLayerType = PFD_MAIN_PLANE; + + iPixelFormat = ChoosePixelFormat(hdc, ); + if (!iPixelFormat) { + abort(); + } + + if (!SetPixelFormat(hdc, iPixelFormat, )) { + abort(); + } + + hglrc = wglCreateContext(hdc); + if (!hglrc) { + abort(); + } + + wglMakeCurrent(hdc, hglrc); + + SelectObject(hdc, GetStockObject(SYSTEM_FONT)); + + wglUseFontBitmaps(hdc, 0, 255, 1000); + + glListBase(1000); + + glCallLists(12, GL_UNSIGNED_BYTE, "Hello World!"); + + SwapBuffers(hdc); + + Sleep(1000); + + return 0; +} ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/4] vbo: s/GLuint/GLbitfield/ for state bitmasks
--- src/mesa/vbo/vbo.h | 2 +- src/mesa/vbo/vbo_context.c | 2 +- src/mesa/vbo/vbo_exec.c| 2 +- src/mesa/vbo/vbo_exec.h| 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h index cef3b8c..dd9b428 100644 --- a/src/mesa/vbo/vbo.h +++ b/src/mesa/vbo/vbo.h @@ -78,7 +78,7 @@ struct _mesa_index_buffer { GLboolean _vbo_CreateContext( struct gl_context *ctx ); void _vbo_DestroyContext( struct gl_context *ctx ); -void _vbo_InvalidateState( struct gl_context *ctx, GLuint new_state ); +void _vbo_InvalidateState( struct gl_context *ctx, GLbitfield new_state ); void diff --git a/src/mesa/vbo/vbo_context.c b/src/mesa/vbo/vbo_context.c index 5e1a760..19b35a4 100644 --- a/src/mesa/vbo/vbo_context.c +++ b/src/mesa/vbo/vbo_context.c @@ -186,7 +186,7 @@ GLboolean _vbo_CreateContext( struct gl_context *ctx ) } -void _vbo_InvalidateState( struct gl_context *ctx, GLuint new_state ) +void _vbo_InvalidateState( struct gl_context *ctx, GLbitfield new_state ) { vbo_exec_invalidate_state(ctx, new_state); } diff --git a/src/mesa/vbo/vbo_exec.c b/src/mesa/vbo/vbo_exec.c index a301c6c..4db4f40 100644 --- a/src/mesa/vbo/vbo_exec.c +++ b/src/mesa/vbo/vbo_exec.c @@ -73,7 +73,7 @@ void vbo_exec_destroy( struct gl_context *ctx ) * invoked according to the state flags. 
That will have to wait for a * mesa rework: */ -void vbo_exec_invalidate_state( struct gl_context *ctx, GLuint new_state ) +void vbo_exec_invalidate_state( struct gl_context *ctx, GLbitfield new_state ) { struct vbo_context *vbo = vbo_context(ctx); struct vbo_exec_context *exec = >exec; diff --git a/src/mesa/vbo/vbo_exec.h b/src/mesa/vbo/vbo_exec.h index a80b2c9..27bff4a 100644 --- a/src/mesa/vbo/vbo_exec.h +++ b/src/mesa/vbo/vbo_exec.h @@ -146,7 +146,7 @@ struct vbo_exec_context */ void vbo_exec_init( struct gl_context *ctx ); void vbo_exec_destroy( struct gl_context *ctx ); -void vbo_exec_invalidate_state( struct gl_context *ctx, GLuint new_state ); +void vbo_exec_invalidate_state( struct gl_context *ctx, GLbitfield new_state ); /* Internal functions: -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/4] main: s/GLuint/GLbitfield for state bitmasks
--- src/mesa/main/api_arrayelt.c | 4 ++-- src/mesa/main/api_arrayelt.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mesa/main/api_arrayelt.c b/src/mesa/main/api_arrayelt.c index 92d8238..c84db5f 100644 --- a/src/mesa/main/api_arrayelt.c +++ b/src/mesa/main/api_arrayelt.c @@ -65,7 +65,7 @@ typedef struct { typedef struct { AEarray arrays[32]; AEattrib attribs[VERT_ATTRIB_MAX + 1]; - GLuint NewState; + GLbitfield NewState; /* List of VBOs we need to map before executing ArrayElements */ struct gl_buffer_object *vbo[VERT_ATTRIB_MAX]; @@ -1802,7 +1802,7 @@ _ae_ArrayElement(GLint elt) void -_ae_invalidate_state(struct gl_context *ctx, GLuint new_state) +_ae_invalidate_state(struct gl_context *ctx, GLbitfield new_state) { AEcontext *actx = AE_CONTEXT(ctx); diff --git a/src/mesa/main/api_arrayelt.h b/src/mesa/main/api_arrayelt.h index 39fdeb9..03cd9ec 100644 --- a/src/mesa/main/api_arrayelt.h +++ b/src/mesa/main/api_arrayelt.h @@ -33,7 +33,7 @@ extern GLboolean _ae_create_context( struct gl_context *ctx ); extern void _ae_destroy_context( struct gl_context *ctx ); -extern void _ae_invalidate_state( struct gl_context *ctx, GLuint new_state ); +extern void _ae_invalidate_state( struct gl_context *ctx, GLbitfield new_state ); extern void GLAPIENTRY _ae_ArrayElement( GLint elt ); /* May optionally be called before a batch of element calls: -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/3] nir/algebraic: Add more lowering
This commit adds lowering options for the following opcodes: - nir_op_fmod - nir_op_bitfield_insert - nir_op_uadd_carry - nir_op_usub_borrow --- src/glsl/nir/nir.h| 4 src/glsl/nir/nir_opt_algebraic.py | 6 ++ 2 files changed, 10 insertions(+) diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 59f6f68..61e51da 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1509,6 +1509,10 @@ typedef struct nir_shader_compiler_options { bool lower_fpow; bool lower_fsat; bool lower_fsqrt; + bool lower_fmod; + bool lower_bitfield_insert; + bool lower_uadd_carry; + bool lower_usub_borrow; /** lowers fneg and ineg to fsub and isub. */ bool lower_negate; /** lowers fsub and isub to fadd+fneg and iadd+ineg. */ diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py index a5a4841..f4a8632 100644 --- a/src/glsl/nir/nir_opt_algebraic.py +++ b/src/glsl/nir/nir_opt_algebraic.py @@ -222,6 +222,12 @@ optimizations = [ (('iadd', a, ('isub', 0, b)), ('isub', a, b)), (('fabs', ('fsub', 0.0, a)), ('fabs', a)), (('iabs', ('isub', 0, a)), ('iabs', a)), + + # Misc. lowering + (('fmod', a, b), ('fsub', a, ('fmul', b, ('ffloor', ('fdiv', a, b, 'options->lower_fmod'), + (('bitfield_insert', a, b, c, d), ('bfi', ('bfm', d, c), b, a), 'options->lower_bitfield_insert'), + (('uadd_carry', a, b), ('ult', ('iadd', a, b), a), 'options->lower_uadd_carry'), + (('usub_borrow', a, b), ('ult', a, b), 'options->lower_usub_borrow'), ] # Add optimizations to handle the case where the result of a ternary is -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/3] i965/compiler: Enable more lowering in NIR
We don't need these for GLSL or ARB, but we need them for SPIR-V --- src/mesa/drivers/dri/i965/brw_shader.cpp | 5 + 1 file changed, 5 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 6d15c60..4ae403c 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -104,6 +104,11 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) nir_options->lower_ffma = true; nir_options->lower_sub = true; nir_options->lower_fdiv = true; + nir_options->lower_scmp = true; + nir_options->lower_fmod = true; + nir_options->lower_bitfield_insert = true; + nir_options->lower_uadd_carry = true; + nir_options->lower_usub_borrow = true; /* In the vec4 backend, our dpN instruction replicates its result to all * the components of a vec4. We would like NIR to give us replicated fdot -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 16/28] glsl: don't pack tessellation stages like we do other stages
On Wed, 2016-01-06 at 17:50 -0500, Ilia Mirkin wrote: > On Tue, Dec 29, 2015 at 12:00 AM, Timothy Arceri >wrote: > > Tessellation shaders treat varyings as shared memory and > > invocations > > can access each others varyings therefore we can't use the existing > > method to lower them. > > That's not strictly true... this is only true of tess control outputs > (which can be written by the current invocation, but also read in by > other invocations, effectively acting as a shared memory -- both true > of per-invocation outputs as well as per-patch outputs). Does that > information change this patch at all? I don't think so. The problem is that the current packing code works like this: - Change vars to be packed to temporaries, create new packed varyings. - Copy *all* values from the new packed input varying to the temporaries at the start of main. - Copy *all* values from the temporaries to the new packed output vars at the end of main (or before emit for GS). As well as the invocations stomping on each other this results in 32 (GL_MAX_PATCH_VERTICES?) copies for each TCS input as it just copies the full array. The current packing just doesn't work well for tessellation, it's easier to just disable it for tessellation and do it all using a different method rather than trying to mix and match. > > > > > This adds a check for these stages as following patches will > > allow explicit locations to be lowered even when the driver and > > existing > > tesselation checks ask for it to be disabled, we do this to enable > > support > > for the component layout qualifier. 
> > --- > > src/glsl/lower_packed_varyings.cpp | 62 +- > > > > 1 file changed, 34 insertions(+), 28 deletions(-) > > > > diff --git a/src/glsl/lower_packed_varyings.cpp > > b/src/glsl/lower_packed_varyings.cpp > > index 2899846..e4e9a35 100644 > > --- a/src/glsl/lower_packed_varyings.cpp > > +++ b/src/glsl/lower_packed_varyings.cpp > > @@ -737,40 +737,46 @@ lower_packed_varyings(void *mem_ctx, unsigned > > locations_used, > >ir_variable_mode mode, unsigned > > gs_input_vertices, > >gl_shader *shader, bool > > disable_varying_packing) > > { > > - exec_list *instructions = shader->ir; > > ir_function *main_func = shader->symbols->get_function("main"); > > exec_list void_parameters; > > ir_function_signature *main_func_sig > >= main_func->matching_signature(NULL, _parameters, > > false); > > - exec_list new_instructions, new_variables; > > - lower_packed_varyings_visitor visitor(mem_ctx, locations_used, > > mode, > > - gs_input_vertices, > > - _instructions, > > - _variables, > > - disable_varying_packing); > > - visitor.run(shader); > > - if (mode == ir_var_shader_out) { > > - if (shader->Stage == MESA_SHADER_GEOMETRY) { > > - /* For geometry shaders, outputs need to be lowered > > before each call > > - * to EmitVertex() > > - */ > > - lower_packed_varyings_gs_splicer splicer(mem_ctx, > > _instructions); > > - > > - /* Add all the variables in first. 
*/ > > - main_func_sig->body.head->insert_before(_variables); > > > > - /* Now update all the EmitVertex instances */ > > - splicer.run(instructions); > > + if (!(shader->Stage == MESA_SHADER_TESS_CTRL || > > + shader->Stage == MESA_SHADER_TESS_EVAL)) { > > + exec_list *instructions = shader->ir; > > + exec_list new_instructions, new_variables; > > + > > + lower_packed_varyings_visitor visitor(mem_ctx, > > locations_used, mode, > > +gs_input_vertices, > > +_instructions, > > +_variables, > > + > > disable_varying_packing); > > + visitor.run(shader); > > + if (mode == ir_var_shader_out) { > > + if (shader->Stage == MESA_SHADER_GEOMETRY) { > > +/* For geometry shaders, outputs need to be lowered > > before each > > + * call to EmitVertex() > > + */ > > +lower_packed_varyings_gs_splicer splicer(mem_ctx, > > + > > _instructions); > > + > > +/* Add all the variables in first. */ > > +main_func_sig->body.head > > ->insert_before(_variables); > > + > > +/* Now update all the EmitVertex instances */ > > +splicer.run(instructions); > > + } else { > > +/* For other shader types, outputs need to be lowered > > at the end > > + * of main() > > + */ > > +main_func_sig->body.append_list(_variables); > > +
[Mesa-dev] [PATCH V2 16/35] glsl: move packing rules for tessellation stages into the packing code
Following patches will allow packing of varyings with explicit locations via the component layout qualifier. Moving the rules here will enable us to call an alternate path for packing tessellation stages with explicit locations. --- V2: move the tessellation packing rules, allow TES output to be packed. src/glsl/link_varyings.cpp | 17 ++ src/glsl/lower_packed_varyings.cpp | 63 +- 2 files changed, 38 insertions(+), 42 deletions(-) diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp index be662bc..69e24e3 100644 --- a/src/glsl/link_varyings.cpp +++ b/src/glsl/link_varyings.cpp @@ -1640,18 +1640,7 @@ assign_varying_locations(struct gl_context *ctx, assert(!ctx->Extensions.EXT_transform_feedback); } - /* Tessellation shaders treat inputs and outputs as shared memory and can -* access inputs and outputs of other invocations. -* Therefore, they can't be lowered to temps easily (and definitely not -* efficiently). -*/ - bool disable_varying_packing = - ctx->Const.DisableVaryingPacking || - (consumer && consumer->Stage == MESA_SHADER_TESS_EVAL) || - (consumer && consumer->Stage == MESA_SHADER_TESS_CTRL) || - (producer && producer->Stage == MESA_SHADER_TESS_CTRL); - - varying_matches matches(disable_varying_packing, + varying_matches matches(ctx->Const.DisableVaryingPacking, producer ? producer->Stage : (gl_shader_stage)-1, consumer ? 
consumer->Stage : (gl_shader_stage)-1); hash_table *tfeedback_candidates @@ -1864,13 +1853,13 @@ assign_varying_locations(struct gl_context *ctx, if (producer) { lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_out, -0, producer, disable_varying_packing); +0, producer, ctx->Const.DisableVaryingPacking); } if (consumer) { lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_in, consumer_vertices, consumer, -disable_varying_packing); +ctx->Const.DisableVaryingPacking); } return true; diff --git a/src/glsl/lower_packed_varyings.cpp b/src/glsl/lower_packed_varyings.cpp index 2899846..4723c2b 100644 --- a/src/glsl/lower_packed_varyings.cpp +++ b/src/glsl/lower_packed_varyings.cpp @@ -737,40 +737,47 @@ lower_packed_varyings(void *mem_ctx, unsigned locations_used, ir_variable_mode mode, unsigned gs_input_vertices, gl_shader *shader, bool disable_varying_packing) { - exec_list *instructions = shader->ir; ir_function *main_func = shader->symbols->get_function("main"); exec_list void_parameters; ir_function_signature *main_func_sig = main_func->matching_signature(NULL, _parameters, false); - exec_list new_instructions, new_variables; - lower_packed_varyings_visitor visitor(mem_ctx, locations_used, mode, - gs_input_vertices, - _instructions, - _variables, - disable_varying_packing); - visitor.run(shader); - if (mode == ir_var_shader_out) { - if (shader->Stage == MESA_SHADER_GEOMETRY) { - /* For geometry shaders, outputs need to be lowered before each call - * to EmitVertex() - */ - lower_packed_varyings_gs_splicer splicer(mem_ctx, _instructions); - - /* Add all the variables in first. 
*/ - main_func_sig->body.head->insert_before(_variables); - /* Now update all the EmitVertex instances */ - splicer.run(instructions); + if (!(shader->Stage == MESA_SHADER_TESS_CTRL || + (shader->Stage == MESA_SHADER_TESS_EVAL && + mode == ir_var_shader_in))) { + exec_list *instructions = shader->ir; + exec_list new_instructions, new_variables; + + lower_packed_varyings_visitor visitor(mem_ctx, locations_used, mode, +gs_input_vertices, +_instructions, +_variables, +disable_varying_packing); + visitor.run(shader); + if (mode == ir_var_shader_out) { + if (shader->Stage == MESA_SHADER_GEOMETRY) { +/* For geometry shaders, outputs need to be lowered before each + * call to EmitVertex() + */ +lower_packed_varyings_gs_splicer splicer(mem_ctx, + _instructions); + +/* Add all the variables in first. */ +main_func_sig->body.head->insert_before(_variables); + +/* Now update all the EmitVertex instances */ +splicer.run(instructions); + } else { +/* For
Re: [Mesa-dev] [PATCH] draw: fix line stippling with unfilled prims
Reviewed-by: Brian PaulOn 01/06/2016 03:26 PM, srol...@vmware.com wrote: From: Roland Scheidegger The unfilled stage was not filling in the prim header, and the line stage then decided to reset the stipple counter or not based on the uninitialized data. This causes some failures in conform linestipple test (albeit quite randomly happening depending on environment). So fill in the prim header in the unfilled stage - I am not entirely sure if anybody really needs determinant after that stage, but there's at least later stages (wide line for instance) which copy over the determinant as well. --- src/gallium/auxiliary/draw/draw_pipe_unfilled.c | 56 + 1 file changed, 38 insertions(+), 18 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c index 8e6435c..b9ded14 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c +++ b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c @@ -86,27 +86,33 @@ inject_front_face_info(struct draw_stage *stage, } -static void point( struct draw_stage *stage, - struct vertex_header *v0 ) +static void point(struct draw_stage *stage, + struct prim_header *header, + struct vertex_header *v0) { struct prim_header tmp; + tmp.det = header->det; + tmp.flags = 0; tmp.v[0] = v0; - stage->next->point( stage->next, ); + stage->next->point(stage->next, ); } -static void line( struct draw_stage *stage, - struct vertex_header *v0, - struct vertex_header *v1 ) +static void line(struct draw_stage *stage, + struct prim_header *header, + struct vertex_header *v0, + struct vertex_header *v1) { struct prim_header tmp; + tmp.det = header->det; + tmp.flags = 0; tmp.v[0] = v0; tmp.v[1] = v1; - stage->next->line( stage->next, ); + stage->next->line(stage->next, ); } -static void points( struct draw_stage *stage, - struct prim_header *header ) +static void points(struct draw_stage *stage, + struct prim_header *header) { struct vertex_header *v0 = header->v[0]; struct vertex_header *v1 = 
header->v[1]; @@ -114,27 +120,41 @@ static void points( struct draw_stage *stage, inject_front_face_info(stage, header); - if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag) point( stage, v0 ); - if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag) point( stage, v1 ); - if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag) point( stage, v2 ); + if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag) + point(stage, header, v0); + if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag) + point(stage, header, v1); + if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag) + point(stage, header, v2); } -static void lines( struct draw_stage *stage, - struct prim_header *header ) +static void lines(struct draw_stage *stage, + struct prim_header *header) { struct vertex_header *v0 = header->v[0]; struct vertex_header *v1 = header->v[1]; struct vertex_header *v2 = header->v[2]; if (header->flags & DRAW_PIPE_RESET_STIPPLE) - stage->next->reset_stipple_counter( stage->next ); + /* + * XXX could revisit this. The only stage which cares is the line + * stipple stage. Could just emit correct reset flags here and not + * bother about all the calling through reset_stipple_counter + * stages. Though technically it is necessary if line stipple is + * handled by the driver, but this is not actually hooked up when + * using vbuf (vbuf stage reset_stipple_counter does nothing). 
+ */ + stage->next->reset_stipple_counter(stage->next); inject_front_face_info(stage, header); - if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag) line( stage, v2, v0 ); - if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag) line( stage, v0, v1 ); - if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag) line( stage, v1, v2 ); + if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag) + line(stage, header, v2, v0); + if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag) + line(stage, header, v0, v1); + if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag) + line(stage, header, v1, v2); } ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 16/28] glsl: don't pack tessellation stages like we do other stages
On Tue, 2015-12-29 at 17:38 +1100, eocallag...@alterapraxis.com wrote: > On 2015-12-29 16:00, Timothy Arceri wrote: > > Tessellation shaders treat varyings as shared memory and > > invocations > > can access each others varyings therefore we can't use the existing > > method to lower them. > > > > This adds a check for these stages as following patches will > > allow explicit locations to be lowered even when the driver and > > existing > > tesselation checks ask for it to be disabled, we do this to enable > > support > > for the component layout qualifier. > > I find this a little hard to read and understand, could you brush it > up > a bit > please if that's ok? Sure I'll try again :) > > > --- > > src/glsl/lower_packed_varyings.cpp | 62 > > +- > > 1 file changed, 34 insertions(+), 28 deletions(-) > > > > diff --git a/src/glsl/lower_packed_varyings.cpp > > b/src/glsl/lower_packed_varyings.cpp > > index 2899846..e4e9a35 100644 > > --- a/src/glsl/lower_packed_varyings.cpp > > +++ b/src/glsl/lower_packed_varyings.cpp > > @@ -737,40 +737,46 @@ lower_packed_varyings(void *mem_ctx, unsigned > > locations_used, > >ir_variable_mode mode, unsigned > > gs_input_vertices, > >gl_shader *shader, bool > > disable_varying_packing) > > { > > - exec_list *instructions = shader->ir; > > ir_function *main_func = shader->symbols->get_function("main"); > > exec_list void_parameters; > > ir_function_signature *main_func_sig > >= main_func->matching_signature(NULL, _parameters, > > false); > > - exec_list new_instructions, new_variables; > > - lower_packed_varyings_visitor visitor(mem_ctx, locations_used, > > mode, > > - gs_input_vertices, > > - _instructions, > > - _variables, > > - disable_varying_packing); > > - visitor.run(shader); > > - if (mode == ir_var_shader_out) { > > - if (shader->Stage == MESA_SHADER_GEOMETRY) { > > - /* For geometry shaders, outputs need to be lowered > > before > > each call > > - * to EmitVertex() > > - */ > > - lower_packed_varyings_gs_splicer 
splicer(mem_ctx, > > _instructions); > > - > > - /* Add all the variables in first. */ > > - main_func_sig->body.head->insert_before(_variables); > > > > - /* Now update all the EmitVertex instances */ > > - splicer.run(instructions); > > + if (!(shader->Stage == MESA_SHADER_TESS_CTRL || > > + shader->Stage == MESA_SHADER_TESS_EVAL)) { > > + exec_list *instructions = shader->ir; > > + exec_list new_instructions, new_variables; > > + > > + lower_packed_varyings_visitor visitor(mem_ctx, > > locations_used, > > mode, > > +gs_input_vertices, > > +_instructions, > > +_variables, > > + > > disable_varying_packing); > > + visitor.run(shader); > > + if (mode == ir_var_shader_out) { > > + if (shader->Stage == MESA_SHADER_GEOMETRY) { > > +/* For geometry shaders, outputs need to be lowered > > before > > each > > + * call to EmitVertex() > > + */ > > +lower_packed_varyings_gs_splicer splicer(mem_ctx, > > + > > _instructions); > > + > > +/* Add all the variables in first. */ > > +main_func_sig->body.head > > ->insert_before(_variables); > > + > > +/* Now update all the EmitVertex instances */ > > +splicer.run(instructions); > > + } else { > > +/* For other shader types, outputs need to be lowered > > at > > the end > > + * of main() > > + */ > > +main_func_sig->body.append_list(_variables); > > +main_func_sig->body.append_list(_instructions); > > + } > >} else { > > - /* For other shader types, outputs need to be lowered at > > the > > end of > > - * main() > > - */ > > - main_func_sig->body.append_list(_variables); > > - main_func_sig->body.append_list(_instructions); > > + /* Shader inputs need to be lowered at the beginning of > > main() */ > > + main_func_sig->body.head > > ->insert_before(_instructions); > > + main_func_sig->body.head->insert_before(_variables); > >} > > - } else { > > - /* Shader inputs need to be lowered at the beginning of > > main() > > */ > > - main_func_sig->body.head->insert_before(_instructions); > > - 
main_func_sig->body.head->insert_before(_variables); > > } > > } > > ___ > mesa-dev
Re: [Mesa-dev] [PATCH 16/28] glsl: don't pack tessellation stages like we do other stages
On Wed, Jan 6, 2016 at 6:40 PM, Timothy Arceri wrote: > On Wed, 2016-01-06 at 17:50 -0500, Ilia Mirkin wrote: >> On Tue, Dec 29, 2015 at 12:00 AM, Timothy Arceri >> wrote: >> > Tessellation shaders treat varyings as shared memory and >> > invocations >> > can access each others varyings therefore we can't use the existing >> > method to lower them. >> >> That's not strictly true... this is only true of tess control outputs >> (which can be written by the current invocation, but also read in by >> other invocations, effectively acting as a shared memory -- both true >> of per-invocation outputs as well as per-patch outputs). Does that >> information change this patch at all? > > I don't think so. The problem is that the current packing code works > like this: > > - Change vars to be packed to temporaries, create new packed varyings. > - Copy *all* values from the new packed input varying to the > temporaries at the start of main. > - Copy *all* values from the temporaries to the new packed output vars > at the end of main (or before emit for GS). > > As well as the invocations stomping on each other this results in 32 > (GL_MAX_PATCH_VERTICES?) copies for each TCS input as it just copies > the full array. Presumably it also does this for GS? Although it's a lot more common for a single GS invocation to consume > > The current packing just doesn't work well for tessellation, its easier > to just disbale it for tessellation and do it all using a different > method rather than trying to mix and match. I thought it already *was* disabled... but I think you still have to have packing on TES outputs, because (a) your arguments against don't apply and (b) it might feed into transform feedback, which I have faint recollections must go through packing. 
> > >> >> > >> > This adds a check for these stages as following patches will >> > allow explicit locations to be lowered even when the driver and >> > existing >> > tesselation checks ask for it to be disabled, we do this to enable >> > support >> > for the component layout qualifier. >> > --- >> > src/glsl/lower_packed_varyings.cpp | 62 +- >> > >> > 1 file changed, 34 insertions(+), 28 deletions(-) >> > >> > diff --git a/src/glsl/lower_packed_varyings.cpp >> > b/src/glsl/lower_packed_varyings.cpp >> > index 2899846..e4e9a35 100644 >> > --- a/src/glsl/lower_packed_varyings.cpp >> > +++ b/src/glsl/lower_packed_varyings.cpp >> > @@ -737,40 +737,46 @@ lower_packed_varyings(void *mem_ctx, unsigned >> > locations_used, >> >ir_variable_mode mode, unsigned >> > gs_input_vertices, >> >gl_shader *shader, bool >> > disable_varying_packing) >> > { >> > - exec_list *instructions = shader->ir; >> > ir_function *main_func = shader->symbols->get_function("main"); >> > exec_list void_parameters; >> > ir_function_signature *main_func_sig >> >= main_func->matching_signature(NULL, _parameters, >> > false); >> > - exec_list new_instructions, new_variables; >> > - lower_packed_varyings_visitor visitor(mem_ctx, locations_used, >> > mode, >> > - gs_input_vertices, >> > - _instructions, >> > - _variables, >> > - disable_varying_packing); >> > - visitor.run(shader); >> > - if (mode == ir_var_shader_out) { >> > - if (shader->Stage == MESA_SHADER_GEOMETRY) { >> > - /* For geometry shaders, outputs need to be lowered >> > before each call >> > - * to EmitVertex() >> > - */ >> > - lower_packed_varyings_gs_splicer splicer(mem_ctx, >> > _instructions); >> > - >> > - /* Add all the variables in first. 
*/ >> > - main_func_sig->body.head->insert_before(_variables); >> > >> > - /* Now update all the EmitVertex instances */ >> > - splicer.run(instructions); >> > + if (!(shader->Stage == MESA_SHADER_TESS_CTRL || >> > + shader->Stage == MESA_SHADER_TESS_EVAL)) { >> > + exec_list *instructions = shader->ir; >> > + exec_list new_instructions, new_variables; >> > + >> > + lower_packed_varyings_visitor visitor(mem_ctx, >> > locations_used, mode, >> > +gs_input_vertices, >> > +_instructions, >> > +_variables, >> > + >> > disable_varying_packing); >> > + visitor.run(shader); >> > + if (mode == ir_var_shader_out) { >> > + if (shader->Stage == MESA_SHADER_GEOMETRY) { >> > +/* For geometry shaders, outputs need to be lowered >> > before each >> > + * call to EmitVertex() >> > + */ >> > +lower_packed_varyings_gs_splicer splicer(mem_ctx, >> > + >> > _instructions); >> > + >> > +
Re: [Mesa-dev] [PATCH] glsl: replace null check with assert
Ping. Just looking over some older patches of mine. I made this change both as a clean-up and because the constant expression evaluation code is some of the most expensive in the compiler and this was right in the hot path for some of the AoA tests until I improved some other optimisation to avoid calling it so much. It's not going to make much of a difference but with enhanced layouts also now making use of the constant expression path it would still be good to land this. On Tue, 2015-07-14 at 23:30 +1000, Timothy Arceri wrote: > This was added in 54f583a20 since then error handling has improved. > > The test this was added to fix now fails earlier since 01822706ec > --- > src/glsl/ir_constant_expression.cpp | 4 +--- > 1 file changed, 1 insertion(+), 3 deletions(-) > > diff --git a/src/glsl/ir_constant_expression.cpp > b/src/glsl/ir_constant_expression.cpp > index 171b8e9..5732867 100644 > --- a/src/glsl/ir_constant_expression.cpp > +++ b/src/glsl/ir_constant_expression.cpp > @@ -1857,9 +1857,7 @@ ir_swizzle::constant_expression_value(struct > hash_table *variable_context) > ir_constant * > ir_dereference_variable::constant_expression_value(struct hash_table > *variable_context) > { > - /* This may occur during compile and var->type is > glsl_type::error_type */ > - if (!var) > - return NULL; > + assert(var); > > /* Give priority to the context hashtable, if it exists */ > if (variable_context) { ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] glsl: fix varying slot allocation for blocks and structs with explicit locations
On Wed, Jan 6, 2016 at 4:32 PM, Timothy Arceriwrote: > On Wed, 2016-01-06 at 09:46 -0500, Ilia Mirkin wrote: >> On Wed, Jan 6, 2016 at 4:32 AM, Timothy Arceri >> wrote: >> > Previously each member was being counted as using a single slot, >> > count_attribute_slots() fixes the count for array and struct >> > members. >> > >> > Also don't assign a negitive to the unsigned expl_location >> > variable. >> > --- >> > >> > Fixes these new piglit tests: >> >http://patchwork.freedesktop.org/patch/69531/ >> > >> > src/glsl/ast_to_hir.cpp | 9 + >> > 1 file changed, 5 insertions(+), 4 deletions(-) >> > >> > diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp >> > index 0197cdc..50d5e22 100644 >> > --- a/src/glsl/ast_to_hir.cpp >> > +++ b/src/glsl/ast_to_hir.cpp >> > @@ -6408,12 +6408,13 @@ >> > ast_process_struct_or_iface_block_members(exec_list *instructions, >> > if (process_qualifier_constant(state, , >> > "location", >> > qual->location, >> > _location)) { >> > fields[i].location = VARYING_SLOT_VAR0 + >> > qual_location; >> > - expl_location = fields[i].location + 1; >> > + expl_location = fields[i].location + >> > + fields[i].type->count_attribute_slots(false); >> > } >> > } else { >> > if (layout && layout->flags.q.explicit_location) { >> > fields[i].location = expl_location; >> > - expl_location = expl_location + 1; >> > + expl_location += fields[i].type >> > ->count_attribute_slots(false); >> > } else { >> > fields[i].location = -1; >> > } >> > @@ -6570,7 +6571,7 @@ ast_struct_specifier::hir(exec_list >> > *instructions, >> > >> > state->struct_specifier_depth++; >> > >> > - unsigned expl_location = -1; >> > + unsigned expl_location = 0; >> > if (layout && layout->flags.q.explicit_location) { >> >if (!process_qualifier_constant(state, , "location", >> >layout->location, >> > _location)) { >> > @@ -6763,7 +6764,7 @@ ast_interface_block::hir(exec_list >> > *instructions, >> >return NULL; >> > } >> > >> > - unsigned expl_location = -1; >> > + unsigned 
expl_location = 0; >> >> There are a number of places that check for location != -1 as a >> sanity >> check... won't this defeat that? > > No because we only use expl_location when the explicit location flag is > set and if there is an error we don't copy the value from > expl_location. > > I believe I initialised it to stop gcc complaining although I just > tried removing this and it no longer complains so I guess I can just > remove the initialisation altogether. > > Are you happy with the change otherwise? Oh I see what's going on now. I took a much more careful look at the surrounding logic and I think switching expl_location to be initialized to 0 is fine -- if it's set on the layout it'll be initialized, otherwise it will never be used. Basically "expl_location" is "what is the current location that we should assign the next variable to when there's no explicit location listed on the var, but there is one on the block". So actually as originally sent, your patch is Reviewed-by: Ilia Mirkin ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/3] nir/opcodes: Fix the folding expression for usub_borrow
--- src/glsl/nir/nir_opcodes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py index 1cd01a4..4bc6d16 100644 --- a/src/glsl/nir/nir_opcodes.py +++ b/src/glsl/nir/nir_opcodes.py @@ -366,7 +366,7 @@ binop_convert("uadd_carry", tbool, tuint, commutative, "src0 + src1 < src0") # returns a boolean representing the borrow resulting from the subtraction # of the two unsigned arguments. -binop_convert("usub_borrow", tbool, tuint, "", "src1 < src0") +binop_convert("usub_borrow", tbool, tuint, "", "src0 < src1") binop("fmod", tfloat, "", "src0 - src1 * floorf(src0 / src1)") binop("umod", tuint, "", "src1 == 0 ? 0 : src0 % src1") -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 16/28] glsl: don't pack tessellation stages like we do other stages
On Tue, Dec 29, 2015 at 12:00 AM, Timothy Arceriwrote: > Tessellation shaders treat varyings as shared memory and invocations > can access each others varyings therefore we can't use the existing > method to lower them. That's not strictly true... this is only true of tess control outputs (which can be written by the current invocation, but also read in by other invocations, effectively acting as a shared memory -- both true of per-invocation outputs as well as per-patch outputs). Does that information change this patch at all? > > This adds a check for these stages as following patches will > allow explicit locations to be lowered even when the driver and existing > tesselation checks ask for it to be disabled, we do this to enable support > for the component layout qualifier. > --- > src/glsl/lower_packed_varyings.cpp | 62 > +- > 1 file changed, 34 insertions(+), 28 deletions(-) > > diff --git a/src/glsl/lower_packed_varyings.cpp > b/src/glsl/lower_packed_varyings.cpp > index 2899846..e4e9a35 100644 > --- a/src/glsl/lower_packed_varyings.cpp > +++ b/src/glsl/lower_packed_varyings.cpp > @@ -737,40 +737,46 @@ lower_packed_varyings(void *mem_ctx, unsigned > locations_used, >ir_variable_mode mode, unsigned gs_input_vertices, >gl_shader *shader, bool disable_varying_packing) > { > - exec_list *instructions = shader->ir; > ir_function *main_func = shader->symbols->get_function("main"); > exec_list void_parameters; > ir_function_signature *main_func_sig >= main_func->matching_signature(NULL, _parameters, false); > - exec_list new_instructions, new_variables; > - lower_packed_varyings_visitor visitor(mem_ctx, locations_used, mode, > - gs_input_vertices, > - _instructions, > - _variables, > - disable_varying_packing); > - visitor.run(shader); > - if (mode == ir_var_shader_out) { > - if (shader->Stage == MESA_SHADER_GEOMETRY) { > - /* For geometry shaders, outputs need to be lowered before each call > - * to EmitVertex() > - */ > - lower_packed_varyings_gs_splicer 
splicer(mem_ctx, > _instructions); > - > - /* Add all the variables in first. */ > - main_func_sig->body.head->insert_before(_variables); > > - /* Now update all the EmitVertex instances */ > - splicer.run(instructions); > + if (!(shader->Stage == MESA_SHADER_TESS_CTRL || > + shader->Stage == MESA_SHADER_TESS_EVAL)) { > + exec_list *instructions = shader->ir; > + exec_list new_instructions, new_variables; > + > + lower_packed_varyings_visitor visitor(mem_ctx, locations_used, mode, > +gs_input_vertices, > +_instructions, > +_variables, > +disable_varying_packing); > + visitor.run(shader); > + if (mode == ir_var_shader_out) { > + if (shader->Stage == MESA_SHADER_GEOMETRY) { > +/* For geometry shaders, outputs need to be lowered before each > + * call to EmitVertex() > + */ > +lower_packed_varyings_gs_splicer splicer(mem_ctx, > + _instructions); > + > +/* Add all the variables in first. */ > +main_func_sig->body.head->insert_before(_variables); > + > +/* Now update all the EmitVertex instances */ > +splicer.run(instructions); > + } else { > +/* For other shader types, outputs need to be lowered at the end > + * of main() > + */ > +main_func_sig->body.append_list(_variables); > +main_func_sig->body.append_list(_instructions); > + } >} else { > - /* For other shader types, outputs need to be lowered at the end of > - * main() > - */ > - main_func_sig->body.append_list(_variables); > - main_func_sig->body.append_list(_instructions); > + /* Shader inputs need to be lowered at the beginning of main() */ > + main_func_sig->body.head->insert_before(_instructions); > + main_func_sig->body.head->insert_before(_variables); >} > - } else { > - /* Shader inputs need to be lowered at the beginning of main() */ > - main_func_sig->body.head->insert_before(_instructions); > - main_func_sig->body.head->insert_before(_variables); > } > } > -- > 2.4.3 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 16/28] glsl: don't pack tessellation stages like we do other stages
On Wed, 2016-01-06 at 18:45 -0500, Ilia Mirkin wrote: > On Wed, Jan 6, 2016 at 6:40 PM, Timothy Arceri >wrote: > > On Wed, 2016-01-06 at 17:50 -0500, Ilia Mirkin wrote: > > > On Tue, Dec 29, 2015 at 12:00 AM, Timothy Arceri > > > wrote: > > > > Tessellation shaders treat varyings as shared memory and > > > > invocations > > > > can access each others varyings therefore we can't use the > > > > existing > > > > method to lower them. > > > > > > That's not strictly true... this is only true of tess control > > > outputs > > > (which can be written by the current invocation, but also read in > > > by > > > other invocations, effectively acting as a shared memory -- both > > > true > > > of per-invocation outputs as well as per-patch outputs). Does > > > that > > > information change this patch at all? > > > > I don't think so. The problem is that the current packing code > > works > > like this: > > > > - Change vars to be packed to temporaries, create new packed > > varyings. > > - Copy *all* values from the new packed input varying to the > > temporaries at the start of main. > > - Copy *all* values from the temporaries to the new packed output > > vars > > at the end of main (or before emit for GS). > > > > As well as the invocations stomping on each other this results in > > 32 > > (GL_MAX_PATCH_VERTICES?) copies for each TCS input as it just > > copies > > the full array. > > Presumably it also does this for GS? Although it's a lot more common > for a single GS invocation to consume Right. I thought about changing GS to do it differently also but until the backend can clean this up better it would likely make things even worse. > > > > > The current packing just doesn't work well for tessellation, it's > > easier > > to just disable it for tessellation and do it all using a different > > method rather than trying to mix and match. > > I thought it already *was* disabled... 
but I think you still have to > have packing on TES outputs, because (a) your arguments against don't > apply and (b) it might feed into transform feedback, which I have > faint recollections must go through packing. Yeah, it's a bit of a mess. Gallium tries to always disable packing unless transform feedback is enabled. Are there any Gallium drivers where it's not enabled? Then there is code that disables it for tessellation (except TES outputs). As far as I understand it, yes, varyings for transform feedback must go through packing. In which case I do need to allow these to be lowered for TES outputs; thanks for pointing it out, will change this. > > > > > > > > > > > > > > > > This adds a check for these stages as following patches will > > > > allow explicit locations to be lowered even when the driver and > > > > existing > > > > tessellation checks ask for it to be disabled, we do this to > > > > enable > > > > support > > > > for the component layout qualifier. > > > > --- > > > > src/glsl/lower_packed_varyings.cpp | 62 +- > > > > > > > > > > > > 1 file changed, 34 insertions(+), 28 deletions(-) > > > > > > > > diff --git a/src/glsl/lower_packed_varyings.cpp > > > > b/src/glsl/lower_packed_varyings.cpp > > > > index 2899846..e4e9a35 100644 > > > > --- a/src/glsl/lower_packed_varyings.cpp > > > > +++ b/src/glsl/lower_packed_varyings.cpp > > > > @@ -737,40 +737,46 @@ lower_packed_varyings(void *mem_ctx, > > > > unsigned > > > > locations_used, > > > >ir_variable_mode mode, unsigned > > > > gs_input_vertices, > > > >gl_shader *shader, bool > > > > disable_varying_packing) > > > > { > > > > - exec_list *instructions = shader->ir; > > > > ir_function *main_func = shader->symbols > > > > ->get_function("main"); > > > > exec_list void_parameters; > > > > ir_function_signature *main_func_sig > > > >= main_func->matching_signature(NULL, _parameters, > > > > false); > > > > - exec_list new_instructions, new_variables; > > > > - lower_packed_varyings_visitor visitor(mem_ctx, > 
> > > locations_used, > > > > mode, > > > > - gs_input_vertices, > > > > - _instructions, > > > > - _variables, > > > > - > > > > disable_varying_packing); > > > > - visitor.run(shader); > > > > - if (mode == ir_var_shader_out) { > > > > - if (shader->Stage == MESA_SHADER_GEOMETRY) { > > > > - /* For geometry shaders, outputs need to be lowered > > > > before each call > > > > - * to EmitVertex() > > > > - */ > > > > - lower_packed_varyings_gs_splicer splicer(mem_ctx, > > > > _instructions); > > > > - > > > > - /* Add all the variables in first. */ > > > > - main_func_sig->body.head > > > > ->insert_before(_variables); > > > > > > > > - /* Now update all the
Re: [Mesa-dev] [PATCH 3/3] llvmpipe: add sse code for fixed position calculation
Am 04.01.2016 um 20:38 schrieb Jose Fonseca: > On 02/01/16 20:39, srol...@vmware.com wrote: >> From: Roland Scheidegger>> >> This is quite a few less instructions, albeit still do the 2 64bit muls >> with scalar c code (they'd need way more shuffles, plus fixup for the >> signed >> mul so it totally doesn't seem worth it - x86 can do 32x32->64bit signed >> scalar muls natively just fine after all (even on 32bit). >> >> (This still doesn't have a measurable performance impact in reality, >> although >> profiler seems to say time spent in setup indeed has gone down by 10% >> or so >> overall.) >> --- >> src/gallium/drivers/llvmpipe/lp_setup_tri.c | 58 >> + >> 1 file changed, 50 insertions(+), 8 deletions(-) >> >> diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c >> b/src/gallium/drivers/llvmpipe/lp_setup_tri.c >> index cb1d715..fefd1c1 100644 >> --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c >> +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c >> @@ -65,11 +65,11 @@ fixed_to_float(int a) >> struct fixed_position { >> int32_t x[4]; >> int32_t y[4]; >> - int64_t area; >> int32_t dx01; >> int32_t dy01; >> int32_t dx20; >> int32_t dy20; >> + int64_t area; >> }; >> >> >> @@ -866,29 +866,71 @@ static void retry_triangle_ccw( struct >> lp_setup_context *setup, >> >> /** >>* Calculate fixed position data for a triangle >> + * It is unfortunate we need to do that here (as we need area >> + * calculated in fixed point), as there's quite some code duplication >> + * to what is done in the jit setup prog. 
>>*/ >> static inline void >> -calc_fixed_position( struct lp_setup_context *setup, >> - struct fixed_position* position, >> - const float (*v0)[4], >> - const float (*v1)[4], >> - const float (*v2)[4]) >> +calc_fixed_position(struct lp_setup_context *setup, >> +struct fixed_position* position, >> +const float (*v0)[4], >> +const float (*v1)[4], >> +const float (*v2)[4]) >> { >> + /* >> +* The rounding may not be quite the same with PIPE_ARCH_SSE >> +* (util_iround right now only does nearest/even on x87, >> +* otherwise nearest/away-from-zero). >> +* Both should be acceptable, I think. >> +*/ >> +#if defined(PIPE_ARCH_SSE) >> + __m128d v0r, v1r, v2r; >> + __m128 vxy0xy2, vxy1xy0; >> + __m128i vxy0xy2i, vxy1xy0i; >> + __m128i dxdy0120, x0x2y0y2, x1x0y1y0, x0120, y0120; >> + __m128 pix_offset = _mm_set1_ps(setup->pixel_offset); >> + __m128 fixed_one = _mm_set1_ps((float)FIXED_ONE); >> + v0r = _mm_load_sd((const double *)v0[0]); >> + v1r = _mm_load_sd((const double *)v1[0]); >> + v2r = _mm_load_sd((const double *)v2[0]); >> + vxy0xy2 = (__m128)_mm_unpacklo_pd(v0r, v2r); >> + vxy1xy0 = (__m128)_mm_unpacklo_pd(v1r, v0r); >> + vxy0xy2 = _mm_sub_ps(vxy0xy2, pix_offset); >> + vxy1xy0 = _mm_sub_ps(vxy1xy0, pix_offset); >> + vxy0xy2 = _mm_mul_ps(vxy0xy2, fixed_one); >> + vxy1xy0 = _mm_mul_ps(vxy1xy0, fixed_one); >> + vxy0xy2i = _mm_cvtps_epi32(vxy0xy2); >> + vxy1xy0i = _mm_cvtps_epi32(vxy1xy0); >> + dxdy0120 = _mm_sub_epi32(vxy0xy2i, vxy1xy0i); >> + _mm_store_si128((__m128i *)>dx01, dxdy0120); >> + /* >> +* For the mul, would need some more shuffles, plus emulation >> +* for the signed mul (without sse41), so don't bother. 
>> +*/ >> + x0x2y0y2 = _mm_shuffle_epi32(vxy0xy2i, _MM_SHUFFLE(3,1,2,0)); >> + x1x0y1y0 = _mm_shuffle_epi32(vxy1xy0i, _MM_SHUFFLE(3,1,2,0)); >> + x0120 = _mm_unpacklo_epi32(x0x2y0y2, x1x0y1y0); >> + y0120 = _mm_unpackhi_epi32(x0x2y0y2, x1x0y1y0); >> + _mm_store_si128((__m128i *)>x[0], x0120); >> + _mm_store_si128((__m128i *)>y[0], y0120); >> + >> +#else >> position->x[0] = subpixel_snap(v0[0][0] - setup->pixel_offset); >> position->x[1] = subpixel_snap(v1[0][0] - setup->pixel_offset); >> position->x[2] = subpixel_snap(v2[0][0] - setup->pixel_offset); >> - position->x[3] = 0; >> + position->x[3] = 0; // should be unused >> >> position->y[0] = subpixel_snap(v0[0][1] - setup->pixel_offset); >> position->y[1] = subpixel_snap(v1[0][1] - setup->pixel_offset); >> position->y[2] = subpixel_snap(v2[0][1] - setup->pixel_offset); >> - position->y[3] = 0; >> + position->y[3] = 0; // should be unused >> >> position->dx01 = position->x[0] - position->x[1]; >> position->dy01 = position->y[0] - position->y[1]; >> >> position->dx20 = position->x[2] - position->x[0]; >> position->dy20 = position->y[2] - position->y[0]; >> +#endif >> >> position->area = IMUL64(position->dx01, position->dy20) - >>IMUL64(position->dx20, position->dy01); >> > > LGTM too. > > Reviewed-by: Jose Fonseca Hmm actually I suppose I didn't do enough testing with that. This fails one piglit (completely
Re: [Mesa-dev] [PATCH 1/4] vbo: create a new draw function interface for indirect draws
On 01/04/2016 07:22 PM, Ilia Mirkin wrote: > Sure, no problem. Do you think you'll have time to look at it in the > next day or two though? Yes. I'll review it on Thursday. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH V2 16/35] glsl: move packing rules for tessellation stages into the packing code
Whoops, the subject line should be [PATCH V2 16/28] ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH V2 16/35] glsl: move packing rules for tessellation stages into the packing code
On Wed, 2016-01-06 at 20:19 -0500, Ilia Mirkin wrote: > On Wed, Jan 6, 2016 at 8:00 PM, Timothy Arceri >wrote: > > Following patches will allow packing of varyings with explicit > > locations > > via the component layout qualifier. Moving the rules here will > > enable > > us to call an alternate path for packing tessellation stages with > > explicit locations. > > --- > > V2: move the tessellation packing rules, allow TES output to be > > packed. > > > > src/glsl/link_varyings.cpp | 17 ++ > > src/glsl/lower_packed_varyings.cpp | 63 +- > > > > 2 files changed, 38 insertions(+), 42 deletions(-) > > > > diff --git a/src/glsl/link_varyings.cpp > > b/src/glsl/link_varyings.cpp > > index be662bc..69e24e3 100644 > > --- a/src/glsl/link_varyings.cpp > > +++ b/src/glsl/link_varyings.cpp > > @@ -1640,18 +1640,7 @@ assign_varying_locations(struct gl_context > > *ctx, > >assert(!ctx->Extensions.EXT_transform_feedback); > > } > > > > - /* Tessellation shaders treat inputs and outputs as shared > > memory and can > > -* access inputs and outputs of other invocations. > > -* Therefore, they can't be lowered to temps easily (and > > definitely not > > -* efficiently). > > -*/ > > - bool disable_varying_packing = > > - ctx->Const.DisableVaryingPacking || > > - (consumer && consumer->Stage == MESA_SHADER_TESS_EVAL) || > > - (consumer && consumer->Stage == MESA_SHADER_TESS_CTRL) || > > - (producer && producer->Stage == MESA_SHADER_TESS_CTRL); > > - > > - varying_matches matches(disable_varying_packing, > > + varying_matches matches(ctx->Const.DisableVaryingPacking, > > producer ? producer->Stage : > > (gl_shader_stage)-1, > > consumer ? 
consumer->Stage : > > (gl_shader_stage)-1); > > hash_table *tfeedback_candidates > > @@ -1864,13 +1853,13 @@ assign_varying_locations(struct gl_context > > *ctx, > > > > if (producer) { > >lower_packed_varyings(mem_ctx, slots_used, > > ir_var_shader_out, > > -0, producer, disable_varying_packing); > > +0, producer, ctx > > ->Const.DisableVaryingPacking); > > } > > > > if (consumer) { > >lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_in, > > consumer_vertices, consumer, > > -disable_varying_packing); > > +ctx->Const.DisableVaryingPacking); > > } > > > > return true; > > diff --git a/src/glsl/lower_packed_varyings.cpp > > b/src/glsl/lower_packed_varyings.cpp > > index 2899846..4723c2b 100644 > > --- a/src/glsl/lower_packed_varyings.cpp > > +++ b/src/glsl/lower_packed_varyings.cpp > > @@ -737,40 +737,47 @@ lower_packed_varyings(void *mem_ctx, unsigned > > locations_used, > >ir_variable_mode mode, unsigned > > gs_input_vertices, > >gl_shader *shader, bool > > disable_varying_packing) > > { > > - exec_list *instructions = shader->ir; > > ir_function *main_func = shader->symbols->get_function("main"); > > exec_list void_parameters; > > ir_function_signature *main_func_sig > >= main_func->matching_signature(NULL, _parameters, > > false); > > - exec_list new_instructions, new_variables; > > - lower_packed_varyings_visitor visitor(mem_ctx, locations_used, > > mode, > > - gs_input_vertices, > > - _instructions, > > - _variables, > > - disable_varying_packing); > > - visitor.run(shader); > > - if (mode == ir_var_shader_out) { > > - if (shader->Stage == MESA_SHADER_GEOMETRY) { > > - /* For geometry shaders, outputs need to be lowered > > before each call > > - * to EmitVertex() > > - */ > > - lower_packed_varyings_gs_splicer splicer(mem_ctx, > > _instructions); > > - > > - /* Add all the variables in first. 
*/ > > - main_func_sig->body.head->insert_before(_variables); > > > > - /* Now update all the EmitVertex instances */ > > - splicer.run(instructions); > > + if (!(shader->Stage == MESA_SHADER_TESS_CTRL || > > + (shader->Stage == MESA_SHADER_TESS_EVAL && > > + mode == ir_var_shader_in))) { > > To match what was being done before (and IMHO much more readable, you > might do something like > > if (shader->Stage == TESS_CTRL || (shader->stage == TESS_EVAL && mode > == in)) > disable_varying_packing = true; > > That would be equivalent to what was being done before right? Or if > you don't want the disable_varying_packing thing to be set to true in > that case, you could just return... would still be easier to read and > avoid the extra indent. > > -ilia The indenting is to reduce
Re: [Mesa-dev] [PATCH 09/23] radeonsi: use EXP_NULL for pixel shaders without outputs
On 06.01.2016 21:41, Marek Olšák wrote: > From: Marek Olšák > > This never happens currently. So this change is untested? If so, how confident are you that this won't break if it ever actually gets hit? :) -- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: quieten compiler warning about out-of-bounds access
On Tue, Jan 5, 2016 at 8:30 PM, Jason Ekstrand wrote: > > On Jan 4, 2016 8:30 PM, "Ilia Mirkin" wrote: >> >> gcc 4.9.3 shows the following error: >> >> brw_vue_map.c:260:20: warning: array subscript is above array bounds >> [-Warray-bounds] >> return brw_names[slot - VARYING_SLOT_MAX]; >> >> This is because BRW_VARYING_SLOT_COUNT is a valid value for the enum >> type. Adding an assert will generate no additional code but will teach >> the compiler to not complain. > > What if you build in release mode? You get the warning :) -ilia ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH V2 16/35] glsl: move packing rules for tessellation stages into the packing code
On Wed, Jan 6, 2016 at 8:00 PM, Timothy Arceriwrote: > Following patches will allow packing of varyings with explicit locations > via the component layout qualifier. Moving the rules here will enable > us to call an alternate path for packing tessellation stages with > explicit locations. > --- > V2: move the tessellation packing rules, allow TES output to be packed. > > src/glsl/link_varyings.cpp | 17 ++ > src/glsl/lower_packed_varyings.cpp | 63 > +- > 2 files changed, 38 insertions(+), 42 deletions(-) > > diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp > index be662bc..69e24e3 100644 > --- a/src/glsl/link_varyings.cpp > +++ b/src/glsl/link_varyings.cpp > @@ -1640,18 +1640,7 @@ assign_varying_locations(struct gl_context *ctx, >assert(!ctx->Extensions.EXT_transform_feedback); > } > > - /* Tessellation shaders treat inputs and outputs as shared memory and can > -* access inputs and outputs of other invocations. > -* Therefore, they can't be lowered to temps easily (and definitely not > -* efficiently). > -*/ > - bool disable_varying_packing = > - ctx->Const.DisableVaryingPacking || > - (consumer && consumer->Stage == MESA_SHADER_TESS_EVAL) || > - (consumer && consumer->Stage == MESA_SHADER_TESS_CTRL) || > - (producer && producer->Stage == MESA_SHADER_TESS_CTRL); > - > - varying_matches matches(disable_varying_packing, > + varying_matches matches(ctx->Const.DisableVaryingPacking, > producer ? producer->Stage : (gl_shader_stage)-1, > consumer ? 
consumer->Stage : (gl_shader_stage)-1); > hash_table *tfeedback_candidates > @@ -1864,13 +1853,13 @@ assign_varying_locations(struct gl_context *ctx, > > if (producer) { >lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_out, > -0, producer, disable_varying_packing); > +0, producer, ctx->Const.DisableVaryingPacking); > } > > if (consumer) { >lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_in, > consumer_vertices, consumer, > -disable_varying_packing); > +ctx->Const.DisableVaryingPacking); > } > > return true; > diff --git a/src/glsl/lower_packed_varyings.cpp > b/src/glsl/lower_packed_varyings.cpp > index 2899846..4723c2b 100644 > --- a/src/glsl/lower_packed_varyings.cpp > +++ b/src/glsl/lower_packed_varyings.cpp > @@ -737,40 +737,47 @@ lower_packed_varyings(void *mem_ctx, unsigned > locations_used, >ir_variable_mode mode, unsigned gs_input_vertices, >gl_shader *shader, bool disable_varying_packing) > { > - exec_list *instructions = shader->ir; > ir_function *main_func = shader->symbols->get_function("main"); > exec_list void_parameters; > ir_function_signature *main_func_sig >= main_func->matching_signature(NULL, _parameters, false); > - exec_list new_instructions, new_variables; > - lower_packed_varyings_visitor visitor(mem_ctx, locations_used, mode, > - gs_input_vertices, > - _instructions, > - _variables, > - disable_varying_packing); > - visitor.run(shader); > - if (mode == ir_var_shader_out) { > - if (shader->Stage == MESA_SHADER_GEOMETRY) { > - /* For geometry shaders, outputs need to be lowered before each call > - * to EmitVertex() > - */ > - lower_packed_varyings_gs_splicer splicer(mem_ctx, > _instructions); > - > - /* Add all the variables in first. 
*/ > - main_func_sig->body.head->insert_before(_variables); > > - /* Now update all the EmitVertex instances */ > - splicer.run(instructions); > + if (!(shader->Stage == MESA_SHADER_TESS_CTRL || > + (shader->Stage == MESA_SHADER_TESS_EVAL && > + mode == ir_var_shader_in))) { To match what was being done before (and IMHO much more readable, you might do something like if (shader->Stage == TESS_CTRL || (shader->stage == TESS_EVAL && mode == in)) disable_varying_packing = true; That would be equivalent to what was being done before right? Or if you don't want the disable_varying_packing thing to be set to true in that case, you could just return... would still be easier to read and avoid the extra indent. -ilia > + exec_list *instructions = shader->ir; > + exec_list new_instructions, new_variables; > + > + lower_packed_varyings_visitor visitor(mem_ctx, locations_used, mode, > +gs_input_vertices, > +_instructions, > +
[Mesa-dev] [PATCH] i965: Explicitly write the "TR DS Cache Disable" bit at TCS EOT.
Bit 0 of the Patch Header is "TR DS Cache Disable". Setting that bit disables the DS Cache for tessellator-output topologies resulting in stitch-transition regions (but leaves it enabled for other cases). We probably shouldn't leave this to chance - the URB could contain garbage - which could result in the cache randomly being turned on or off. This patch makes the final EOT write 0 to the first DWord (which only contains this one bit). This ensures the cache is always on. Signed-off-by: Kenneth Graunke--- src/mesa/drivers/dri/i965/brw_vec4.cpp | 2 +- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 5 - src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index c6a52c5..20e6305 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -274,9 +274,9 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst) case SHADER_OPCODE_INT_QUOTIENT: case SHADER_OPCODE_INT_REMAINDER: case SHADER_OPCODE_POW: + case TCS_OPCODE_THREAD_END: return 2; case VS_OPCODE_URB_WRITE: - case TCS_OPCODE_THREAD_END: return 1; case VS_OPCODE_PULL_CONSTANT_LOAD: return 2; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 6b03a1c..7ae1059 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -981,15 +981,18 @@ generate_tcs_thread_end(struct brw_codegen *p, vec4_instruction *inst) brw_set_default_access_mode(p, BRW_ALIGN_1); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_MOV(p, header, brw_imm_ud(0)); + brw_MOV(p, get_element_ud(header, 5), brw_imm_ud(WRITEMASK_X << 8)); brw_MOV(p, get_element_ud(header, 0), retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD)); + brw_MOV(p, brw_message_reg(inst->base_mrf + 1), brw_imm_ud(0u)); brw_pop_insn_state(p); brw_urb_WRITE(p, 
brw_null_reg(), /* dest */ inst->base_mrf, /* starting mrf reg nr */ header, - BRW_URB_WRITE_EOT | inst->urb_write_flags, + BRW_URB_WRITE_EOT | BRW_URB_WRITE_OWORD | + BRW_URB_WRITE_USE_CHANNEL_MASKS, inst->mlen, 0, /* response len */ 0, /* urb destination offset */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp index fb6ca8e..a65a633 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp @@ -205,7 +205,7 @@ vec4_tcs_visitor::emit_thread_end() inst = emit(TCS_OPCODE_THREAD_END); inst->base_mrf = 14; - inst->mlen = 1; + inst->mlen = 2; } -- 2.6.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] draw: fix line stippling with unfilled prims
From: Roland ScheideggerThe unfilled stage was not filling in the prim header, and the line stage then decided to reset the stipple counter or not based on the uninitialized data. This causes some failures in conform linestipple test (albeit quite randomly happening depending on environment). So fill in the prim header in the unfilled stage - I am not entirely sure if anybody really needs determinant after that stage, but there's at least later stages (wide line for instance) which copy over the determinant as well. --- src/gallium/auxiliary/draw/draw_pipe_unfilled.c | 56 + 1 file changed, 38 insertions(+), 18 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c index 8e6435c..b9ded14 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c +++ b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c @@ -86,27 +86,33 @@ inject_front_face_info(struct draw_stage *stage, } -static void point( struct draw_stage *stage, - struct vertex_header *v0 ) +static void point(struct draw_stage *stage, + struct prim_header *header, + struct vertex_header *v0) { struct prim_header tmp; + tmp.det = header->det; + tmp.flags = 0; tmp.v[0] = v0; - stage->next->point( stage->next, ); + stage->next->point(stage->next, ); } -static void line( struct draw_stage *stage, - struct vertex_header *v0, - struct vertex_header *v1 ) +static void line(struct draw_stage *stage, + struct prim_header *header, + struct vertex_header *v0, + struct vertex_header *v1) { struct prim_header tmp; + tmp.det = header->det; + tmp.flags = 0; tmp.v[0] = v0; tmp.v[1] = v1; - stage->next->line( stage->next, ); + stage->next->line(stage->next, ); } -static void points( struct draw_stage *stage, - struct prim_header *header ) +static void points(struct draw_stage *stage, + struct prim_header *header) { struct vertex_header *v0 = header->v[0]; struct vertex_header *v1 = header->v[1]; @@ -114,27 +120,41 @@ static void points( struct draw_stage *stage, 
inject_front_face_info(stage, header); - if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag) point( stage, v0 ); - if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag) point( stage, v1 ); - if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag) point( stage, v2 ); + if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag) + point(stage, header, v0); + if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag) + point(stage, header, v1); + if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag) + point(stage, header, v2); } -static void lines( struct draw_stage *stage, - struct prim_header *header ) +static void lines(struct draw_stage *stage, + struct prim_header *header) { struct vertex_header *v0 = header->v[0]; struct vertex_header *v1 = header->v[1]; struct vertex_header *v2 = header->v[2]; if (header->flags & DRAW_PIPE_RESET_STIPPLE) - stage->next->reset_stipple_counter( stage->next ); + /* + * XXX could revisit this. The only stage which cares is the line + * stipple stage. Could just emit correct reset flags here and not + * bother about all the calling through reset_stipple_counter + * stages. Though technically it is necessary if line stipple is + * handled by the driver, but this is not actually hooked up when + * using vbuf (vbuf stage reset_stipple_counter does nothing). 
+ */ + stage->next->reset_stipple_counter(stage->next); inject_front_face_info(stage, header); - if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag) line( stage, v2, v0 ); - if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag) line( stage, v0, v1 ); - if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag) line( stage, v1, v2 ); + if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag) + line(stage, header, v2, v0); + if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag) + line(stage, header, v0, v1); + if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag) + line(stage, header, v1, v2); } -- 2.1.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] nv50/ir: don't touch degree on physreg RIG nodes
These nodes don't go through reduction, so we shouldn't be increasing their degrees. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91895 Signed-off-by: Ilia MirkinCc: "11.0 11.1" --- I would like to see a *bunch* of testing on this before merging it... RA-land is far from my expertise. However it does fix the shaders in the original bug and doesn't regress the few additional things that I tried. src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp index cd8c42c..f1ffcba 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp @@ -1129,9 +1129,11 @@ GCRA::doCoalesce(ArrayList& insns, unsigned int mask) void GCRA::RIG_Node::addInterference(RIG_Node *node) { - this->degree += relDegree[node->colors][colors]; - node->degree += relDegree[colors][node->colors]; - + // don't add degree for physregs since they won't go through simplify() + if (this->reg < 0) + this->degree += relDegree[node->colors][colors]; + if (node->reg < 0) + node->degree += relDegree[colors][node->colors]; this->attach(node, Graph::Edge::CROSS); } -- 2.4.10 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 09/28] glsl: validate and store component layout qualifier in GLSL IR
On Mon, Dec 28, 2015 at 9:00 PM, Timothy Arceriwrote: > We make use of the existing IR field location_frac used for tracking > component locations. > --- > src/glsl/ast_to_hir.cpp | 38 ++ > src/glsl/ir.h | 5 + > 2 files changed, 43 insertions(+) > > diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp > index 1091c02..bb35d72 100644 > --- a/src/glsl/ast_to_hir.cpp > +++ b/src/glsl/ast_to_hir.cpp > @@ -3075,10 +3075,42 @@ apply_layout_qualifier_to_variable(const struct > ast_type_qualifier *qual, > > if (qual->flags.q.explicit_location) { >apply_explicit_location(qual, var, state, loc); > + > + if (qual->flags.q.explicit_component) { > + unsigned qual_component; > + if (process_qualifier_constant(state, loc, "component", > +qual->component, _component)) { > +const glsl_type *type = var->type->without_array(); > +unsigned components = type->component_slots(); > + > +if (type->is_matrix() || type->is_record()) { > + _mesa_glsl_error(loc, state, "component layout qualifier " > +"cannot be applied to a matrix, a structure, > " > +"a block, or an array containing any of " > +"these."); > +} else if (qual_component != 0 && > +(qual_component + components - 1) > 3) { > + _mesa_glsl_error(loc, state, "component overflow (%u > 3)", > +(qual_component + components - 1)); > +} else if (qual_component == 1 && type->is_double()) { > + /* We don't bother checking for 3 as it should be caught by > the > +* overflow check above. 
> +*/ > + _mesa_glsl_error(loc, state, "doubles cannot begin at " > +"component 1 or 3"); > +} else { > + var->data.explicit_component = true; > + var->data.location_frac = qual_component; > +} > + } > + } > } else if (qual->flags.q.explicit_index) { >if (!qual->flags.q.subroutine_def) > _mesa_glsl_error(loc, state, >"explicit index requires explicit location"); > + } else if (qual->flags.q.explicit_component) { > + _mesa_glsl_error(loc, state, > + "explicit component requires explicit location"); > } > > if (qual->flags.q.explicit_binding) { > @@ -6660,6 +6692,12 @@ ast_interface_block::hir(exec_list *instructions, > "Interface block sets both readonly and writeonly"); > } > > + if (this->layout.flags.q.explicit_component) { > + _mesa_glsl_error(, state, "component layout qualifier cannot be " > + "applied to a matrix, a structure, a block, or an " > + "array containing any of these."); > + } > + > unsigned qual_stream; > if (!process_qualifier_constant(state, , "stream", > this->layout.stream, > _stream) || > diff --git a/src/glsl/ir.h b/src/glsl/ir.h > index 159f94d..d604a1f 100644 > --- a/src/glsl/ir.h > +++ b/src/glsl/ir.h > @@ -714,6 +714,11 @@ public: >unsigned explicit_binding:1; > >/** > + * Was an initial component explicitly set in the shader? > + */ > + unsigned explicit_component:1; > + > + /** > * Does this variable have an initializer? > * > * This is used by the linker to cross-validiate initializers of global > -- > 2.4.3 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev Reviewed-by: Anuj Phogat ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 06/28] glsl: fix overlapping of varying locations for arrays and structs
On Mon, Dec 28, 2015 at 9:00 PM, Timothy Arceriwrote: > Previously we were only reserving a single location for arrays and > structs. > > We also didn't take into account implicit locations clashing with > explicit locations when assigning locations for their arrays or > structs. > > This patch fixes both issues. > > V5: fix regression for patch inputs/outputs in tessellation shaders > V4: just use count_attribute_slots() to get the number of slots, > also calculate the correct number of slots to reserve for gs and > tess stages by making use of the new get_varying_type() helper. > V3: handle arrays of structs > V2: also fix for arrays of arrays and structs. > --- > src/glsl/link_varyings.cpp | 80 > +++--- > 1 file changed, 68 insertions(+), 12 deletions(-) > > diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp > index d9550df..34e8418 100644 > --- a/src/glsl/link_varyings.cpp > +++ b/src/glsl/link_varyings.cpp > @@ -825,7 +825,8 @@ public: > gl_shader_stage consumer_stage); > ~varying_matches(); > void record(ir_variable *producer_var, ir_variable *consumer_var); > - unsigned assign_locations(uint64_t reserved_slots, bool separate_shader); > + unsigned assign_locations(struct gl_shader_program *prog, > + uint64_t reserved_slots, bool separate_shader); > void store_locations() const; > > private: > @@ -1042,7 +1043,9 @@ varying_matches::record(ir_variable *producer_var, > ir_variable *consumer_var) > * passed to varying_matches::record(). 
> */ > unsigned > -varying_matches::assign_locations(uint64_t reserved_slots, bool > separate_shader) > +varying_matches::assign_locations(struct gl_shader_program *prog, > + uint64_t reserved_slots, > + bool separate_shader) > { > /* We disable varying sorting for separate shader programs for the > * following reasons: > @@ -1079,10 +1082,21 @@ varying_matches::assign_locations(uint64_t > reserved_slots, bool separate_shader) > for (unsigned i = 0; i < this->num_matches; i++) { >unsigned *location = _location; > > - if ((this->matches[i].consumer_var && > - this->matches[i].consumer_var->data.patch) || > - (this->matches[i].producer_var && > - this->matches[i].producer_var->data.patch)) > + const ir_variable *var; > + const glsl_type *type; > + bool is_vertex_input = false; > + if (matches[i].consumer_var) { > + var = matches[i].consumer_var; > + type = get_varying_type(var, consumer_stage); > + is_vertex_input = false; This is not required. is_vertex_input is already initialized to false. > + if (consumer_stage == MESA_SHADER_VERTEX) > +is_vertex_input = true; > + } else { > + var = matches[i].producer_var; > + type = get_varying_type(var, producer_stage); > + } > + > + if (var->data.patch) > location = _patch_location; > >/* Advance to the next slot if this varying has a different packing > @@ -1094,9 +1108,45 @@ varying_matches::assign_locations(uint64_t > reserved_slots, bool separate_shader) >!= this->matches[i].packing_class) { > *location = ALIGN(*location, 4); >} > - while ((*location < MAX_VARYING * 4u) && > -(reserved_slots & (1u << *location / 4u))) { > - *location = ALIGN(*location + 1, 4); > + > + unsigned num_elements = type->count_attribute_slots(is_vertex_input); > + unsigned slot_end = this->disable_varying_packing ? 
4 : > + type->without_array()->vector_elements; > + slot_end += *location - 1; > + > + /* FIXME: We could be smarter in the below code and loop back over > + * trying to fill any locations that we skipped because we couldn't > pack > + * the varying between an explicit location. For now just let the user > + * hit the linking error if we run out of room and suggest they use > + * explicit locations. > + */ > + for (unsigned j = 0; j < num_elements; j++) { > + while ((slot_end < MAX_VARYING * 4u) && > +((reserved_slots & (1u << *location / 4u) || > + (reserved_slots & (1u << slot_end / 4u) { > + > +*location = ALIGN(*location + 1, 4); > +slot_end = *location; > + > +/* reset the counter and try again */ > +j = 0; > + } > + > + /* Increase the slot to make sure there is enough room for next > + * array element. > + */ > + if (this->disable_varying_packing) > +slot_end += 4; > + else > +slot_end += type->without_array()->vector_elements; > + } > + > + if (!var->data.patch && *location >= MAX_VARYING * 4u) { > + linker_error(prog, "insufficient
Re: [Mesa-dev] [PATCH 07/28] glsl: don't try adding build-ins to explicit locations bitmask
s/build-ins/built-ins in commit message On Mon, Dec 28, 2015 at 9:00 PM, Timothy Arceriwrote: > --- > src/glsl/link_varyings.cpp | 4 +++- > 1 file changed, 3 insertions(+), 1 deletion(-) > > diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp > index 34e8418..ee7cae0 100644 > --- a/src/glsl/link_varyings.cpp > +++ b/src/glsl/link_varyings.cpp > @@ -1530,7 +1530,9 @@ reserved_varying_slot(struct gl_shader *stage, > ir_variable_mode io_mode) > foreach_in_list(ir_instruction, node, stage->ir) { >ir_variable *const var = node->as_variable(); > > - if (var == NULL || var->data.mode != io_mode || > !var->data.explicit_location) > + if (var == NULL || var->data.mode != io_mode || > + !var->data.explicit_location || > + var->data.location < VARYING_SLOT_VAR0) > continue; > >var_slot = var->data.location - VARYING_SLOT_VAR0; > -- > 2.4.3 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev Reviewed-by: Anuj Phogat ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 08/28] glsl: parse component layout qualifier
On Mon, Dec 28, 2015 at 9:00 PM, Timothy Arceriwrote: > --- > src/glsl/ast.h | 14 ++ > src/glsl/ast_type.cpp | 3 +++ > src/glsl/glsl_parser.yy | 11 +++ > 3 files changed, 28 insertions(+) > > diff --git a/src/glsl/ast.h b/src/glsl/ast.h > index f8ab0b7..e22deed 100644 > --- a/src/glsl/ast.h > +++ b/src/glsl/ast.h > @@ -490,6 +490,12 @@ struct ast_type_qualifier { > */ > unsigned explicit_index:1; > > +/** > + * Flag set if GL_ARB_enhanced_layouts "component" layout > + * qualifier is used. > + */ > +unsigned explicit_component:1; > + > /** >* Flag set if GL_ARB_shading_language_420pack "binding" layout >* qualifier is used. > @@ -595,6 +601,14 @@ struct ast_type_qualifier { > */ > ast_expression *index; > > + /** > +* Component specified via GL_ARB_enhaced_layouts > +* > +* \note > +* This field is only valid if \c explicit_component is set. > +*/ > + ast_expression *component; > + > /** Maximum output vertices in GLSL 1.50 geometry shaders. */ > ast_layout_expression *max_vertices; > > diff --git a/src/glsl/ast_type.cpp b/src/glsl/ast_type.cpp > index 8643b7b..7330a34 100644 > --- a/src/glsl/ast_type.cpp > +++ b/src/glsl/ast_type.cpp > @@ -273,6 +273,9 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc, > if (q.flags.q.explicit_index) >this->index = q.index; > > + if (q.flags.q.explicit_component) > + this->component = q.component; > + > if (q.flags.q.explicit_binding) >this->binding = q.binding; > > diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy > index 51796a6..6b634f2 100644 > --- a/src/glsl/glsl_parser.yy > +++ b/src/glsl/glsl_parser.yy > @@ -1476,6 +1476,17 @@ layout_qualifier_id: > $$.location = $3; >} > > + if (match_layout_qualifier("component", $1, state) == 0) { > + if (!state->has_enhanced_layouts()) { > +_mesa_glsl_error(& @1, state, > + "component qualifier requires " > + "GLSL 4.40 or ARB_enhanced_layouts"); > + } else { > +$$.flags.q.explicit_component = 1; > +$$.component = $3; > + } > + } > + >if (match_layout_qualifier("index", 
$1, state) == 0) { > if (state->es_shader && !state->EXT_blend_func_extended_enable) { > _mesa_glsl_error(& @3, state, "index layout qualifier requires > EXT_blend_func_extended"); > -- > 2.4.3 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev Reviewed-by: Anuj Phogat ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 10/28] glsl: fix cross validation for explicit locations on structs and arrays
On Mon, Dec 28, 2015 at 9:00 PM, Timothy Arceriwrote: > --- > src/glsl/link_varyings.cpp | 43 ++- > 1 file changed, 30 insertions(+), 13 deletions(-) > > diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp > index ee7cae0..dea8741 100644 > --- a/src/glsl/link_varyings.cpp > +++ b/src/glsl/link_varyings.cpp > @@ -239,18 +239,24 @@ cross_validate_outputs_to_inputs(struct > gl_shader_program *prog, > /* User-defined varyings with explicit locations are handled >* differently because they do not need to have matching names. >*/ > - const unsigned idx = var->data.location - VARYING_SLOT_VAR0; > + const glsl_type *type = get_varying_type(var, producer->Stage); > + unsigned num_elements = type->count_attribute_slots(false); > + unsigned idx = var->data.location - VARYING_SLOT_VAR0; > + unsigned slot_limit = idx + num_elements; > > - if (explicit_locations[idx] != NULL) { > -linker_error(prog, > + while(idx < slot_limit) { > +if (explicit_locations[idx] != NULL) { > + linker_error(prog, > "%s shader has multiple outputs explicitly " > "assigned to location %d\n", > _mesa_shader_stage_to_string(producer->Stage), > idx); > -return; > - } > + return; > +} > > - explicit_locations[idx] = var; > +explicit_locations[idx] = var; > +idx++; > + } >} > } > > @@ -298,14 +304,25 @@ cross_validate_outputs_to_inputs(struct > gl_shader_program *prog, > ir_variable *output = NULL; > if (input->data.explicit_location > && input->data.location >= VARYING_SLOT_VAR0) { > -output = explicit_locations[input->data.location - > VARYING_SLOT_VAR0]; > > -if (output == NULL) { > - linker_error(prog, > -"%s shader input `%s' with explicit location " > -"has no matching output\n", > -_mesa_shader_stage_to_string(consumer->Stage), > -input->name); > +const glsl_type *type = get_varying_type(input, consumer->Stage); > +unsigned num_elements = type->count_attribute_slots(false); > +unsigned idx = input->data.location - VARYING_SLOT_VAR0; > +unsigned slot_limit = idx + num_elements; > + > 
+while(idx < slot_limit) { > + output = explicit_locations[idx]; > + > + if (output == NULL || > + input->data.location != output->data.location) { > + linker_error(prog, > + "%s shader input `%s' with explicit location " > + "has no matching output\n", > + _mesa_shader_stage_to_string(consumer->Stage), > + input->name); > + break; > + } > + idx++; > } > } else { > output = parameters.get_variable(input->name); > -- > 2.4.3 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev Reviewed-by: Anuj Phogat ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/3] st/mesa: check state->mesa in early return check in st_validate_state()
On 06/01/16 16:50, Brian Paul wrote: We were checking the dirty->st flags but not the dirty->mesa flags. When we took the early return, we didn't clear the dirty->mesa flags so the next time we called st_validate_state() we'd often flush the glBitmap cache. And since st_validate_state() is called from st_Bitmap(), it meant we flushed the bitmap cache for every glBitmap() call. This change seems to recover most of the performance loss observed with the ipers demo on llvmpipe since commit 36c93a6fae27561. Cc: mesa-sta...@lists.freedesktop.org --- src/mesa/state_tracker/st_atom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/state_tracker/st_atom.c b/src/mesa/state_tracker/st_atom.c index 43dbadd..c1a9d00 100644 --- a/src/mesa/state_tracker/st_atom.c +++ b/src/mesa/state_tracker/st_atom.c @@ -188,7 +188,7 @@ void st_validate_state( struct st_context *st ) st_manager_validate_framebuffers(st); - if (state->st == 0) + if (state->st == 0 && state->mesa == 0) return; /*printf("%s %x/%x\n", __func__, state->mesa, state->st);*/ Good stuff. Series looks good to me. Reviewed-by: Jose Fonseca Jose ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] glsl: fix varying slot allocation for blocks and structs with explicit locations
On Wed, 2016-01-06 at 09:46 -0500, Ilia Mirkin wrote: > On Wed, Jan 6, 2016 at 4:32 AM, Timothy Arceri >wrote: > > Previously each member was being counted as using a single slot, > > count_attribute_slots() fixes the count for array and struct > > members. > > > > Also don't assign a negitive to the unsigned expl_location > > variable. > > --- > > > > Fixes these new piglit tests: > >http://patchwork.freedesktop.org/patch/69531/ > > > > src/glsl/ast_to_hir.cpp | 9 + > > 1 file changed, 5 insertions(+), 4 deletions(-) > > > > diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp > > index 0197cdc..50d5e22 100644 > > --- a/src/glsl/ast_to_hir.cpp > > +++ b/src/glsl/ast_to_hir.cpp > > @@ -6408,12 +6408,13 @@ > > ast_process_struct_or_iface_block_members(exec_list *instructions, > > if (process_qualifier_constant(state, , > > "location", > > qual->location, > > _location)) { > > fields[i].location = VARYING_SLOT_VAR0 + > > qual_location; > > - expl_location = fields[i].location + 1; > > + expl_location = fields[i].location + > > + fields[i].type->count_attribute_slots(false); > > } > > } else { > > if (layout && layout->flags.q.explicit_location) { > > fields[i].location = expl_location; > > - expl_location = expl_location + 1; > > + expl_location += fields[i].type > > ->count_attribute_slots(false); > > } else { > > fields[i].location = -1; > > } > > @@ -6570,7 +6571,7 @@ ast_struct_specifier::hir(exec_list > > *instructions, > > > > state->struct_specifier_depth++; > > > > - unsigned expl_location = -1; > > + unsigned expl_location = 0; > > if (layout && layout->flags.q.explicit_location) { > >if (!process_qualifier_constant(state, , "location", > >layout->location, > > _location)) { > > @@ -6763,7 +6764,7 @@ ast_interface_block::hir(exec_list > > *instructions, > >return NULL; > > } > > > > - unsigned expl_location = -1; > > + unsigned expl_location = 0; > > There are a number of places that check for location != -1 as a > sanity > check... 
won't this defeat that? No, because we only use expl_location when the explicit location flag is set, and if there is an error we don't copy the value from expl_location. I believe I initialised it to stop gcc complaining, although I just tried removing this and it no longer complains, so I guess I can just remove the initialisation altogether. Are you happy with the change otherwise? > > > if (layout.flags.q.explicit_location) { > >if (!process_qualifier_constant(state, , "location", > >layout.location, > > _location)) { > > -- > > 2.4.3 > > > > ___ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 4/4] main: s/GLuint/GLbitfield for state bitmasks
On 06/01/16 16:51, Brian Paul wrote: --- src/mesa/main/api_arrayelt.c | 4 ++-- src/mesa/main/api_arrayelt.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mesa/main/api_arrayelt.c b/src/mesa/main/api_arrayelt.c index 92d8238..c84db5f 100644 --- a/src/mesa/main/api_arrayelt.c +++ b/src/mesa/main/api_arrayelt.c @@ -65,7 +65,7 @@ typedef struct { typedef struct { AEarray arrays[32]; AEattrib attribs[VERT_ATTRIB_MAX + 1]; - GLuint NewState; + GLbitfield NewState; /* List of VBOs we need to map before executing ArrayElements */ struct gl_buffer_object *vbo[VERT_ATTRIB_MAX]; @@ -1802,7 +1802,7 @@ _ae_ArrayElement(GLint elt) void -_ae_invalidate_state(struct gl_context *ctx, GLuint new_state) +_ae_invalidate_state(struct gl_context *ctx, GLbitfield new_state) { AEcontext *actx = AE_CONTEXT(ctx); diff --git a/src/mesa/main/api_arrayelt.h b/src/mesa/main/api_arrayelt.h index 39fdeb9..03cd9ec 100644 --- a/src/mesa/main/api_arrayelt.h +++ b/src/mesa/main/api_arrayelt.h @@ -33,7 +33,7 @@ extern GLboolean _ae_create_context( struct gl_context *ctx ); extern void _ae_destroy_context( struct gl_context *ctx ); -extern void _ae_invalidate_state( struct gl_context *ctx, GLuint new_state ); +extern void _ae_invalidate_state( struct gl_context *ctx, GLbitfield new_state ); extern void GLAPIENTRY _ae_ArrayElement( GLint elt ); /* May optionally be called before a batch of element calls: Series is Reviewed-by: Jose Fonseca___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] st/mesa: check texture target in allocate_full_mipmap()
On 06/01/16 17:10, Brian Paul wrote: Some kinds of textures never have mipmaps. 3D textures seldom have mipmaps. --- src/mesa/state_tracker/st_cb_texture.c | 14 ++ 1 file changed, 14 insertions(+) diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 867d4da..f8b3679 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -404,6 +404,16 @@ static boolean allocate_full_mipmap(const struct st_texture_object *stObj, const struct st_texture_image *stImage) { + switch (stObj->base.Target) { + case GL_TEXTURE_RECTANGLE_NV: + case GL_TEXTURE_BUFFER: + case GL_TEXTURE_EXTERNAL_OES: + case GL_TEXTURE_2D_MULTISAMPLE: + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: + /* these texture types cannot be mipmapped */ + return FALSE; + } + Makes sense. if (stImage->base.Level > 0 || stObj->base.GenerateMipmap) return TRUE; @@ -420,6 +430,10 @@ allocate_full_mipmap(const struct st_texture_object *stObj, /* not a mipmap minification filter */ return FALSE; + if (stObj->base.Target == GL_TEXTURE_3D) + /* 3D textures are seldom mipmapped */ + return FALSE; + return TRUE; } I don't have much first-hand experience with how 3D apps use 3D volumes, but I'd imagine that in most cases they would want to use mipmaps to avoid aliasing effects, unless memory is tight. Anyway, my understanding is that this function just makes a guess, and it's OK to guess wrong. So, series is Reviewed-by: Jose Fonseca Jose ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 06/28] glsl: fix overlapping of varying locations for arrays and structs
Thanks a lot for reviewing these :) Are you able to take a quick look at patch 5? You seem to have missed it. I'd like to push these bug fixes as soon as possible. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] llvmpipe: avoid most 64 bit math in rasterization
On 06/01/16 18:18, Roland Scheidegger wrote: Am 06.01.2016 um 17:31 schrieb Jose Fonseca: On 06/01/16 16:26, Jose Fonseca wrote: On 06/01/16 00:06, srol...@vmware.com wrote: From: Roland ScheideggerThe trick here is to recognize that in the c + n * dcdx calculations, not only can the lower FIXED_ORDER bits not change (as the dcdx values have those all zero) but that this means the sign bit of the calculations cannot be different as well, that is sign(c + n*dcdx) == sign((c >> FIXED_ORDER) + n*(dcdx >> FIXED_ORDER)). That shaves off more than enough bits to never require 64bit masks. A shifted plane c value could still easily exceed 32 bits, however since we throw out planes which are trivial accept even before binning (and similarly don't even get to see tris for which there was a trivial reject plane)) this is never a problem. The idea isnt't all that revolutionary, in fact something similar was tried ages ago (9773722c2b09d5f0615a47cecf4347859474dc56) back when the values were only 32 bit anyway. I believe now it didn't quite work then because the adjustment needed for testing trivial reject / partial masks wasn't handled correctly. This still keeps the separate 32/64 bit paths for now, as the 32 bit one still looks minimally simpler (and also because if we'd pass in dcdx/dcdy/eo unscaled from setup which would be a good reason to ditch the 32 bit path, we'd need to change the special-purpose rasterization functions for small tris). This passes piglit triangle-rasterization (-fbo -auto -max_size -subpixelbits 8). It still fails triangle-rasterization-overdraw -max_size (no change, fails everything at position 2048 - interestingly for softpipe, nvidia maxwell 1 blob, and amd evergreen open-source drivers the test fails as well but at 4096 - seems like we're missing a float mantissa bit somewhere!). I don't think that's how the test is supposed to be run. 
If you do an apitrace, you'll see the test creates a fbo with 1000x1000, a viewport with 16Kx16K, and does a readpixels of 4Kx4K... The problem is that the generic "-fbo" option is not useful for this, as we can't reliably resize it after the fact. Take a look at tests/general/triangle-rasterization.cpp -- it has a different option "-use_fbo" that creates its own fbo. OK I was running that the wrong way too I think. This one still passes with -max_size -use_fbo -subpixelbits 8 (takes _forever_ though - all due to convert_ubyte in readpixel path...) triangle-rasterization-overdraw with just -auto passes. The max_size parameter is a bit confusing since it won't do anything at all without -fbo as piglit_width/height will just get overwritten to window_width/height (and with fbo it will just fail badly). Increasing the window size manually to 8192/8192 won't really work either as the size will be cut down to screen size. However, increasing this and then using -fbo actually does the right thing. And passes. Sounds great then. I can't spot anything wrong with the change: Reviewed-by: Jose Fonseca Would be nice if piglit could pick up those size parameters _after_ piglit_init... It might be worthwhile to modify the piglit test to bail when the passed options are bound to not work. 
Jose Roland Jose Jose --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 84 +-- src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h | 107 + 2 files changed, 133 insertions(+), 58 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index c9b9221..a4dd6ef 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -64,43 +64,43 @@ block_full_16(struct lp_rasterizer_task *task, } static inline unsigned -build_mask_linear(int64_t c, int64_t dcdx, int64_t dcdy) +build_mask_linear(int32_t c, int32_t dcdx, int32_t dcdy) { unsigned mask = 0; - int64_t c0 = c; - int64_t c1 = c0 + dcdy; - int64_t c2 = c1 + dcdy; - int64_t c3 = c2 + dcdy; - - mask |= ((c0 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 0); - mask |= ((c0 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 1); - mask |= ((c0 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 2); - mask |= ((c0 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 3); - mask |= ((c1 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 4); - mask |= ((c1 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 5); - mask |= ((c1 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 6); - mask |= ((c1 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 7); - mask |= ((c2 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 8); - mask |= ((c2 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 9); - mask |= ((c2 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 10); - mask |= ((c2 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 11); - mask |= ((c3 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 12); - mask |= ((c3 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 13); - mask |= ((c3 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 14); - mask |= ((c3 + 3 * dcdx) >> FIXED_SHIFT) & (1
Re: [Mesa-dev] [PATCH 05/28] glsl: create helper to remove outer vertex index array used by some stages
On Mon, Dec 28, 2015 at 9:00 PM, Timothy Arceriwrote: > This will be used in the following patch for calculating array sizes correctly > when reserving explicit varying locations. > --- > src/glsl/link_varyings.cpp | 36 ++-- > 1 file changed, 26 insertions(+), 10 deletions(-) > > diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp > index 2ff4552..d9550df 100644 > --- a/src/glsl/link_varyings.cpp > +++ b/src/glsl/link_varyings.cpp > @@ -41,6 +41,29 @@ > > > /** > + * Get the varying type stripped of the outermost array if we're processing > + * a stage whose varyings are arrays indexed by a vertex number (such as > + * geometry shader inputs). > + */ > +static const glsl_type * > +get_varying_type(const ir_variable *var, gl_shader_stage stage) > +{ > + const glsl_type *type = var->type; > + > + if (!var->data.patch && > + ((var->data.mode == ir_var_shader_out && > + stage == MESA_SHADER_TESS_CTRL) || > +(var->data.mode == ir_var_shader_in && > + (stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_TESS_EVAL || > + stage == MESA_SHADER_GEOMETRY { > + assert(type->is_array()); > + type = type->fields.array; > + } > + > + return type; > +} > + > +/** > * Validate the types and qualifiers of an output from one stage against the > * matching input to another stage. > */ > @@ -981,18 +1004,11 @@ varying_matches::record(ir_variable *producer_var, > ir_variable *consumer_var) > this->matches[this->num_matches].packing_order >= this->compute_packing_order(var); > if (this->disable_varying_packing) { > - const struct glsl_type *type = var->type; >unsigned slots; > + gl_shader_stage stage = > + (producer_var != NULL) ? producer_stage : consumer_stage; > > - /* Some shader stages have 2-dimensional varyings. Use the inner type. 
> */ > - if (!var->data.patch && > - ((var == producer_var && producer_stage == MESA_SHADER_TESS_CTRL) > || > - (var == consumer_var && (consumer_stage == MESA_SHADER_TESS_CTRL > || > -consumer_stage == MESA_SHADER_TESS_EVAL > || > -consumer_stage == > MESA_SHADER_GEOMETRY { > - assert(type->is_array()); > - type = type->fields.array; > - } > + const glsl_type *type = get_varying_type(var, stage); > >if (type->is_array()) { > slots = 1; > -- > 2.4.3 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev Reviewed-by: Anuj Phogat ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 23/23] radeonsi: adjust the parameters of si_shader_dump
I like the code structure established by this series. Patches 20-23 are Reviewed-by: Nicolai HähnleOn 06.01.2016 07:41, Marek Olšák wrote: From: Marek Olšák The function will be extended to dump all binaries shaders will consist of, so si_shader* makes sense here. --- src/gallium/drivers/radeonsi/si_compute.c | 6 ++ src/gallium/drivers/radeonsi/si_shader.c | 18 +++--- src/gallium/drivers/radeonsi/si_shader.h | 7 ++- 3 files changed, 11 insertions(+), 20 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index ffac656..5a08cbf 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -125,8 +125,7 @@ static void *si_create_compute_state( si_compile_llvm(sctx->screen, >kernels[i].binary, >kernels[i].config, sctx->tm, mod, >b.debug, TGSI_PROCESSOR_COMPUTE); - si_shader_dump(sctx->screen, >kernels[i].binary, - >kernels[i].config, + si_shader_dump(sctx->screen, >kernels[i], >b.debug, TGSI_PROCESSOR_COMPUTE); si_shader_binary_upload(sctx->screen, >kernels[i]); LLVMDisposeModule(mod); @@ -143,8 +142,7 @@ static void *si_create_compute_state( init_scratch_buffer(sctx, program); si_shader_binary_read_config(>shader.binary, >shader.config, 0); - si_shader_dump(sctx->screen, >shader.binary, - >shader.config, >b.debug, + si_shader_dump(sctx->screen, >shader, >b.debug, TGSI_PROCESSOR_COMPUTE); si_shader_binary_upload(sctx->screen, >shader); diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 58d16cf..b1a9a1e 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -3873,17 +3873,15 @@ static void si_shader_dump_stats(struct si_screen *sscreen, conf->lds_size, conf->scratch_bytes_per_wave); } -void si_shader_dump(struct si_screen *sscreen, - struct radeon_shader_binary *binary, - struct si_shader_config *conf, - struct pipe_debug_callback *debug, - unsigned processor) +void 
si_shader_dump(struct si_screen *sscreen, struct si_shader *shader, + struct pipe_debug_callback *debug, unsigned processor) { if (r600_can_dump_shader(>b, processor)) if (!(sscreen->b.debug_flags & DBG_NO_ASM)) - si_shader_dump_disassembly(binary, debug); + si_shader_dump_disassembly(>binary, debug); - si_shader_dump_stats(sscreen, conf, binary->code_size, debug, processor); + si_shader_dump_stats(sscreen, >config, +shader->binary.code_size, debug, processor); } int si_compile_llvm(struct si_screen *sscreen, @@ -3996,8 +3994,7 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen, bld_base->base.gallivm->module, debug, TGSI_PROCESSOR_GEOMETRY); if (!r) { - si_shader_dump(sscreen, _shader_ctx->shader->binary, - _shader_ctx->shader->config, debug, + si_shader_dump(sscreen, si_shader_ctx->shader, debug, TGSI_PROCESSOR_GEOMETRY); r = si_shader_binary_upload(sscreen, si_shader_ctx->shader); } @@ -4202,8 +4199,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, goto out; } - si_shader_dump(sscreen, >binary, >config, - debug, si_shader_ctx.type); + si_shader_dump(sscreen, shader, debug, si_shader_ctx.type); r = si_shader_binary_upload(sscreen, shader); if (r) { diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 712bcd9..1635358 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -344,11 +344,8 @@ void si_shader_destroy(struct si_shader *shader); void si_shader_destroy_binary(struct radeon_shader_binary *binary); unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index); int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader); -void si_shader_dump(struct si_screen *sscreen, - struct radeon_shader_binary *binary, - struct si_shader_config *conf, - struct pipe_debug_callback *debug, - unsigned processor); +void si_shader_dump(struct
Re: [Mesa-dev] [PATCH] gallium/r600: Replace ALIGN_DIVUP with DIV_ROUND_UP
Pushed. On 06.01.2016 12:10, Krzysztof A. Sobiecki wrote: Nicolai Hähnle writes: On 30.12.2015 13:44, Krzysztof A. Sobiecki wrote: Nicolai Hähnle writes: On 30.12.2015 08:42, Krzysztof A. Sobiecki wrote: Nicolai Hähnle writes: On 29.12.2015 14:27, Krzysztof A. Sobiecki wrote: From: Krzysztof Sobiecki ALIGN_DIVUP is a driver specific (r600g) macro that duplicates DIV_ROUND_UP functionality. Replacing it with DIV_ROUND_UP eliminates this problem. Those macros are actually slightly different, and the assembly generated by the ALIGN_DIVUP looks clearly better to me. I remember seeing a very long thread about this not so long ago - what was the resolution there? Cheers, Nicolai I would like to remove ALIGN_DIVUP first and then debate which implementation DIV_ROUND_UP should use. btw. I prefer 1 + ((x - 1) / y) That produces an incorrect result when x is an unsigned type and equal to 0 -- and that is something that existing code definitely relies on. Cheers, Nicolai Then what about (x / y) + (x % y != 0) Generates similar assembly to the DIV_ROUND_UP version. Anyway, now that I look at it again I'd say just go ahead and add my R-b. Yes, the assembly looks slightly worse, but only slightly, and avoiding surprises with overflows down the line seems like a good idea. Cheers, Nicolai I don't have commit access, can you push it, sorry. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 10/23] radeonsi: move MRT color exporting into a separate function
On 06.01.2016 07:41, Marek Olšák wrote: From: Marek OlšákThis will be used by a fragment shader epilog. --- src/gallium/drivers/radeonsi/si_shader.c | 93 +++- 1 file changed, 55 insertions(+), 38 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 4204db0..f60b560 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2135,6 +2135,57 @@ static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base, args, 9, 0); } +static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base, + LLVMValueRef *color, unsigned index, + bool is_last) +{ + struct si_shader_context *si_shader_ctx = si_shader_context(bld_base); + struct lp_build_context *base = _base->base; + LLVMValueRef args[9]; + int i; + + /* Clamp color */ + if (si_shader_ctx->shader->key.ps.clamp_color) + for (i = 0; i < 4; i++) + color[i] = radeon_llvm_saturate(bld_base, color[i]); + + /* Alpha to one */ + if (si_shader_ctx->shader->key.ps.alpha_to_one) + color[3] = base->one; + + /* Alpha test */ + if (index == 0 && + si_shader_ctx->shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS) + si_alpha_test(bld_base, color[3]); + + /* Line & polygon smoothing */ + if (si_shader_ctx->shader->key.ps.poly_line_smoothing) + color[3] = si_scale_alpha_by_sample_mask(bld_base, color[3]); + + /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. 
*/ + if (index == 0 && + si_shader_ctx->shader->key.ps.last_cbuf > 0) { + for (int c = 1; c <= si_shader_ctx->shader->key.ps.last_cbuf; c++) { + si_llvm_init_export_args(bld_base, color, +V_008DFC_SQ_EXP_MRT + c, args); + lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export", + LLVMVoidTypeInContext(base->gallivm->context), + args, 9, 0); + } + } + + /* Export */ + si_llvm_init_export_args(bld_base, color, V_008DFC_SQ_EXP_MRT + index, +args); + if (is_last) { + args[1] = bld_base->uint_bld.one; /* whether the EXEC mask is valid */ + args[2] = bld_base->uint_bld.one; /* DONE bit */ + } + lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export", + LLVMVoidTypeInContext(base->gallivm->context), + args, 9, 0); +} + static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base) { struct si_shader_context * si_shader_ctx = si_shader_context(bld_base); @@ -2177,7 +2228,7 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base) for (i = 0; i < info->num_outputs; i++) { unsigned semantic_name = info->output_semantic_name[i]; unsigned semantic_index = info->output_semantic_index[i]; - unsigned target, j; + unsigned j; LLVMValueRef color[4] = {}; /* Select the correct target */ @@ -2195,53 +2246,19 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base) si_shader_ctx->radeon_bld.soa.outputs[i][0], ""); continue; case TGSI_SEMANTIC_COLOR: - target = V_008DFC_SQ_EXP_MRT + semantic_index; - for (j = 0; j < 4; j++) color[j] = LLVMBuildLoad(builder, si_shader_ctx->radeon_bld.soa.outputs[i][j], ""); - if (si_shader_ctx->shader->key.ps.clamp_color) - for (j = 0; j < 4; j++) - color[j] = radeon_llvm_saturate(bld_base, color[j]); - - if (si_shader_ctx->shader->key.ps.alpha_to_one) - color[3] = base->one; - - if (semantic_index == 0 && - si_shader_ctx->shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS) - si_alpha_test(bld_base, color[3]); - - if (si_shader_ctx->shader->key.ps.poly_line_smoothing) - color[3] = 
si_scale_alpha_by_sample_mask(bld_base, color[3]); - break; + si_export_mrt_color(bld_base, color, semantic_index, + last_color_export == i); + continue;
[Mesa-dev] [Bug 93577] Total war: Attila crashes at startup
https://bugs.freedesktop.org/show_bug.cgi?id=93577 --- Comment #5 from ArneJ --- You're right, it needs a GL 4.3 Core Context. I was able to start the game with the following launch options in steam: MESA_GL_VERSION_OVERRIDE=4.3 MESA_GLSL_VERSION_OVERRIDE=430 %command% It runs quite well with medium settings at 1920x1080 on my R9 270X with mesa 11.1.0 (I just tested a little bit of the prologue). I didn't see any issues so it's quite possible that the game requests a 4.3 context but doesn't need any extensions that are not available on radeonsi. -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 19/23] radeonsi: separate shader dumping code to si_shader_dump and *_dump_stats
On 06.01.2016 07:41, Marek Olšák wrote: From: Marek OlšákEventually, I'd like to dump stats for several combined binaries, which is why you don't see a binary parameter in si_shader_dump_stats --- src/gallium/drivers/radeonsi/si_shader.c | 42 +++- 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 0773fff..95cdf8a 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -3853,31 +3853,49 @@ static void si_shader_dump_disassembly(const struct radeon_shader_binary *binary } } -void si_shader_binary_read(struct si_screen *sscreen, - struct radeon_shader_binary *binary, - struct si_shader_config *conf, - struct pipe_debug_callback *debug, - unsigned processor) +static void si_shader_dump_stats(struct si_screen *sscreen, +struct si_shader_config *conf, +unsigned code_size, +struct pipe_debug_callback *debug, +unsigned processor) { - si_shader_binary_read_config(binary, conf, 0); - if (r600_can_dump_shader(>b, processor)) { - if (!(sscreen->b.debug_flags & DBG_NO_ASM)) - si_shader_dump_disassembly(binary, debug); - fprintf(stderr, "*** SHADER STATS ***\n" "SGPRS: %d\nVGPRS: %d\nCode Size: %d bytes\nLDS: %d blocks\n" "Scratch: %d bytes per wave\n\n", - conf->num_sgprs, conf->num_vgprs, binary->code_size, + conf->num_sgprs, conf->num_vgprs, code_size, conf->lds_size, conf->scratch_bytes_per_wave); } pipe_debug_message(debug, SHADER_INFO, "Shader Stats: SGPRS: %d VGPRS: %d Code Size: %d LDS: %d Scratch: %d", - conf->num_sgprs, conf->num_vgprs, binary->code_size, + conf->num_sgprs, conf->num_vgprs, code_size, conf->lds_size, conf->scratch_bytes_per_wave); } +static void si_shader_dump(struct si_screen *sscreen, + struct radeon_shader_binary *binary, + struct si_shader_config *conf, + struct pipe_debug_callback *debug, + unsigned processor) +{ + if (r600_can_dump_shader(>b, processor)) + if (!(sscreen->b.debug_flags & 
DBG_NO_ASM)) + si_shader_dump_disassembly(binary, debug); I prefer to have braces around an if-block that spans multiple lines, but that's a bit of a bike-sheddy quibble. Either way, patches 11-19 are Reviewed-by: Nicolai Hähnle + + si_shader_dump_stats(sscreen, conf, binary->code_size, debug, processor); +} + +void si_shader_binary_read(struct si_screen *sscreen, + struct radeon_shader_binary *binary, + struct si_shader_config *conf, + struct pipe_debug_callback *debug, + unsigned processor) +{ + si_shader_binary_read_config(binary, conf, 0); + si_shader_dump(sscreen, binary, conf, debug, processor); +} + int si_compile_llvm(struct si_screen *sscreen, struct radeon_shader_binary *binary, struct si_shader_config *conf, ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev