On Sun, Oct 07, 2012 at 09:11:16PM +0200, Vincent Lejeune wrote: > --- > src/gallium/drivers/r600/eg_asm.c | 17 ++++++++ > src/gallium/drivers/r600/r600_asm.c | 17 ++++++++ > src/gallium/drivers/r600/r600_asm.h | 2 + > src/gallium/drivers/r600/r600_llvm.c | 66 > ++++++++++++++++++++++++++++---- > src/gallium/drivers/r600/r600_shader.c | 53 +++++++++++++++++++++++-- > src/gallium/drivers/radeon/radeon_llvm.h | 4 ++ > 6 files changed, 147 insertions(+), 12 deletions(-) > > diff --git a/src/gallium/drivers/r600/eg_asm.c > b/src/gallium/drivers/r600/eg_asm.c > index 00ac4a8..69617d9 100644 > --- a/src/gallium/drivers/r600/eg_asm.c > +++ b/src/gallium/drivers/r600/eg_asm.c > @@ -144,3 +144,20 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, > struct r600_bytecode_cf *cf) > } > return 0; > } > + > +void eg_bytecode_export_read(struct r600_bytecode_output *output, uint32_t > word0, uint32_t word1) > +{ > + output->array_base = G_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(word0); > + output->type = G_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(word0); > + output->gpr = G_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(word0); > + output->elem_size = G_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(word0); > + > + output->swizzle_x = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(word1); > + output->swizzle_y = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(word1); > + output->swizzle_z = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(word1); > + output->swizzle_w = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(word1); > + output->burst_count = G_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(word1); > + output->end_of_program = > G_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(word1); > + output->inst = > EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(word1)); > + output->barrier = G_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(word1); > +} > diff --git a/src/gallium/drivers/r600/r600_asm.c > b/src/gallium/drivers/r600/r600_asm.c > index 63bd8e9..3a6bce0 100644 > --- a/src/gallium/drivers/r600/r600_asm.c > +++ b/src/gallium/drivers/r600/r600_asm.c > @@ 
-2939,3 +2939,20 @@ void r600_bytecode_alu_read(struct r600_bytecode_alu > *alu, uint32_t word0, uint3 > G_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(word1); > } > } > + > +void r600_bytecode_export_read(struct r600_bytecode_output *output, uint32_t > word0, uint32_t word1) > +{ > + output->array_base = G_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(word0); > + output->type = G_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(word0); > + output->gpr = G_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(word0); > + output->elem_size = G_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(word0); > + > + output->swizzle_x = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(word1); > + output->swizzle_y = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(word1); > + output->swizzle_z = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(word1); > + output->swizzle_w = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(word1); > + output->burst_count = G_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(word1); > + output->end_of_program = > G_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(word1); > + output->inst = > EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(word1)); > + output->barrier = G_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(word1); > +} > diff --git a/src/gallium/drivers/r600/r600_asm.h > b/src/gallium/drivers/r600/r600_asm.h > index 403365b..6d57778 100644 > --- a/src/gallium/drivers/r600/r600_asm.h > +++ b/src/gallium/drivers/r600/r600_asm.h > @@ -243,5 +243,7 @@ int r600_vertex_elements_build_fetch_shader(struct > r600_context *rctx, struct r6 > void r700_bytecode_cf_vtx_build(uint32_t *bytecode, const struct > r600_bytecode_cf *cf); > int r700_bytecode_alu_build(struct r600_bytecode *bc, struct > r600_bytecode_alu *alu, unsigned id); > void r700_bytecode_alu_read(struct r600_bytecode_alu *alu, uint32_t word0, > uint32_t word1); > +void r600_bytecode_export_read(struct r600_bytecode_output *output, uint32_t > word0, uint32_t word1); > +void eg_bytecode_export_read(struct r600_bytecode_output *output, uint32_t > word0, uint32_t word1); > > #endif > diff --git 
a/src/gallium/drivers/r600/r600_llvm.c > b/src/gallium/drivers/r600/r600_llvm.c > index 71ea578..dddc867 100644 > --- a/src/gallium/drivers/r600/r600_llvm.c > +++ b/src/gallium/drivers/r600/r600_llvm.c > @@ -115,6 +115,8 @@ static void llvm_emit_epilogue(struct > lp_build_tgsi_context * bld_base) > struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); > struct lp_build_context * base = &bld_base->base; > unsigned i; > + > + unsigned color_count = 0; > > /* Add the necessary export instructions */ > for (i = 0; i < ctx->output_reg_count; i++) { > @@ -123,18 +125,66 @@ static void llvm_emit_epilogue(struct > lp_build_tgsi_context * bld_base) > LLVMValueRef output; > unsigned adjusted_reg_idx = i + > ctx->reserved_reg_count; > - LLVMValueRef reg_index = lp_build_const_int32( > - base->gallivm, > - radeon_llvm_reg_index_soa(adjusted_reg_idx, > chan)); > > output = LLVMBuildLoad(base->gallivm->builder, > ctx->soa.outputs[i][chan], ""); > > - lp_build_intrinsic_binary( > - base->gallivm->builder, > - "llvm.AMDGPU.store.output", > - LLVMVoidTypeInContext(base->gallivm->context), > - output, reg_index); > + if (ctx->type == TGSI_PROCESSOR_VERTEX) { > + LLVMValueRef reg_index = lp_build_const_int32( > + base->gallivm, > + > radeon_llvm_reg_index_soa(adjusted_reg_idx, chan)); > + lp_build_intrinsic_binary( > + base->gallivm->builder, > + "llvm.AMDGPU.store.output", > + > LLVMVoidTypeInContext(base->gallivm->context), > + output, reg_index); > + } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { > + switch (ctx->r600_outputs[i].name) { > + case TGSI_SEMANTIC_COLOR: > + if ( color_count/4 < > ctx->color_buffer_count + ctx->extra_buffer) { > + if (ctx->fs_color_all) { > + for (unsigned j = 0; j > < ctx->color_buffer_count; j++) { > + LLVMValueRef > reg_index = lp_build_const_int32( > + > base->gallivm, > + (j * 4) > + chan); > + > lp_build_intrinsic_binary( > + > base->gallivm->builder, > + > "llvm.R600.store.pixel.color", > + > 
LLVMVoidTypeInContext(base->gallivm->context), > + output, > reg_index); > + } > + } else { > + LLVMValueRef reg_index > = lp_build_const_int32( > + base->gallivm, > + > (color_count++/4) * 4 + chan); > + > lp_build_intrinsic_binary( > + > base->gallivm->builder, > + > "llvm.R600.store.pixel.color", > + > LLVMVoidTypeInContext(base->gallivm->context), > + output, > reg_index); > + } > + } > + break; > + case TGSI_SEMANTIC_POSITION: > + if (chan != 2) > + continue; > + lp_build_intrinsic_unary( > + base->gallivm->builder, > + "llvm.R600.store.pixel.depth", > + > LLVMVoidTypeInContext(base->gallivm->context), > + output); > + break; > + case TGSI_SEMANTIC_STENCIL: > + if (chan != 1) > + continue; > + lp_build_intrinsic_unary( > + base->gallivm->builder, > + "llvm.R600.store.pixel.stencil", > + > LLVMVoidTypeInContext(base->gallivm->context), > + output); > + break; > + } > + } > } > } > } > diff --git a/src/gallium/drivers/r600/r600_shader.c > b/src/gallium/drivers/r600/r600_shader.c > index bf4877a..56e25b5 100644 > --- a/src/gallium/drivers/r600/r600_shader.c > +++ b/src/gallium/drivers/r600/r600_shader.c > @@ -522,6 +522,21 @@ static int r600_vtx_from_byte_stream(struct > r600_shader_ctx *ctx, > return bytes_read; > } > > +static int r600_export_from_byte_stream(struct r600_shader_ctx *ctx, > + unsigned char * bytes, unsigned bytes_read) > +{ > + struct r600_bytecode_output output; > + memset(&output, 0, sizeof(struct r600_bytecode_output)); > + uint32_t word0 = i32_from_byte_stream(bytes, &bytes_read); > + uint32_t word1 = i32_from_byte_stream(bytes, &bytes_read); > + if (ctx->bc->chip_class >= EVERGREEN) > + eg_bytecode_export_read(&output, word0,word1); > + else > + r600_bytecode_export_read(&output, word0,word1); > + r600_bytecode_add_output(ctx->bc, &output); > + return bytes_read; > +} > + > static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx, > unsigned char * bytes, unsigned num_bytes) > { > @@ -556,6 +571,10 @@ static void 
r600_bytecode_from_byte_stream(struct > r600_shader_ctx *ctx, > bytes_read = r600_vtx_from_byte_stream(ctx, bytes, > bytes_read); > break; > + case 5: > + bytes_read = r600_export_from_byte_stream(ctx, bytes, > + bytes_read); > + break; > default: > /* XXX: Error here */ > break; > @@ -1336,7 +1355,11 @@ static int r600_shader_from_tgsi(struct r600_screen > *rscreen, > radeon_llvm_ctx.two_side = shader->two_side; > radeon_llvm_ctx.face_input = ctx.face_gpr; > radeon_llvm_ctx.r600_inputs = ctx.shader->input; > + radeon_llvm_ctx.r600_outputs = ctx.shader->output; > + radeon_llvm_ctx.color_buffer_count = MAX2(key.nr_cbufs , 1); > radeon_llvm_ctx.chip_class = ctx.bc->chip_class; > + radeon_llvm_ctx.fs_color_all = shader->fs_write_all && > (rscreen->chip_class >= EVERGREEN); > + radeon_llvm_ctx.extra_buffer = key.dual_src_blend; > mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens); > if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) { > dump = 1; > @@ -1598,6 +1621,24 @@ static int r600_shader_from_tgsi(struct r600_screen > *rscreen, > } > break; > case TGSI_PROCESSOR_FRAGMENT:
I think we can remove some duplicated code in the hunk below: > + if (use_llvm) { > + if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { > + /* never export more colors than the number of > CBs */ > + if (next_pixel_base && next_pixel_base >= > key.nr_cbufs + key.dual_src_blend) { > + /* skip export */ > + j--; > + continue; > + } The if statement above looks the same for both the llvm and non-llvm paths. > + next_pixel_base++; > + shader->nr_ps_color_exports++; The shader->nr_ps_color_exports++ statement is also the same for the llvm and non-llvm paths. > + if (shader->fs_write_all && > (rscreen->chip_class >= EVERGREEN) && key.nr_cbufs) { > + shader->nr_ps_color_exports += > key.nr_cbufs - 1; > + next_pixel_base += key.nr_cbufs - 1; > + } > + } else { > + continue; > + } > + } else { > if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { > /* never export more colors than the number of > CBs */ > if (next_pixel_base && next_pixel_base >= > key.nr_cbufs + key.dual_src_blend) { You can move the if(use_llvm) check down a little more to reduce this duplicated code, for example: + if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { + /* never export more colors than the number of CBs */ + if (next_pixel_base && next_pixel_base >= key.nr_cbufs + key.dual_src_blend) { + /* skip export */ + j--; + continue; + } + shader->nr_ps_color_exports++; + if (use_llvm) { + next_pixel_base++; + if (shader->fs_write_all && (rscreen->chip_class >= EVERGREEN) && key.nr_cbufs) { + shader->nr_ps_color_exports += key.nr_cbufs - 1; + next_pixel_base += key.nr_cbufs - 1; + } + } else { + /* non-llvm code goes here */ + } + } else { + continue; + } > @@ -1644,6 +1685,7 @@ static int r600_shader_from_tgsi(struct r600_screen > *rscreen, > r = -EINVAL; > goto out_err; > } > + } > break; > default: > R600_ERR("unsupported processor type %d\n", ctx.type); > @@ -1706,10 +1748,13 @@ static int r600_shader_from_tgsi(struct r600_screen > *rscreen, > } > } > /* add output to bytecode */ > - for (i
= 0; i < noutput; i++) { > - r = r600_bytecode_add_output(ctx.bc, &output[i]); > - if (r) > - goto out_err; > + if (!use_llvm || ctx.type != TGSI_PROCESSOR_FRAGMENT || > + (ctx.type == TGSI_PROCESSOR_FRAGMENT && next_pixel_base == 0)) { > + for (i = 0; i < noutput; i++) { > + r = r600_bytecode_add_output(ctx.bc, &output[i]); > + if (r) > + goto out_err; > + } > } > /* add program end */ > if (ctx.bc->chip_class == CAYMAN) > diff --git a/src/gallium/drivers/radeon/radeon_llvm.h > b/src/gallium/drivers/radeon/radeon_llvm.h > index 6118b11..948e8cf 100644 > --- a/src/gallium/drivers/radeon/radeon_llvm.h > +++ b/src/gallium/drivers/radeon/radeon_llvm.h > @@ -59,6 +59,10 @@ struct radeon_llvm_context { > unsigned face_input; > unsigned two_side; > struct r600_shader_io * r600_inputs; > + struct r600_shader_io * r600_outputs; > + unsigned color_buffer_count; > + unsigned extra_buffer; > + unsigned fs_color_all; > > /*=== Front end configuration ===*/ > > -- > 1.7.11.4 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev