From: Dave Airlie <airl...@redhat.com> This passes the TBO (texture buffer object) tests, and textureSize works fine. --- src/gallium/drivers/r600/r600_pipe.c | 4 +- src/gallium/drivers/r600/r600_pipe.h | 8 +-- src/gallium/drivers/r600/r600_shader.c | 73 ++++++++++++++++++++++-- src/gallium/drivers/r600/r600_shader.h | 1 + src/gallium/drivers/r600/r600_state.c | 47 +++++++++++++++ src/gallium/drivers/r600/r600_state_common.c | 85 +++++++++++++++++++++++----- 6 files changed, 194 insertions(+), 24 deletions(-)
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 4d2fc2d..8ba94f0 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -416,6 +416,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_COMPUTE: case PIPE_CAP_START_INSTANCE: case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: + case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: return 1; case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: @@ -425,7 +426,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) return 256; case PIPE_CAP_GLSL_FEATURE_LEVEL: - return family >= CHIP_CEDAR ? 140 : 130; + return 140; case PIPE_CAP_TEXTURE_MULTISAMPLE: return rscreen->msaa_texture_support != MSAA_TEXTURE_SAMPLE_ZERO; @@ -439,7 +440,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) /* Supported on Evergreen. */ case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: case PIPE_CAP_CUBE_MAP_ARRAY: - case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: return family >= CHIP_CEDAR ? 1 : 0; /* Unsupported features. 
*/ diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 6a13dc0..d983718 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -46,7 +46,7 @@ /* start driver buffers after user buffers */ #define R600_UCP_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS) #define R600_TXQ_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1) -#define R600_BUFFER_TXQ_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2) +#define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2) #define R600_MAX_CONST_BUFFER_SIZE 4096 @@ -331,7 +331,7 @@ struct r600_samplerview_state { uint32_t compressed_depthtex_mask; /* which textures are depth */ uint32_t compressed_colortex_mask; boolean dirty_txq_constants; - boolean dirty_buffer_txq_constants; + boolean dirty_buffer_constants; }; struct r600_sampler_states { @@ -349,8 +349,8 @@ struct r600_textures_info { /* cube array txq workaround */ uint32_t *txq_constants; - /* buffer txq workaround */ - uint32_t *buffer_txq_constants; + /* buffer related workarounds */ + uint32_t *buffer_constants; }; struct r600_fence { diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 620da85..d7651be 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -3902,6 +3902,7 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_l struct r600_bytecode_alu alu; struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; int src_gpr, r, i; + int id = tgsi_tex_get_src_gpr(ctx, 1); src_gpr = tgsi_tex_get_src_gpr(ctx, 0); if (src_requires_loading) { @@ -3923,7 +3924,7 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_l memset(&vtx, 0, sizeof(vtx)); vtx.inst = 0; - vtx.buffer_id = tgsi_tex_get_src_gpr(ctx, 1) + R600_MAX_CONST_BUFFERS;; + vtx.buffer_id = id + R600_MAX_CONST_BUFFERS; vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ 
vtx.src_gpr = src_gpr; vtx.mega_fetch_count = 16; @@ -3937,6 +3938,58 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_l if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) return r; + + if (ctx->bc->chip_class >= EVERGREEN) + return 0; + + for (i = 0; i < 4; i++) { + int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) + continue; + + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT); + + alu.dst.chan = i; + alu.dst.sel = vtx.dst_gpr; + alu.dst.write = 1; + + alu.src[0].sel = vtx.dst_gpr; + alu.src[0].chan = i; + + alu.src[1].sel = 512 + (id * 2); + alu.src[1].chan = i % 4; + alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; + + if (i == lasti) + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) { + return r; + } + } + + if (inst->Dst[0].Register.WriteMask & 3) { + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT); + + alu.dst.chan = 3; + alu.dst.sel = vtx.dst_gpr; + alu.dst.write = 1; + + alu.src[0].sel = vtx.dst_gpr; + alu.src[0].chan = 3; + + alu.src[1].sel = 512 + (id * 2) + 1; + alu.src[1].chan = 0; + alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; + + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) { + return r; + } + } return 0; } @@ -3950,9 +4003,16 @@ static int r600_do_buffer_txq(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - alu.src[0].sel = 512 + (id / 4); - alu.src[0].kc_bank = R600_BUFFER_TXQ_CONST_BUFFER; - alu.src[0].chan = id % 4; + /* on evergreen this is here */ + if (ctx->bc->chip_class >= EVERGREEN) { + alu.src[0].sel = 512 + (id / 4); + alu.src[0].chan = id % 4; + } else { + /* r600 we have them at channel 2 of the second dword */ + alu.src[0].sel = 512 + (id * 2) + 1; + alu.src[0].chan = 1; + 
} + alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); alu.last = 1; r = r600_bytecode_add_alu(ctx->bc, &alu); @@ -4001,11 +4061,14 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) if (inst->Texture.Texture == TGSI_TEXTURE_BUFFER) { if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) { + ctx->shader->uses_tex_buffers = true; ctx->shader->has_txq_buffer = true; return r600_do_buffer_txq(ctx); } - else if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) + else if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) { + ctx->shader->uses_tex_buffers = true; return do_vtx_fetch_inst(ctx, src_requires_loading); + } } if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) { diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index 88f71ad..19c2769 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -62,6 +62,7 @@ struct r600_shader { boolean vs_out_point_size; boolean has_txq_cube_array_z_comp; boolean has_txq_buffer; + boolean uses_tex_buffers; }; struct r600_shader_key { diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index e2d0f75..b4dc846 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -976,6 +976,50 @@ static void *r600_create_sampler_state(struct pipe_context *ctx, return ss; } +static struct pipe_sampler_view * +texture_buffer_sampler_view(struct r600_pipe_sampler_view *view, + unsigned width0, unsigned height0) + +{ + struct pipe_context *ctx = view->base.context; + struct r600_texture *tmp = (struct r600_texture*)view->base.texture; + uint64_t va; + int stride = util_format_get_blocksize(view->base.format); + unsigned format, num_format, format_comp, endian; + + const struct util_format_description *desc; + + r600_vertex_data_type(view->base.format, + &format, &num_format, &format_comp, + &endian); + + desc = util_format_description(view->base.format); + + 
va = r600_resource_va(ctx->screen, view->base.texture); + view->tex_resource = &tmp->resource; + + view->skip_mip_address_reloc = true; + view->tex_resource_words[0] = va; + view->tex_resource_words[1] = width0 - 1; + view->tex_resource_words[2] = S_038008_BASE_ADDRESS_HI(va >> 32UL) | + S_038008_STRIDE(stride) | + S_038008_DATA_FORMAT(format) | + S_038008_NUM_FORMAT_ALL(num_format) | + S_038008_FORMAT_COMP_ALL(format_comp) | + S_038008_SRF_MODE_ALL(1) | + S_038008_ENDIAN_SWAP(endian); + view->tex_resource_words[3] = 0; + /* + * in theory dword 4 is for number of elements, for use with resinfo, + * but it seems to utterly fail to work, the amd gpu shader analyser + * uses a const buffer to store the element sizes for buffer txq + */ + view->tex_resource_words[4] = 0; + view->tex_resource_words[5] = 0; + view->tex_resource_words[6] = S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_BUFFER); + return &view->base; +} + struct pipe_sampler_view * r600_create_sampler_view_custom(struct pipe_context *ctx, struct pipe_resource *texture, @@ -1000,6 +1044,9 @@ r600_create_sampler_view_custom(struct pipe_context *ctx, view->base.reference.count = 1; view->base.context = ctx; + if (texture->target == PIPE_BUFFER) + return texture_buffer_sampler_view(view, texture->width0, 1); + swizzle[0] = state->swizzle_r; swizzle[1] = state->swizzle_g; swizzle[2] = state->swizzle_b; diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index a452510..c8f4479 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -626,7 +626,7 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader, dst->views.compressed_depthtex_mask &= dst->views.enabled_mask; dst->views.compressed_colortex_mask &= dst->views.enabled_mask; dst->views.dirty_txq_constants = TRUE; - dst->views.dirty_buffer_txq_constants = TRUE; + dst->views.dirty_buffer_constants = TRUE; r600_sampler_views_dirty(rctx, 
&dst->views); if (dirty_sampler_states_mask) { @@ -1025,7 +1025,55 @@ static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask rctx->sample_mask.atom.dirty = true; } -static void r600_setup_txq_buffer_constants(struct r600_context *rctx, int shader_type) +static void r600_setup_buffer_constants(struct r600_context *rctx, int shader_type) +{ + struct r600_textures_info *samplers = &rctx->samplers[shader_type]; + int bits; + uint32_t array_size; + struct pipe_constant_buffer cb; + int i, j; + + if (!samplers->views.dirty_buffer_constants) + return; + + samplers->views.dirty_buffer_constants = FALSE; + + bits = util_last_bit(samplers->views.enabled_mask); + array_size = bits * 8 * sizeof(uint32_t) * 4; + samplers->buffer_constants = realloc(samplers->buffer_constants, array_size); + memset(samplers->buffer_constants, 0, array_size); + for (i = 0; i < bits; i++) { + if (samplers->views.enabled_mask & (1 << i)) { + int offset = i * 8; + const struct util_format_description *desc; + desc = util_format_description(samplers->views.views[i]->base.format); + + for (j = 0; j < 4; j++) + if (j < desc->nr_channels) + samplers->buffer_constants[offset+j] = 0xffffffff; + else + samplers->buffer_constants[offset+j] = 0x0; + if (desc->nr_channels < 4) { + if (desc->channel[0].pure_integer) + samplers->buffer_constants[offset+4] = 1; + else + samplers->buffer_constants[offset+4] = 0x3f800000; + } else + samplers->buffer_constants[offset + 4] = 0; + + samplers->buffer_constants[offset + 5] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format); + } + } + + cb.buffer = NULL; + cb.user_buffer = samplers->buffer_constants; + cb.buffer_offset = 0; + cb.buffer_size = array_size; + rctx->context.set_constant_buffer(&rctx->context, shader_type, R600_BUFFER_INFO_CONST_BUFFER, &cb); + pipe_resource_reference(&cb.buffer, NULL); +} + +static void eg_setup_buffer_constants(struct r600_context *rctx, int 
shader_type) { struct r600_textures_info *samplers = &rctx->samplers[shader_type]; int bits; @@ -1033,24 +1081,24 @@ static void r600_setup_txq_buffer_constants(struct r600_context *rctx, int shade struct pipe_constant_buffer cb; int i; - if (!samplers->views.dirty_buffer_txq_constants) + if (!samplers->views.dirty_buffer_constants) return; - samplers->views.dirty_buffer_txq_constants = FALSE; + samplers->views.dirty_buffer_constants = FALSE; bits = util_last_bit(samplers->views.enabled_mask); array_size = bits * sizeof(uint32_t) * 4; - samplers->buffer_txq_constants = realloc(samplers->buffer_txq_constants, array_size); - memset(samplers->buffer_txq_constants, 0, array_size); + samplers->buffer_constants = realloc(samplers->buffer_constants, array_size); + memset(samplers->buffer_constants, 0, array_size); for (i = 0; i < bits; i++) if (samplers->views.enabled_mask & (1 << i)) - samplers->buffer_txq_constants[i] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format); + samplers->buffer_constants[i] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format); cb.buffer = NULL; - cb.user_buffer = samplers->buffer_txq_constants; + cb.user_buffer = samplers->buffer_constants; cb.buffer_offset = 0; cb.buffer_size = array_size; - rctx->context.set_constant_buffer(&rctx->context, shader_type, R600_BUFFER_TXQ_CONST_BUFFER, &cb); + rctx->context.set_constant_buffer(&rctx->context, shader_type, R600_BUFFER_INFO_CONST_BUFFER, &cb); pipe_resource_reference(&cb.buffer, NULL); } @@ -1121,15 +1169,26 @@ static bool r600_update_derived_state(struct r600_context *rctx) if (ps_dirty) r600_context_pipe_state_set(rctx, &rctx->ps_shader->current->rstate); + /* on R600 we stuff masks + txq info into one constant buffer */ + /* on evergreen we only need a txq info one */ + if (rctx->chip_class < EVERGREEN) { + if (rctx->ps_shader && 
rctx->ps_shader->current->shader.uses_tex_buffers) + r600_setup_buffer_constants(rctx, PIPE_SHADER_FRAGMENT); + if (rctx->vs_shader && rctx->vs_shader->current->shader.uses_tex_buffers) + r600_setup_buffer_constants(rctx, PIPE_SHADER_VERTEX); + } else { + if (rctx->ps_shader && rctx->ps_shader->current->shader.has_txq_buffer) + eg_setup_buffer_constants(rctx, PIPE_SHADER_FRAGMENT); + if (rctx->vs_shader && rctx->vs_shader->current->shader.has_txq_buffer) + eg_setup_buffer_constants(rctx, PIPE_SHADER_VERTEX); + } + + if (rctx->ps_shader && rctx->ps_shader->current->shader.has_txq_cube_array_z_comp) r600_setup_txq_cube_array_constants(rctx, PIPE_SHADER_FRAGMENT); if (rctx->vs_shader && rctx->vs_shader->current->shader.has_txq_cube_array_z_comp) r600_setup_txq_cube_array_constants(rctx, PIPE_SHADER_VERTEX); - if (rctx->ps_shader && rctx->ps_shader->current->shader.has_txq_buffer) - r600_setup_txq_buffer_constants(rctx, PIPE_SHADER_FRAGMENT); - if (rctx->vs_shader && rctx->vs_shader->current->shader.has_txq_buffer) - r600_setup_txq_buffer_constants(rctx, PIPE_SHADER_VERTEX); if (rctx->chip_class < EVERGREEN && rctx->ps_shader && rctx->vs_shader) { if (!r600_adjust_gprs(rctx)) { -- 1.8.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev