Re: [Mesa-dev] [PATCH 2/2] r600g: texture buffer object + glsl 1.40 enable support
On Wed, Dec 19, 2012 at 02:07:22PM -0500, Jerome Glisse wrote: > On Wed, Dec 19, 2012 at 12:33 PM, Tom Stellard wrote: > > On Sun, Dec 16, 2012 at 08:33:23PM +1000, Dave Airlie wrote: > >> From: Dave Airlie > >> > >> This adds TBO support to r600g, and with GLSL 1.40 enabled, > >> we now get 3.1 core profiles advertised for r600g. > >> > >> This code is evergreen only so far, but I don't think there is > >> much to make it work on r600/700/cayman other than testing. > >> > >> a) buffer txq is broken like cube map txq, this sucks, fix it the > >> exact same way. > >> > >> b) buffer fetches are done with a vertex clause, > >> > >> c) vertex swizzling offsets are different than texture swizzles, > >> but we still need to use the combiner, so make it configurable. > >> > >> d) add implementation of UCMP. > >> > >> TODO: r600/700/cayman testin > >> Signed-off-by: Dave Airlie > >> --- > >> src/gallium/drivers/r600/evergreen_state.c | 55 > >> src/gallium/drivers/r600/r600_asm.c | 2 +- > >> src/gallium/drivers/r600/r600_asm.h | 2 + > >> src/gallium/drivers/r600/r600_pipe.c | 4 +- > >> src/gallium/drivers/r600/r600_pipe.h | 10 +++- > >> src/gallium/drivers/r600/r600_shader.c | 75 > >> > >> src/gallium/drivers/r600/r600_shader.h | 1 + > >> src/gallium/drivers/r600/r600_state_common.c | 58 + > >> src/gallium/drivers/r600/r600_texture.c | 16 -- > >> 9 files changed, 204 insertions(+), 19 deletions(-) > >> > > > > [snip] > > > >> diff --git a/src/gallium/drivers/r600/r600_shader.c > >> b/src/gallium/drivers/r600/r600_shader.c > >> index feb7001..60667e7 100644 > >> --- a/src/gallium/drivers/r600/r600_shader.c > >> +++ b/src/gallium/drivers/r600/r600_shader.c > >> @@ -3819,6 +3819,71 @@ static inline unsigned tgsi_tex_get_src_gpr(struct > >> r600_shader_ctx *ctx, > >> return ctx->file_offset[inst->Src[index].Register.File] + > >> inst->Src[index].Register.Index; > >> } > >> > >> +static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean > >> src_requires_loading) > >> +{ 
> >> + struct r600_bytecode_vtx vtx; > >> + struct r600_bytecode_alu alu; > >> + struct tgsi_full_instruction *inst = > >> &ctx->parse.FullToken.FullInstruction; > >> + int src_gpr, r, i; > >> + > >> + src_gpr = tgsi_tex_get_src_gpr(ctx, 0); > >> + if (src_requires_loading) { > >> + for (i = 0; i < 4; i++) { > >> + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); > >> + alu.inst = > >> CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); > >> + r600_bytecode_src(&alu.src[0], &ctx->src[0], i); > >> + alu.dst.sel = ctx->temp_reg; > >> + alu.dst.chan = i; > >> + if (i == 3) > >> + alu.last = 1; > >> + alu.dst.write = 1; > >> + r = r600_bytecode_add_alu(ctx->bc, &alu); > >> + if (r) > >> + return r; > >> + } > >> + src_gpr = ctx->temp_reg; > >> + } > >> + > >> + memset(&vtx, 0, sizeof(vtx)); > >> + vtx.inst = 0; > >> + vtx.buffer_id = tgsi_tex_get_src_gpr(ctx, 1) + > >> R600_MAX_CONST_BUFFERS;; > >> + vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ > >> + vtx.src_gpr = src_gpr; > >> + vtx.mega_fetch_count = 16; > >> + vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + > >> inst->Dst[0].Register.Index; > >> + vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; > >>/* SEL_X */ > >> + vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; > >>/* SEL_Y */ > >> + vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; > >>/* SEL_Z */ > >> + vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; > >>/* SEL_W */ > >> + vtx.use_const_fields = 1; > >> + vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ > >> + > > > > According to the docs, srf_mode_all will be ignored if use_const_fields > > is set. However, based on my tests while running compute shaders, other > > fields like data_format, which are supposed to be ignored weren't being > > ignored unless the were set to zero. So, I think it would be safer > > here to set srf_mode_all to zero and make sure that bit gets set on > > the resource. 
> > > > > >> + if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) > >> + return r; > >> + return 0; > >> +} > >> + > > > > Otherwise, this code for vtx fetch looks good to me. One problem I ran into > > with vtx fetch instructions while working on compute shaders was that > > the GPU will hang if you write to vtx.src_gpr in the > > instruction group following the vtx fetch. Here is a simple example: > > > > %T2_X = MOV %ZERO > > %T3_X = VTX_RE
Re: [Mesa-dev] [PATCH 2/2] r600g: texture buffer object + glsl 1.40 enable support
On Wed, Dec 19, 2012 at 12:33 PM, Tom Stellard wrote: > On Sun, Dec 16, 2012 at 08:33:23PM +1000, Dave Airlie wrote: >> From: Dave Airlie >> >> This adds TBO support to r600g, and with GLSL 1.40 enabled, >> we now get 3.1 core profiles advertised for r600g. >> >> This code is evergreen only so far, but I don't think there is >> much to make it work on r600/700/cayman other than testing. >> >> a) buffer txq is broken like cube map txq, this sucks, fix it the >> exact same way. >> >> b) buffer fetches are done with a vertex clause, >> >> c) vertex swizzling offsets are different than texture swizzles, >> but we still need to use the combiner, so make it configurable. >> >> d) add implementation of UCMP. >> >> TODO: r600/700/cayman testin >> Signed-off-by: Dave Airlie >> --- >> src/gallium/drivers/r600/evergreen_state.c | 55 >> src/gallium/drivers/r600/r600_asm.c | 2 +- >> src/gallium/drivers/r600/r600_asm.h | 2 + >> src/gallium/drivers/r600/r600_pipe.c | 4 +- >> src/gallium/drivers/r600/r600_pipe.h | 10 +++- >> src/gallium/drivers/r600/r600_shader.c | 75 >> >> src/gallium/drivers/r600/r600_shader.h | 1 + >> src/gallium/drivers/r600/r600_state_common.c | 58 + >> src/gallium/drivers/r600/r600_texture.c | 16 -- >> 9 files changed, 204 insertions(+), 19 deletions(-) >> > > [snip] > >> diff --git a/src/gallium/drivers/r600/r600_shader.c >> b/src/gallium/drivers/r600/r600_shader.c >> index feb7001..60667e7 100644 >> --- a/src/gallium/drivers/r600/r600_shader.c >> +++ b/src/gallium/drivers/r600/r600_shader.c >> @@ -3819,6 +3819,71 @@ static inline unsigned tgsi_tex_get_src_gpr(struct >> r600_shader_ctx *ctx, >> return ctx->file_offset[inst->Src[index].Register.File] + >> inst->Src[index].Register.Index; >> } >> >> +static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean >> src_requires_loading) >> +{ >> + struct r600_bytecode_vtx vtx; >> + struct r600_bytecode_alu alu; >> + struct tgsi_full_instruction *inst = >> &ctx->parse.FullToken.FullInstruction; >> + int 
src_gpr, r, i; >> + >> + src_gpr = tgsi_tex_get_src_gpr(ctx, 0); >> + if (src_requires_loading) { >> + for (i = 0; i < 4; i++) { >> + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); >> + alu.inst = >> CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); >> + r600_bytecode_src(&alu.src[0], &ctx->src[0], i); >> + alu.dst.sel = ctx->temp_reg; >> + alu.dst.chan = i; >> + if (i == 3) >> + alu.last = 1; >> + alu.dst.write = 1; >> + r = r600_bytecode_add_alu(ctx->bc, &alu); >> + if (r) >> + return r; >> + } >> + src_gpr = ctx->temp_reg; >> + } >> + >> + memset(&vtx, 0, sizeof(vtx)); >> + vtx.inst = 0; >> + vtx.buffer_id = tgsi_tex_get_src_gpr(ctx, 1) + R600_MAX_CONST_BUFFERS;; >> + vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ >> + vtx.src_gpr = src_gpr; >> + vtx.mega_fetch_count = 16; >> + vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + >> inst->Dst[0].Register.Index; >> + vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; >> /* SEL_X */ >> + vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; >> /* SEL_Y */ >> + vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; >> /* SEL_Z */ >> + vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; >> /* SEL_W */ >> + vtx.use_const_fields = 1; >> + vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ >> + > > According to the docs, srf_mode_all will be ignored if use_const_fields > is set. However, based on my tests while running compute shaders, other > fields like data_format, which are supposed to be ignored weren't being > ignored unless the were set to zero. So, I think it would be safer > here to set srf_mode_all to zero and make sure that bit gets set on > the resource. > > >> + if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) >> + return r; >> + return 0; >> +} >> + > > Otherwise, this code for vtx fetch looks good to me. 
One problem I ran into > with vtx fetch instructions while working on compute shaders was that > the GPU will hang if you write to vtx.src_gpr in the > instruction group following the vtx fetch. Here is a simple example: > > %T2_X = MOV %ZERO > %T3_X = VTX_READ_eg %T2_X, 24 > %T2_X = MOV %ZERO > > I'm not sure if this happens on all GPU variants, but I was able to > consistently reproduce this on my SUMO. You may want to keep an eye > out for this in case you run into any unexplainable hangs. > Did the vtx fetch group have the barrier flag set? Cheers, Jerome
Re: [Mesa-dev] [PATCH 2/2] r600g: texture buffer object + glsl 1.40 enable support
On Sun, Dec 16, 2012 at 08:33:23PM +1000, Dave Airlie wrote: > From: Dave Airlie > > This adds TBO support to r600g, and with GLSL 1.40 enabled, > we now get 3.1 core profiles advertised for r600g. > > This code is evergreen only so far, but I don't think there is > much to make it work on r600/700/cayman other than testing. > > a) buffer txq is broken like cube map txq, this sucks, fix it the > exact same way. > > b) buffer fetches are done with a vertex clause, > > c) vertex swizzling offsets are different than texture swizzles, > but we still need to use the combiner, so make it configurable. > > d) add implementation of UCMP. > > TODO: r600/700/cayman testin > Signed-off-by: Dave Airlie > --- > src/gallium/drivers/r600/evergreen_state.c | 55 > src/gallium/drivers/r600/r600_asm.c | 2 +- > src/gallium/drivers/r600/r600_asm.h | 2 + > src/gallium/drivers/r600/r600_pipe.c | 4 +- > src/gallium/drivers/r600/r600_pipe.h | 10 +++- > src/gallium/drivers/r600/r600_shader.c | 75 > > src/gallium/drivers/r600/r600_shader.h | 1 + > src/gallium/drivers/r600/r600_state_common.c | 58 + > src/gallium/drivers/r600/r600_texture.c | 16 -- > 9 files changed, 204 insertions(+), 19 deletions(-) > [snip] > diff --git a/src/gallium/drivers/r600/r600_shader.c > b/src/gallium/drivers/r600/r600_shader.c > index feb7001..60667e7 100644 > --- a/src/gallium/drivers/r600/r600_shader.c > +++ b/src/gallium/drivers/r600/r600_shader.c > @@ -3819,6 +3819,71 @@ static inline unsigned tgsi_tex_get_src_gpr(struct > r600_shader_ctx *ctx, > return ctx->file_offset[inst->Src[index].Register.File] + > inst->Src[index].Register.Index; > } > > +static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean > src_requires_loading) > +{ > + struct r600_bytecode_vtx vtx; > + struct r600_bytecode_alu alu; > + struct tgsi_full_instruction *inst = > &ctx->parse.FullToken.FullInstruction; > + int src_gpr, r, i; > + > + src_gpr = tgsi_tex_get_src_gpr(ctx, 0); > + if (src_requires_loading) { > + for (i = 0; i < 4; 
i++) { > + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); > + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); > + r600_bytecode_src(&alu.src[0], &ctx->src[0], i); > + alu.dst.sel = ctx->temp_reg; > + alu.dst.chan = i; > + if (i == 3) > + alu.last = 1; > + alu.dst.write = 1; > + r = r600_bytecode_add_alu(ctx->bc, &alu); > + if (r) > + return r; > + } > + src_gpr = ctx->temp_reg; > + } > + > + memset(&vtx, 0, sizeof(vtx)); > + vtx.inst = 0; > + vtx.buffer_id = tgsi_tex_get_src_gpr(ctx, 1) + R600_MAX_CONST_BUFFERS;; > + vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ > + vtx.src_gpr = src_gpr; > + vtx.mega_fetch_count = 16; > + vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + > inst->Dst[0].Register.Index; > + vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; > /* SEL_X */ > + vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; > /* SEL_Y */ > + vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; > /* SEL_Z */ > + vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; > /* SEL_W */ > + vtx.use_const_fields = 1; > + vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ > + According to the docs, srf_mode_all will be ignored if use_const_fields is set. However, based on my tests while running compute shaders, other fields like data_format, which are supposed to be ignored weren't being ignored unless they were set to zero. So, I think it would be safer here to set srf_mode_all to zero and make sure that bit gets set on the resource. > + if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) > + return r; > + return 0; > +} > + Otherwise, this code for vtx fetch looks good to me. One problem I ran into with vtx fetch instructions while working on compute shaders was that the GPU will hang if you write to vtx.src_gpr in the instruction group following the vtx fetch. 
Here is a simple example: %T2_X = MOV %ZERO %T3_X = VTX_READ_eg %T2_X, 24 %T2_X = MOV %ZERO I'm not sure if this happens on all GPU variants, but I was able to consistently reproduce this on my SUMO. You may want to keep an eye out for this in case you run into any unexplainable hangs. -Tom ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] r600g: texture buffer object + glsl 1.40 enable support
On Sun, Dec 16, 2012 at 8:33 PM, Dave Airlie wrote: > From: Dave Airlie > > This adds TBO support to r600g, and with GLSL 1.40 enabled, > we now get 3.1 core profiles advertised for r600g. > > This code is evergreen only so far, but I don't think there is > much to make it work on r600/700/cayman other than testing. > > a) buffer txq is broken like cube map txq, this sucks, fix it the > exact same way. > > b) buffer fetches are done with a vertex clause, > > c) vertex swizzling offsets are different than texture swizzles, > but we still need to use the combiner, so make it configurable. > > d) add implementation of UCMP. Oh, this is part of the UBO patch. Also, the return after the txq/txf check is wrong, as some things init texture to 0 (like DDX). Dave. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/2] r600g: texture buffer object + glsl 1.40 enable support
From: Dave Airlie This adds TBO support to r600g, and with GLSL 1.40 enabled, we now get 3.1 core profiles advertised for r600g. This code is evergreen only so far, but I don't think there is much to make it work on r600/700/cayman other than testing. a) buffer txq is broken like cube map txq, this sucks, fix it the exact same way. b) buffer fetches are done with a vertex clause, c) vertex swizzling offsets are different than texture swizzles, but we still need to use the combiner, so make it configurable. d) add implementation of UCMP. TODO: r600/700/cayman testin Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/evergreen_state.c | 55 src/gallium/drivers/r600/r600_asm.c | 2 +- src/gallium/drivers/r600/r600_asm.h | 2 + src/gallium/drivers/r600/r600_pipe.c | 4 +- src/gallium/drivers/r600/r600_pipe.h | 10 +++- src/gallium/drivers/r600/r600_shader.c | 75 src/gallium/drivers/r600/r600_shader.h | 1 + src/gallium/drivers/r600/r600_state_common.c | 58 + src/gallium/drivers/r600/r600_texture.c | 16 -- 9 files changed, 204 insertions(+), 19 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 996c1b4..49564e7 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -969,6 +969,58 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx, return ss; } +static struct pipe_sampler_view * +texture_buffer_sampler_view(struct r600_pipe_sampler_view *view, + unsigned width0, unsigned height0) + +{ + struct pipe_context *ctx = view->base.context; + struct r600_texture *tmp = (struct r600_texture*)view->base.texture; + uint64_t va; + int stride = util_format_get_blocksize(view->base.format); + unsigned format, num_format, format_comp, endian; + unsigned swizzle_res; + unsigned char swizzle[4]; + const struct util_format_description *desc; + + swizzle[0] = view->base.swizzle_r; + swizzle[1] = view->base.swizzle_g; + swizzle[2] = 
view->base.swizzle_b; + swizzle[3] = view->base.swizzle_a; + + r600_vertex_data_type(view->base.format, + &format, &num_format, &format_comp, + &endian); + + desc = util_format_description(view->base.format); + + swizzle_res = r600_get_swizzle_combined(desc->swizzle, swizzle, TRUE); + + va = r600_resource_va(ctx->screen, view->base.texture); + view->tex_resource = &tmp->resource; + + view->skip_mip_address_reloc = true; + view->tex_resource_words[0] = va; + view->tex_resource_words[1] = width0 - 1; + view->tex_resource_words[2] = S_030008_BASE_ADDRESS_HI(va >> 32UL) | + S_030008_STRIDE(stride) | + S_030008_DATA_FORMAT(format) | + S_030008_NUM_FORMAT_ALL(num_format) | + S_030008_FORMAT_COMP_ALL(format_comp) | + S_030008_SRF_MODE_ALL(1) | + S_030008_ENDIAN_SWAP(endian); + view->tex_resource_words[3] = swizzle_res; + /* +* in theory dword 4 is for number of elements, for use with resinfo, +* but it seems to utterly fail to work, the amd gpu shader analyser +* uses a const buffer to store the element sizes for buffer txq +*/ + view->tex_resource_words[4] = 0; + view->tex_resource_words[5] = view->tex_resource_words[6] = 0; + view->tex_resource_words[7] = S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_BUFFER); + return &view->base; +} + struct pipe_sampler_view * evergreen_create_sampler_view_custom(struct pipe_context *ctx, struct pipe_resource *texture, @@ -997,6 +1049,9 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx, view->base.reference.count = 1; view->base.context = ctx; + if (texture->target == PIPE_BUFFER) + return texture_buffer_sampler_view(view, width0, height0); + swizzle[0] = state->swizzle_r; swizzle[1] = state->swizzle_g; swizzle[2] = state->swizzle_b; diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 268137f..4bb22bd 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -2607,7 +2607,7 @@ void r600_bytecode_dump(struct r600_bytecode *bc) fprintf(stderr, "--\n"); 
} -static void r600_vertex_data_type(enum pipe_format pformat, +void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, unsigned *num_format, unsigned *format_comp, unsigned *endian) { diff --git a/src/gallium/drivers/r600/r600_asm