Re: [Mesa-dev] [PATCH 2/2] r600g: texture buffer object + glsl 1.40 enable support

2012-12-19 Thread Tom Stellard
On Wed, Dec 19, 2012 at 02:07:22PM -0500, Jerome Glisse wrote:
> On Wed, Dec 19, 2012 at 12:33 PM, Tom Stellard  wrote:
> > On Sun, Dec 16, 2012 at 08:33:23PM +1000, Dave Airlie wrote:
> >> From: Dave Airlie 
> >>
> >> This adds TBO support to r600g, and with GLSL 1.40 enabled,
> >> we now get 3.1 core profiles advertised for r600g.
> >>
> >> This code is evergreen only so far, but I don't think there is
> >> much to make it work on r600/700/cayman other than testing.
> >>
> >> a) buffer txq is broken like cube map txq, this sucks, fix it the
> >> exact same way.
> >>
> >> b) buffer fetches are done with a vertex clause,
> >>
> >> c) vertex swizzling offsets are different than texture swizzles,
> >> but we still need to use the combiner, so make it configurable.
> >>
> >> d) add implementation of UCMP.
> >>
> >> TODO: r600/700/cayman testing
> >> Signed-off-by: Dave Airlie 
> >> ---
> >>  src/gallium/drivers/r600/evergreen_state.c   | 55 
> >>  src/gallium/drivers/r600/r600_asm.c  |  2 +-
> >>  src/gallium/drivers/r600/r600_asm.h  |  2 +
> >>  src/gallium/drivers/r600/r600_pipe.c |  4 +-
> >>  src/gallium/drivers/r600/r600_pipe.h | 10 +++-
> >>  src/gallium/drivers/r600/r600_shader.c   | 75 
> >> 
> >>  src/gallium/drivers/r600/r600_shader.h   |  1 +
> >>  src/gallium/drivers/r600/r600_state_common.c | 58 +
> >>  src/gallium/drivers/r600/r600_texture.c  | 16 --
> >>  9 files changed, 204 insertions(+), 19 deletions(-)
> >>
> >
> > [snip]
> >
> >> diff --git a/src/gallium/drivers/r600/r600_shader.c 
> >> b/src/gallium/drivers/r600/r600_shader.c
> >> index feb7001..60667e7 100644
> >> --- a/src/gallium/drivers/r600/r600_shader.c
> >> +++ b/src/gallium/drivers/r600/r600_shader.c
> >> @@ -3819,6 +3819,71 @@ static inline unsigned tgsi_tex_get_src_gpr(struct 
> >> r600_shader_ctx *ctx,
> >>   return ctx->file_offset[inst->Src[index].Register.File] + 
> >> inst->Src[index].Register.Index;
> >>  }
> >>
> >> +static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean 
> >> src_requires_loading)
> >> +{
> >> + struct r600_bytecode_vtx vtx;
> >> + struct r600_bytecode_alu alu;
> >> + struct tgsi_full_instruction *inst = 
> >> &ctx->parse.FullToken.FullInstruction;
> >> + int src_gpr, r, i;
> >> +
> >> + src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
> >> + if (src_requires_loading) {
> >> + for (i = 0; i < 4; i++) {
> >> + memset(&alu, 0, sizeof(struct r600_bytecode_alu));
> >> + alu.inst = 
> >> CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
> >> + r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
> >> + alu.dst.sel = ctx->temp_reg;
> >> + alu.dst.chan = i;
> >> + if (i == 3)
> >> + alu.last = 1;
> >> + alu.dst.write = 1;
> >> + r = r600_bytecode_add_alu(ctx->bc, &alu);
> >> + if (r)
> >> + return r;
> >> + }
> >> + src_gpr = ctx->temp_reg;
> >> + }
> >> +
> >> + memset(&vtx, 0, sizeof(vtx));
> >> + vtx.inst = 0;
> >> + vtx.buffer_id = tgsi_tex_get_src_gpr(ctx, 1) + 
> >> R600_MAX_CONST_BUFFERS;;
> >> + vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
> >> + vtx.src_gpr = src_gpr;
> >> + vtx.mega_fetch_count = 16;
> >> + vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + 
> >> inst->Dst[0].Register.Index;
> >> + vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;   
> >>/* SEL_X */
> >> + vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;   
> >>/* SEL_Y */
> >> + vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;   
> >>/* SEL_Z */
> >> + vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;   
> >>/* SEL_W */
> >> + vtx.use_const_fields = 1;
> >> + vtx.srf_mode_all = 1;   /* SRF_MODE_NO_ZERO */
> >> +
> >
> > According to the docs, srf_mode_all will be ignored if use_const_fields
> > is set.  However, based on my tests while running compute shaders, other
> > fields like data_format, which are supposed to be ignored weren't being
> > ignored unless they were set to zero.  So, I think it would be safer
> > here to set srf_mode_all to zero and make sure that bit gets set on
> > the resource.
> >
> >
> >> + if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
> >> + return r;
> >> + return 0;
> >> +}
> >> +
> >
> > Otherwise, this code for vtx fetch looks good to me.  One problem I ran into
> > with vtx fetch instructions while working on compute shaders was that
> > the GPU will hang if you write to vtx.src_gpr in the
> > instruction group following the vtx fetch.  Here is a simple example:
> >
> > %T2_X = MOV %ZERO
> > %T3_X = VTX_RE

Re: [Mesa-dev] [PATCH 2/2] r600g: texture buffer object + glsl 1.40 enable support

2012-12-19 Thread Jerome Glisse
On Wed, Dec 19, 2012 at 12:33 PM, Tom Stellard  wrote:
> On Sun, Dec 16, 2012 at 08:33:23PM +1000, Dave Airlie wrote:
>> From: Dave Airlie 
>>
>> This adds TBO support to r600g, and with GLSL 1.40 enabled,
>> we now get 3.1 core profiles advertised for r600g.
>>
>> This code is evergreen only so far, but I don't think there is
>> much to make it work on r600/700/cayman other than testing.
>>
>> a) buffer txq is broken like cube map txq, this sucks, fix it the
>> exact same way.
>>
>> b) buffer fetches are done with a vertex clause,
>>
>> c) vertex swizzling offsets are different than texture swizzles,
>> but we still need to use the combiner, so make it configurable.
>>
>> d) add implementation of UCMP.
>>
>> TODO: r600/700/cayman testing
>> Signed-off-by: Dave Airlie 
>> ---
>>  src/gallium/drivers/r600/evergreen_state.c   | 55 
>>  src/gallium/drivers/r600/r600_asm.c  |  2 +-
>>  src/gallium/drivers/r600/r600_asm.h  |  2 +
>>  src/gallium/drivers/r600/r600_pipe.c |  4 +-
>>  src/gallium/drivers/r600/r600_pipe.h | 10 +++-
>>  src/gallium/drivers/r600/r600_shader.c   | 75 
>> 
>>  src/gallium/drivers/r600/r600_shader.h   |  1 +
>>  src/gallium/drivers/r600/r600_state_common.c | 58 +
>>  src/gallium/drivers/r600/r600_texture.c  | 16 --
>>  9 files changed, 204 insertions(+), 19 deletions(-)
>>
>
> [snip]
>
>> diff --git a/src/gallium/drivers/r600/r600_shader.c 
>> b/src/gallium/drivers/r600/r600_shader.c
>> index feb7001..60667e7 100644
>> --- a/src/gallium/drivers/r600/r600_shader.c
>> +++ b/src/gallium/drivers/r600/r600_shader.c
>> @@ -3819,6 +3819,71 @@ static inline unsigned tgsi_tex_get_src_gpr(struct 
>> r600_shader_ctx *ctx,
>>   return ctx->file_offset[inst->Src[index].Register.File] + 
>> inst->Src[index].Register.Index;
>>  }
>>
>> +static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean 
>> src_requires_loading)
>> +{
>> + struct r600_bytecode_vtx vtx;
>> + struct r600_bytecode_alu alu;
>> + struct tgsi_full_instruction *inst = 
>> &ctx->parse.FullToken.FullInstruction;
>> + int src_gpr, r, i;
>> +
>> + src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
>> + if (src_requires_loading) {
>> + for (i = 0; i < 4; i++) {
>> + memset(&alu, 0, sizeof(struct r600_bytecode_alu));
>> + alu.inst = 
>> CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
>> + r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
>> + alu.dst.sel = ctx->temp_reg;
>> + alu.dst.chan = i;
>> + if (i == 3)
>> + alu.last = 1;
>> + alu.dst.write = 1;
>> + r = r600_bytecode_add_alu(ctx->bc, &alu);
>> + if (r)
>> + return r;
>> + }
>> + src_gpr = ctx->temp_reg;
>> + }
>> +
>> + memset(&vtx, 0, sizeof(vtx));
>> + vtx.inst = 0;
>> + vtx.buffer_id = tgsi_tex_get_src_gpr(ctx, 1) + R600_MAX_CONST_BUFFERS;;
>> + vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
>> + vtx.src_gpr = src_gpr;
>> + vtx.mega_fetch_count = 16;
>> + vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + 
>> inst->Dst[0].Register.Index;
>> + vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 
>>  /* SEL_X */
>> + vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 
>>  /* SEL_Y */
>> + vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 
>>  /* SEL_Z */
>> + vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 
>>  /* SEL_W */
>> + vtx.use_const_fields = 1;
>> + vtx.srf_mode_all = 1;   /* SRF_MODE_NO_ZERO */
>> +
>
> According to the docs, srf_mode_all will be ignored if use_const_fields
> is set.  However, based on my tests while running compute shaders, other
> fields like data_format, which are supposed to be ignored weren't being
> ignored unless they were set to zero.  So, I think it would be safer
> here to set srf_mode_all to zero and make sure that bit gets set on
> the resource.
>
>
>> + if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
>> + return r;
>> + return 0;
>> +}
>> +
>
> Otherwise, this code for vtx fetch looks good to me.  One problem I ran into
> with vtx fetch instructions while working on compute shaders was that
> the GPU will hang if you write to vtx.src_gpr in the
> instruction group following the vtx fetch.  Here is a simple example:
>
> %T2_X = MOV %ZERO
> %T3_X = VTX_READ_eg %T2_X, 24
> %T2_X = MOV %ZERO
>
> I'm not sure if this happens on all GPU variants, but I was able to
> consistently reproduce this on my SUMO.  You may want to keep an eye
> out for this in case you run into any unexplainable hangs.
>

Did the vtx fetch group have the barrier flag set?

Cheers,
Jerome

Re: [Mesa-dev] [PATCH 2/2] r600g: texture buffer object + glsl 1.40 enable support

2012-12-19 Thread Tom Stellard
On Sun, Dec 16, 2012 at 08:33:23PM +1000, Dave Airlie wrote:
> From: Dave Airlie 
> 
> This adds TBO support to r600g, and with GLSL 1.40 enabled,
> we now get 3.1 core profiles advertised for r600g.
> 
> This code is evergreen only so far, but I don't think there is
> much to make it work on r600/700/cayman other than testing.
> 
> a) buffer txq is broken like cube map txq, this sucks, fix it the
> exact same way.
> 
> b) buffer fetches are done with a vertex clause,
> 
> c) vertex swizzling offsets are different than texture swizzles,
> but we still need to use the combiner, so make it configurable.
> 
> d) add implementation of UCMP.
> 
> TODO: r600/700/cayman testing
> Signed-off-by: Dave Airlie 
> ---
>  src/gallium/drivers/r600/evergreen_state.c   | 55 
>  src/gallium/drivers/r600/r600_asm.c  |  2 +-
>  src/gallium/drivers/r600/r600_asm.h  |  2 +
>  src/gallium/drivers/r600/r600_pipe.c |  4 +-
>  src/gallium/drivers/r600/r600_pipe.h | 10 +++-
>  src/gallium/drivers/r600/r600_shader.c   | 75 
> 
>  src/gallium/drivers/r600/r600_shader.h   |  1 +
>  src/gallium/drivers/r600/r600_state_common.c | 58 +
>  src/gallium/drivers/r600/r600_texture.c  | 16 --
>  9 files changed, 204 insertions(+), 19 deletions(-)
> 

[snip]

> diff --git a/src/gallium/drivers/r600/r600_shader.c 
> b/src/gallium/drivers/r600/r600_shader.c
> index feb7001..60667e7 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -3819,6 +3819,71 @@ static inline unsigned tgsi_tex_get_src_gpr(struct 
> r600_shader_ctx *ctx,
>   return ctx->file_offset[inst->Src[index].Register.File] + 
> inst->Src[index].Register.Index;
>  }
>  
> +static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean 
> src_requires_loading)
> +{
> + struct r600_bytecode_vtx vtx;
> + struct r600_bytecode_alu alu;
> + struct tgsi_full_instruction *inst = 
> &ctx->parse.FullToken.FullInstruction;
> + int src_gpr, r, i;
> +
> + src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
> + if (src_requires_loading) {
> + for (i = 0; i < 4; i++) {
> + memset(&alu, 0, sizeof(struct r600_bytecode_alu));
> + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
> + r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
> + alu.dst.sel = ctx->temp_reg;
> + alu.dst.chan = i;
> + if (i == 3)
> + alu.last = 1;
> + alu.dst.write = 1;
> + r = r600_bytecode_add_alu(ctx->bc, &alu);
> + if (r)
> + return r;
> + }
> + src_gpr = ctx->temp_reg;
> + }
> +
> + memset(&vtx, 0, sizeof(vtx));
> + vtx.inst = 0;
> + vtx.buffer_id = tgsi_tex_get_src_gpr(ctx, 1) + R600_MAX_CONST_BUFFERS;;
> + vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
> + vtx.src_gpr = src_gpr;
> + vtx.mega_fetch_count = 16;
> + vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + 
> inst->Dst[0].Register.Index;
> + vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;  
> /* SEL_X */
> + vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;  
> /* SEL_Y */
> + vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;  
> /* SEL_Z */
> + vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;  
> /* SEL_W */
> + vtx.use_const_fields = 1;
> + vtx.srf_mode_all = 1;   /* SRF_MODE_NO_ZERO */
> +

According to the docs, srf_mode_all will be ignored if use_const_fields
is set.  However, based on my tests while running compute shaders, other
fields like data_format, which are supposed to be ignored weren't being
ignored unless they were set to zero.  So, I think it would be safer
here to set srf_mode_all to zero and make sure that bit gets set on
the resource.


> + if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
> + return r;
> + return 0;
> +}
> +

Otherwise, this code for vtx fetch looks good to me.  One problem I ran into
with vtx fetch instructions while working on compute shaders was that
the GPU will hang if you write to vtx.src_gpr in the
instruction group following the vtx fetch.  Here is a simple example:

%T2_X = MOV %ZERO
%T3_X = VTX_READ_eg %T2_X, 24
%T2_X = MOV %ZERO

I'm not sure if this happens on all GPU variants, but I was able to
consistently reproduce this on my SUMO.  You may want to keep an eye
out for this in case you run into any unexplainable hangs.

-Tom
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] r600g: texture buffer object + glsl 1.40 enable support

2012-12-16 Thread Dave Airlie
On Sun, Dec 16, 2012 at 8:33 PM, Dave Airlie  wrote:
> From: Dave Airlie 
>
> This adds TBO support to r600g, and with GLSL 1.40 enabled,
> we now get 3.1 core profiles advertised for r600g.
>
> This code is evergreen only so far, but I don't think there is
> much to make it work on r600/700/cayman other than testing.
>
> a) buffer txq is broken like cube map txq, this sucks, fix it the
> exact same way.
>
> b) buffer fetches are done with a vertex clause,
>
> c) vertex swizzling offsets are different than texture swizzles,
> but we still need to use the combiner, so make it configurable.
>
> d) add implementation of UCMP.

Oh, this is part of the UBO patch.
Also, the return after the txq/txf check is wrong, as some things init
texture to 0 (like DDX).

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] r600g: texture buffer object + glsl 1.40 enable support

2012-12-16 Thread Dave Airlie
From: Dave Airlie 

This adds TBO support to r600g, and with GLSL 1.40 enabled,
we now get 3.1 core profiles advertised for r600g.

This code is evergreen only so far, but I don't think there is
much to make it work on r600/700/cayman other than testing.

a) buffer txq is broken like cube map txq, this sucks, fix it the
exact same way.

b) buffer fetches are done with a vertex clause,

c) vertex swizzling offsets are different than texture swizzles,
but we still need to use the combiner, so make it configurable.

d) add implementation of UCMP.

TODO: r600/700/cayman testing
Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/r600/evergreen_state.c   | 55 
 src/gallium/drivers/r600/r600_asm.c  |  2 +-
 src/gallium/drivers/r600/r600_asm.h  |  2 +
 src/gallium/drivers/r600/r600_pipe.c |  4 +-
 src/gallium/drivers/r600/r600_pipe.h | 10 +++-
 src/gallium/drivers/r600/r600_shader.c   | 75 
 src/gallium/drivers/r600/r600_shader.h   |  1 +
 src/gallium/drivers/r600/r600_state_common.c | 58 +
 src/gallium/drivers/r600/r600_texture.c  | 16 --
 9 files changed, 204 insertions(+), 19 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 996c1b4..49564e7 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -969,6 +969,58 @@ static void *evergreen_create_sampler_state(struct 
pipe_context *ctx,
return ss;
 }
 
+static struct pipe_sampler_view *
+texture_buffer_sampler_view(struct r600_pipe_sampler_view *view,
+   unsigned width0, unsigned height0)
+   
+{
+   struct pipe_context *ctx = view->base.context;
+   struct r600_texture *tmp = (struct r600_texture*)view->base.texture;
+   uint64_t va;
+   int stride = util_format_get_blocksize(view->base.format);
+   unsigned format, num_format, format_comp, endian;
+   unsigned swizzle_res;
+   unsigned char swizzle[4];
+   const struct util_format_description *desc;
+
+   swizzle[0] = view->base.swizzle_r;
+   swizzle[1] = view->base.swizzle_g;
+   swizzle[2] = view->base.swizzle_b;
+   swizzle[3] = view->base.swizzle_a;
+
+   r600_vertex_data_type(view->base.format,
+ &format, &num_format, &format_comp,
+ &endian);
+
+   desc = util_format_description(view->base.format);
+
+   swizzle_res = r600_get_swizzle_combined(desc->swizzle, swizzle, TRUE);
+
+   va = r600_resource_va(ctx->screen, view->base.texture);
+   view->tex_resource = &tmp->resource;
+
+   view->skip_mip_address_reloc = true;
+   view->tex_resource_words[0] = va;
+   view->tex_resource_words[1] = width0 - 1;
+   view->tex_resource_words[2] = S_030008_BASE_ADDRESS_HI(va >> 32UL) |
+   S_030008_STRIDE(stride) |
+   S_030008_DATA_FORMAT(format) |
+   S_030008_NUM_FORMAT_ALL(num_format) |
+   S_030008_FORMAT_COMP_ALL(format_comp) |
+   S_030008_SRF_MODE_ALL(1) |
+   S_030008_ENDIAN_SWAP(endian);
+   view->tex_resource_words[3] = swizzle_res;
+   /*
+* in theory dword 4 is for number of elements, for use with resinfo,
+* but it seems to utterly fail to work, the amd gpu shader analyser
+* uses a const buffer to store the element sizes for buffer txq
+*/
+   view->tex_resource_words[4] = 0;
+   view->tex_resource_words[5] = view->tex_resource_words[6] = 0;
+   view->tex_resource_words[7] = 
S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_BUFFER);
+   return &view->base;
+}
+
 struct pipe_sampler_view *
 evergreen_create_sampler_view_custom(struct pipe_context *ctx,
 struct pipe_resource *texture,
@@ -997,6 +1049,9 @@ evergreen_create_sampler_view_custom(struct pipe_context 
*ctx,
view->base.reference.count = 1;
view->base.context = ctx;
 
+   if (texture->target == PIPE_BUFFER)
+   return texture_buffer_sampler_view(view, width0, height0);
+
swizzle[0] = state->swizzle_r;
swizzle[1] = state->swizzle_g;
swizzle[2] = state->swizzle_b;
diff --git a/src/gallium/drivers/r600/r600_asm.c 
b/src/gallium/drivers/r600/r600_asm.c
index 268137f..4bb22bd 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -2607,7 +2607,7 @@ void r600_bytecode_dump(struct r600_bytecode *bc)
fprintf(stderr, "--\n");
 }
 
-static void r600_vertex_data_type(enum pipe_format pformat,
+void r600_vertex_data_type(enum pipe_format pformat,
  unsigned *format,
  unsigned *num_format, unsigned *format_comp, 
unsigned *endian)
 {
diff --git a/src/gallium/drivers/r600/r600_asm