On Wed, 2012-01-18 at 21:49 +0000, Dave Airlie wrote:
> From: Dave Airlie <[email protected]>
> 
> So it appears R600s (except rv670) do AR handling differently, using a different
> opcode. This patch fixes up r600g to work properly on r600.
> 
> This fixes ~100 piglit tests here (in GLSL1.30 mode) on rv610.
> 
> v3: add index_mode as per the docs.
> 
> This still fails any dst-relative tests for some reason I can't quite see yet,
> but it passes a lot more tests than without this patch.

I guess it's the problem described in the r6xx_r7xx_3d.pdf:

"6.1.4 Shader GPR Indexing may return incorrect result
This affects R600, RV630 and RV610, but not RV670 or RS780.
...
        MOV R[A0.x +2], R33
        ADD R20, R20, R2 // H/w thinks R2 is the same as the prev dest 
                                and will substitute PV"

Vadim

> 
> Signed-off-by: Dave Airlie <[email protected]>
> ---
>  src/gallium/drivers/r600/r600_asm.c    |   48 ++++++++++++++++++++++++++++---
>  src/gallium/drivers/r600/r600_asm.h    |    8 ++++-
>  src/gallium/drivers/r600/r600_shader.c |    6 +++-
>  src/gallium/drivers/r600/r600_sq.h     |    7 ++++
>  4 files changed, 62 insertions(+), 7 deletions(-)
> 
> diff --git a/src/gallium/drivers/r600/r600_asm.c 
> b/src/gallium/drivers/r600/r600_asm.c
> index 8234744..aad286b 100644
> --- a/src/gallium/drivers/r600/r600_asm.c
> +++ b/src/gallium/drivers/r600/r600_asm.c
> @@ -94,6 +94,7 @@ static inline unsigned int 
> r600_bytecode_get_num_operands(struct r600_bytecode *
>               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV:
>               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA:
>               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR:
> +             case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT:
>               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT:
>               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT:
>               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR:
> @@ -249,10 +250,11 @@ static struct r600_bytecode_tex *r600_bytecode_tex(void)
>       return tex;
>  }
>  
> -void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class)
> +void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class 
> chip_class, unsigned ar_handling)
>  {
>       LIST_INITHEAD(&bc->cf);
>       bc->chip_class = chip_class;
> +     bc->ar_handling = ar_handling;
>  }
>  
>  static int r600_bytecode_add_cf(struct r600_bytecode *bc)
> @@ -441,7 +443,8 @@ static int is_alu_mova_inst(struct r600_bytecode *bc, 
> struct r600_bytecode_alu *
>               return !alu->is_op3 && (
>                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA ||
>                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR 
> ||
> -                     alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT);
> +                     alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT ||
> +                     alu->inst == 
> V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT);
>       case EVERGREEN:
>       case CAYMAN:
>       default:
> @@ -457,7 +460,8 @@ static int is_alu_vec_unit_inst(struct r600_bytecode *bc, 
> struct r600_bytecode_a
>       case R600:
>       case R700:
>               return is_alu_reduction_inst(bc, alu) ||
> -                     is_alu_mova_inst(bc, alu);
> +                     (is_alu_mova_inst(bc, alu) && 
> +                      (alu->inst != 
> V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT));
>       case EVERGREEN:
>       case CAYMAN:
>       default:
> @@ -478,6 +482,7 @@ static int is_alu_trans_unit_inst(struct r600_bytecode 
> *bc, struct r600_bytecode
>       case R700:
>               if (!alu->is_op3)
>                       return alu->inst == 
> V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT ||
> +                             alu->inst == 
> V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT ||
>                               alu->inst == 
> V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT ||
>                               alu->inst == 
> V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT ||
>                               alu->inst == 
> V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT ||
> @@ -1236,12 +1241,43 @@ static int r600_bytecode_alloc_kcache_lines(struct 
> r600_bytecode *bc, struct r60
>       return 0;
>  }
>  
> +
> +/* load AR register from gpr (bc->ar_reg) with MOVA_GPR_INT */
> +static int load_ar_r6xx(struct r600_bytecode *bc)
> +{
> +     struct r600_bytecode_alu alu;
> +     int r;
> +
> +     if (bc->ar_loaded)
> +             return 0;
> +
> +     /* hack to avoid making MOVA the last instruction in the clause */
> +     if ((bc->cf_last->ndw>>1) >= 110)
> +             bc->force_add_cf = 1;
> +
> +     memset(&alu, 0, sizeof(alu));
> +     alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT;
> +     alu.src[0].sel = bc->ar_reg;
> +     alu.last = 1;
> +     alu.index_mode = INDEX_MODE_LOOP;
> +     r = r600_bytecode_add_alu(bc, &alu);
> +     if (r)
> +             return r;
> +
> +     bc->cf_last->r6xx_uses_waterfall = 1;
> +     bc->ar_loaded = 1;
> +     return 0;
> +}
> +
>  /* load AR register from gpr (bc->ar_reg) with MOVA_INT */
>  static int load_ar(struct r600_bytecode *bc)
>  {
>       struct r600_bytecode_alu alu;
>       int r;
>  
> +     if (bc->ar_handling)
> +             return load_ar_r6xx(bc);
> +
>       if (bc->ar_loaded)
>               return 0;
>  
> @@ -1599,6 +1635,7 @@ static int r600_bytecode_alu_build(struct r600_bytecode 
> *bc, struct r600_bytecod
>                               S_SQ_ALU_WORD0_SRC1_REL(alu->src[1].rel) |
>                               S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) |
>                               S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) |
> +                             S_SQ_ALU_WORD0_INDEX_MODE(alu->index_mode) |
>                               S_SQ_ALU_WORD0_LAST(alu->last);
>  
>       if (alu->is_op3) {
> @@ -2286,7 +2323,8 @@ void r600_bytecode_dump(struct r600_bytecode *bc)
>                       fprintf(stderr, "SRC1(SEL:%d ", alu->src[1].sel);
>                       fprintf(stderr, "REL:%d ", alu->src[1].rel);
>                       fprintf(stderr, "CHAN:%d ", alu->src[1].chan);
> -                     fprintf(stderr, "NEG:%d) ", alu->src[1].neg);
> +                     fprintf(stderr, "NEG:%d ", alu->src[1].neg);
> +                     fprintf(stderr, "IM:%d) ", alu->index_mode);
>                       fprintf(stderr, "LAST:%d)\n", alu->last);
>                       id++;
>                       fprintf(stderr, "%04d %08X %c ", id, bc->bytecode[id], 
> alu->last ? '*' : ' ');
> @@ -2565,7 +2603,7 @@ int r600_vertex_elements_build_fetch_shader(struct 
> r600_pipe_context *rctx, stru
>       }
>  
>       memset(&bc, 0, sizeof(bc));
> -     r600_bytecode_init(&bc, rctx->chip_class);
> +     r600_bytecode_init(&bc, rctx->chip_class, 0);
>  
>       for (i = 0; i < ve->count; i++) {
>               if (elements[i].instance_divisor > 1) {
> diff --git a/src/gallium/drivers/r600/r600_asm.h 
> b/src/gallium/drivers/r600/r600_asm.h
> index d0ff75d..40abb25 100644
> --- a/src/gallium/drivers/r600/r600_asm.h
> +++ b/src/gallium/drivers/r600/r600_asm.h
> @@ -54,6 +54,7 @@ struct r600_bytecode_alu {
>       unsigned                        bank_swizzle;
>       unsigned                        bank_swizzle_force;
>       unsigned                        omod;
> +     unsigned                        index_mode;
>  };
>  
>  struct r600_bytecode_tex {
> @@ -176,6 +177,10 @@ struct r600_cf_callstack {
>       int                             max;
>  };
>  
> +#define AR_HANDLE_NORMAL 0
> +#define AR_HANDLE_RV6XX 1 /* except RV670 */
> +
> +
>  struct r600_bytecode {
>       enum chip_class                 chip_class;
>       int                             type;
> @@ -194,13 +199,14 @@ struct r600_bytecode {
>       struct r600_cf_callstack        callstack[SQ_MAX_CALL_DEPTH];
>       unsigned        ar_loaded;
>       unsigned        ar_reg;
> +     unsigned        ar_handling;
>  };
>  
>  /* eg_asm.c */
>  int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf 
> *cf);
>  
>  /* r600_asm.c */
> -void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class 
> chip_class);
> +void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class 
> chip_class, unsigned ar_handling);
>  void r600_bytecode_clear(struct r600_bytecode *bc);
>  int r600_bytecode_add_alu(struct r600_bytecode *bc, const struct 
> r600_bytecode_alu *alu);
>  int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct 
> r600_bytecode_vtx *vtx);
> diff --git a/src/gallium/drivers/r600/r600_shader.c 
> b/src/gallium/drivers/r600/r600_shader.c
> index 59d41cf..1f19190 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -804,10 +804,14 @@ static int r600_shader_from_tgsi(struct 
> r600_pipe_context * rctx, struct r600_pi
>       unsigned output_done, noutput;
>       unsigned opcode;
>       int i, j, r = 0, pos0;
> +     unsigned ar_handling = AR_HANDLE_NORMAL;
> +
> +     if ((rctx->chip_class == R600) && (rctx->family != CHIP_RV670))
> +             ar_handling = AR_HANDLE_RV6XX;
>  
>       ctx.bc = &shader->bc;
>       ctx.shader = shader;
> -     r600_bytecode_init(ctx.bc, rctx->chip_class);
> +     r600_bytecode_init(ctx.bc, rctx->chip_class, ar_handling);
>       ctx.tokens = tokens;
>       tgsi_scan_shader(tokens, &ctx.info);
>       tgsi_parse_init(&ctx.parse, tokens);
> diff --git a/src/gallium/drivers/r600/r600_sq.h 
> b/src/gallium/drivers/r600/r600_sq.h
> index b9c4126..4b2a19a 100644
> --- a/src/gallium/drivers/r600/r600_sq.h
> +++ b/src/gallium/drivers/r600/r600_sq.h
> @@ -471,4 +471,11 @@
>  #define SQ_ALU_SCL_122                           0x00000001
>  #define SQ_ALU_SCL_212                           0x00000002
>  #define SQ_ALU_SCL_221                           0x00000003
> +
> +#define   INDEX_MODE_AR_X 0
> +#define   INDEX_MODE_AR_Y 1
> +#define   INDEX_MODE_AR_Z 2
> +#define   INDEX_MODE_AR_W 3
> +#define   INDEX_MODE_LOOP 4
> +
>  #endif



_______________________________________________
mesa-dev mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to