Am 02.02.2018 um 00:23 schrieb Dave Airlie: > On 2 February 2018 at 09:23, Dave Airlie <airl...@gmail.com> wrote: >> On 2 February 2018 at 02:05, Roland Scheidegger <srol...@vmware.com> wrote: >>> Am 01.02.2018 um 09:21 schrieb Dave Airlie: >>>> From: Dave Airlie <airl...@redhat.com> >>>> >>>> This passes the CTS and piglit tests. >>>> >>>> This also disable sb for helper invocations until it doesn't >>>> mess up the VPM flags. >>>> >>>> Thanks to Ilia and Glenn for advice, and Roland for working >>>> out the working evergreen path. >>>> --- >>>> src/gallium/drivers/r600/r600_asm.c | 7 +- >>>> src/gallium/drivers/r600/r600_isa.c | 1 + >>>> src/gallium/drivers/r600/r600_isa.h | 5 +- >>>> src/gallium/drivers/r600/r600_shader.c | 113 >>>> +++++++++++++++++++++++++++++++++ >>>> src/gallium/drivers/r600/r600_shader.h | 1 + >>>> src/gallium/drivers/r600/r600_sq.h | 2 + >>>> 6 files changed, 126 insertions(+), 3 deletions(-) >>>> >>>> diff --git a/src/gallium/drivers/r600/r600_asm.c >>>> b/src/gallium/drivers/r600/r600_asm.c >>>> index 21d069d..ec2d34e 100644 >>>> --- a/src/gallium/drivers/r600/r600_asm.c >>>> +++ b/src/gallium/drivers/r600/r600_asm.c >>>> @@ -2099,9 +2099,12 @@ void r600_bytecode_disasm(struct r600_bytecode *bc) >>>> fprintf(stderr, "%04d %08X %08X %s ", id, >>>> bc->bytecode[id], >>>> bc->bytecode[id + 1], >>>> cfop->name); >>>> fprintf(stderr, "%d @%d ", cf->ndw / 4, >>>> cf->addr); >>>> - fprintf(stderr, "\n"); >>>> + if (cf->vpm) >>>> + fprintf(stderr, "VPM "); >>>> if (cf->end_of_program) >>>> fprintf(stderr, "EOP "); >>>> + fprintf(stderr, "\n"); >>>> + >>>> } else if (cfop->flags & CF_EXP) { >>>> int o = 0; >>>> const char *exp_type[] = {"PIXEL", "POS ", >>>> "PARAM"}; >>>> @@ -2198,6 +2201,8 @@ void r600_bytecode_disasm(struct r600_bytecode *bc) >>>> fprintf(stderr, "POP:%X ", >>>> cf->pop_count); >>>> if (cf->count && (cfop->flags & CF_EMIT)) >>>> fprintf(stderr, "STREAM%d ", >>>> cf->count); >>>> + if (cf->vpm) >>>> + fprintf(stderr, "VPM "); 
>>>> if (cf->end_of_program) >>>> fprintf(stderr, "EOP "); >>>> fprintf(stderr, "\n"); >>>> diff --git a/src/gallium/drivers/r600/r600_isa.c >>>> b/src/gallium/drivers/r600/r600_isa.c >>>> index 2633cdc..611b370 100644 >>>> --- a/src/gallium/drivers/r600/r600_isa.c >>>> +++ b/src/gallium/drivers/r600/r600_isa.c >>>> @@ -506,6 +506,7 @@ static const struct cf_op_info cf_op_table[] = { >>>> {"ALU_EXT", { -1, -1, 0x0C, 0x0C >>>> }, CF_CLAUSE | CF_ALU | CF_ALU_EXT }, >>>> {"ALU_CONTINUE", { 0x0D, 0x0D, 0x0D, -1 >>>> }, CF_CLAUSE | CF_ALU }, >>>> {"ALU_BREAK", { 0x0E, 0x0E, 0x0E, -1 >>>> }, CF_CLAUSE | CF_ALU }, >>>> + {"ALU_VALID_PIXEL_MODE", { -1, -1, -1, 0x0E >>>> }, CF_CLAUSE | CF_ALU }, >>>> {"ALU_ELSE_AFTER", { 0x0F, 0x0F, 0x0F, 0x0F >>>> }, CF_CLAUSE | CF_ALU }, >>>> {"CF_NATIVE", { 0x00, 0x00, 0x00, 0x00 >>>> }, 0 } >>>> }; >>>> diff --git a/src/gallium/drivers/r600/r600_isa.h >>>> b/src/gallium/drivers/r600/r600_isa.h >>>> index f6e2697..fcaf1f7 100644 >>>> --- a/src/gallium/drivers/r600/r600_isa.h >>>> +++ b/src/gallium/drivers/r600/r600_isa.h >>>> @@ -646,10 +646,11 @@ struct cf_op_info >>>> #define CF_OP_ALU_EXT 84 >>>> #define CF_OP_ALU_CONTINUE 85 >>>> #define CF_OP_ALU_BREAK 86 >>>> -#define CF_OP_ALU_ELSE_AFTER 87 >>>> +#define CF_OP_ALU_VALID_PIXEL_MODE 87 >>>> +#define CF_OP_ALU_ELSE_AFTER 88 >>>> >>>> /* CF_NATIVE means that r600_bytecode_cf contains pre-encoded native data >>>> */ >>>> -#define CF_NATIVE 88 >>>> +#define CF_NATIVE 89 >>>> >>>> enum r600_chip_class { >>>> ISA_CC_R600, >>>> diff --git a/src/gallium/drivers/r600/r600_shader.c >>>> b/src/gallium/drivers/r600/r600_shader.c >>>> index a462691..9388db9 100644 >>>> --- a/src/gallium/drivers/r600/r600_shader.c >>>> +++ b/src/gallium/drivers/r600/r600_shader.c >>>> @@ -197,6 +197,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx, >>>> >>>> use_sb &= !shader->shader.uses_atomics; >>>> use_sb &= !shader->shader.uses_images; >>>> + use_sb &= !shader->shader.uses_helper_invocation; >>>> 
>>>> /* Check if the bytecode has already been built. */ >>>> if (!shader->shader.bc.bytecode) { >>>> @@ -346,6 +347,7 @@ struct r600_shader_ctx { >>>> boolean clip_vertex_write; >>>> unsigned cv_output; >>>> unsigned edgeflag_output; >>>> + int helper_invoc_reg; >>>> int cs_block_size_reg; >>>> int cs_grid_size_reg; >>>> bool cs_block_size_loaded, cs_grid_size_loaded; >>>> @@ -1295,6 +1297,93 @@ static int load_sample_position(struct >>>> r600_shader_ctx *ctx, struct r600_shader_ >>>> return t1; >>>> } >>>> >>>> +static int eg_load_helper_invocation(struct r600_shader_ctx *ctx) >>>> +{ >>>> + int r; >>>> + struct r600_bytecode_alu alu; >>>> + >>>> + /* do a vtx fetch with wqm set on the vtx fetch */ >>>> + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); >>>> + alu.op = ALU_OP1_MOV; >>>> + alu.dst.sel = ctx->helper_invoc_reg; >>>> + alu.dst.chan = 0; >>>> + alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; >>>> + alu.src[0].value = 0xffffffff; >>>> + alu.dst.write = 1; >>>> + alu.last = 1; >>>> + r = r600_bytecode_add_alu(ctx->bc, &alu); >>>> + if (r) >>>> + return r; >>>> + >>>> + /* do a vtx fetch in VPM mode */ >>>> + struct r600_bytecode_vtx vtx; >>>> + memset(&vtx, 0, sizeof(vtx)); >>>> + vtx.op = FETCH_OP_GET_BUFFER_RESINFO; >>>> + vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER; >>>> + vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; >>>> + vtx.src_gpr = 0; >>>> + vtx.mega_fetch_count = 16; /* no idea here really... 
*/ >>>> + vtx.dst_gpr = ctx->helper_invoc_reg; >>>> + vtx.dst_sel_x = 4; >>>> + vtx.dst_sel_y = 7; /* SEL_Y */ >>>> + vtx.dst_sel_z = 7; /* SEL_Z */ >>>> + vtx.dst_sel_w = 7; /* SEL_W */ >>>> + vtx.data_format = FMT_32; >>>> + if ((r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx))) >>>> + return r; >>>> + ctx->bc->cf_last->vpm = 1; >>>> + >>>> + /* compare the result with 0 */ >>>> + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); >>>> + alu.op = ALU_OP3_CNDE_INT; >>>> + alu.is_op3 = 1; >>>> + alu.dst.sel = ctx->helper_invoc_reg; >>>> + alu.dst.chan = 0; >>>> + alu.dst.write = 1; >>>> + alu.src[0].sel = ctx->helper_invoc_reg; >>>> + alu.src[0].chan = 0; >>>> + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; >>>> + alu.src[1].value = 0x0; >>>> + alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; >>>> + alu.src[2].value = 0xffffffff; >>>> + alu.last = 1; >>>> + r = r600_bytecode_add_alu(ctx->bc, &alu); >>>> + if (r) >>>> + return r; >>> I realized this only later, this alu conditional is completely >>> unnecessary, just skip it... > > It might be for this test, but I don't think it is in general. We want boolean > which I think is 0 or 0xffffffff, not 0.0 or 1.0.
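The initial alu reg write was 0xffffffff, and the resinfo gives you back a fixed 0, so the register already holds either 0 or 0xffffffff (i.e. the integer boolean you want) and the CNDE_INT only reproduces its input. Roland _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev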