Re: [Mesa-dev] Was: Re: [PATCH] r600g: Add support for PK2H/UP2H
Pushed. Thanks for the reminder. Marek On Wed, Sep 21, 2016 at 11:20 PM, Dieter Nützel wrote: > Ping. - Again. > > Ilia and Marek voted for it. > > Any progress? > Anyone, Marek, Nicolai? > Should I rebase? > > Dieter > >> [Mesa-dev] [PATCH] r600g: Add support for PK2H/UP2H >> >> Glenn Kennard glenn.kennard at gmail.com >> Sun Jan 3 14:47:18 PST 2016 >> Previous message: [Mesa-dev] [PATCH 1/2] WIP gallivm: add support for >> PK2H/UP2H Next message: [Mesa-dev] [PATCH] mesa: use gl_shader_variable in >> program resource list Messages sorted by: [ date ] [ thread ] [ subject ] >> [ >> author ] >> Based off of Ilia's original patch, but with output values replicated so >> that it matches the TGSI semantics. >> >> Signed-off-by: Glenn Kennard >> --- >> >> src/gallium/drivers/r600/r600_pipe.c | 2 +- >> src/gallium/drivers/r600/r600_shader.c | 107 >> +++-- 2 files changed, 104 insertions(+), 5 >> deletions(-) >> >> diff --git a/src/gallium/drivers/r600/r600_pipe.c >> b/src/gallium/drivers/r600/r600_pipe.c index d71082f..3b5d26c 100644 >> --- a/src/gallium/drivers/r600/r600_pipe.c >> +++ b/src/gallium/drivers/r600/r600_pipe.c >> @@ -328,6 +328,7 @@ static int r600_get_param(struct pipe_screen* pscreen, >> enum pipe_cap param)> >> case PIPE_CAP_TEXTURE_QUERY_LOD: >> case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: >> >> case PIPE_CAP_SAMPLER_VIEW_TARGET: >> + case PIPE_CAP_TGSI_PACK_HALF_FLOAT: >> return family >= CHIP_CEDAR ? 1 : 0; >> case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: >> return family >= CHIP_CEDAR ? 
4 : 0; >> >> @@ -349,7 +350,6 @@ static int r600_get_param(struct pipe_screen* pscreen, >> enum pipe_cap param)> >> case PIPE_CAP_SHAREABLE_SHADERS: >> case PIPE_CAP_CLEAR_TEXTURE: >> >> case PIPE_CAP_DRAW_PARAMETERS: >> - case PIPE_CAP_TGSI_PACK_HALF_FLOAT: >> return 0; >> case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: >> diff --git a/src/gallium/drivers/r600/r600_shader.c >> b/src/gallium/drivers/r600/r600_shader.c index 9c040ae..7b1eade 100644 >> --- a/src/gallium/drivers/r600/r600_shader.c >> +++ b/src/gallium/drivers/r600/r600_shader.c >> @@ -8960,6 +8960,105 @@ static int tgsi_umad(struct r600_shader_ctx *ctx) >> >> return 0; >> >> } >> >> +static int tgsi_pk2h(struct r600_shader_ctx *ctx) >> +{ >> + struct tgsi_full_instruction *inst = >> &ctx->parse.FullToken.FullInstruction; >> + struct r600_bytecode_alu alu; >> + int r, i; >> + int lasti = >> tgsi_last_instruction(inst->Dst[0].Register.WriteMask); >> + >> + /* temp.xy = f32_to_f16(src) */ >> + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); >> + alu.op = ALU_OP1_FLT32_TO_FLT16; >> + alu.dst.chan = 0; >> + alu.dst.sel = ctx->temp_reg; >> + alu.dst.write = 1; >> + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); >> + r = r600_bytecode_add_alu(ctx->bc, &alu); >> + if (r) >> + return r; >> + alu.dst.chan = 1; >> + r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); >> + alu.last = 1; >> + r = r600_bytecode_add_alu(ctx->bc, &alu); >> + if (r) >> + return r; >> + >> + /* dst.x = temp.y * 0x10000 + temp.x */ >> + for (i = 0; i < lasti + 1; i++) { >> + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) >> + continue; >> + >> + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); >> + alu.op = ALU_OP3_MULADD_UINT24; >> + alu.is_op3 = 1; >> + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); >> + alu.last = i == lasti; >> + alu.src[0].sel = ctx->temp_reg; >> + alu.src[0].chan = 1; >> + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; >> + alu.src[1].value = 0x10000; >> + alu.src[2].sel = ctx->temp_reg; >> + alu.src[2].chan = 0; >> + r = r600_bytecode_add_alu(ctx->bc, &alu); >> + if (r) >> + return r; >> + } >> 
+ >> + return 0; >> +} >> + >> +static int tgsi_up2h(struct r600_shader_ctx *ctx) >> +{ >> + struct tgsi_full_instruction *inst = >> &ctx->parse.FullToken.FullInstruction; >> + struct r600_bytecode_alu alu; >> + int r, i; >> + int lasti = >> tgsi_last_instruction(inst->Dst[0].Register.WriteMask); >> + >> + /* temp.x = src.x */ >> + /* note: no need to mask out the high bits */ >> + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); >> + alu.op = ALU_OP1_MOV; >> + alu.dst.chan = 0; >> + alu.dst.sel = ctx->temp_reg; >> + alu.dst.write = 1; >> + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); >> + r = r600_bytecode_add_alu(ctx->bc, &alu); >> + if (r) >> + return r; >> + >> + /* temp.y = src.x >> 16 */ >> + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); >> + alu.op =
[Mesa-dev] Was: Re: [PATCH] r600g: Add support for PK2H/UP2H
Ping. - Again. Ilia and Marek voted for it. Any progress? Anyone, Marek, Nicolai? Should I rebase? Dieter [Mesa-dev] [PATCH] r600g: Add support for PK2H/UP2H Glenn Kennard glenn.kennard at gmail.com Sun Jan 3 14:47:18 PST 2016 Previous message: [Mesa-dev] [PATCH 1/2] WIP gallivm: add support for PK2H/UP2H Next message: [Mesa-dev] [PATCH] mesa: use gl_shader_variable in program resource list Messages sorted by: [ date ] [ thread ] [ subject ] [ author ] Based off of Ilia's original patch, but with output values replicated so that it matches the TGSI semantics. Signed-off-by: Glenn Kennard --- src/gallium/drivers/r600/r600_pipe.c | 2 +- src/gallium/drivers/r600/r600_shader.c | 107 +++-- 2 files changed, 104 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index d71082f..3b5d26c 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -328,6 +328,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)> case PIPE_CAP_TEXTURE_QUERY_LOD: case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: case PIPE_CAP_SAMPLER_VIEW_TARGET: + case PIPE_CAP_TGSI_PACK_HALF_FLOAT: return family >= CHIP_CEDAR ? 1 : 0; case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: return family >= CHIP_CEDAR ? 
4 : 0; @@ -349,7 +350,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)> case PIPE_CAP_SHAREABLE_SHADERS: case PIPE_CAP_CLEAR_TEXTURE: case PIPE_CAP_DRAW_PARAMETERS: - case PIPE_CAP_TGSI_PACK_HALF_FLOAT: return 0; case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 9c040ae..7b1eade 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -8960,6 +8960,105 @@ static int tgsi_umad(struct r600_shader_ctx *ctx) return 0; } +static int tgsi_pk2h(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bytecode_alu alu; + int r, i; + int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); + + /* temp.xy = f32_to_f16(src) */ + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP1_FLT32_TO_FLT16; + alu.dst.chan = 0; + alu.dst.sel = ctx->temp_reg; + alu.dst.write = 1; + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + alu.dst.chan = 1; + r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + + /* dst.x = temp.y * 0x10000 + temp.x */ + for (i = 0; i < lasti + 1; i++) { + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) + continue; + + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP3_MULADD_UINT24; + alu.is_op3 = 1; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + alu.last = i == lasti; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 1; + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = 0x10000; + alu.src[2].sel = ctx->temp_reg; + alu.src[2].chan = 0; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + } + + return 0; +} + +static int tgsi_up2h(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bytecode_alu alu; + int r, i; + int lasti 
= tgsi_last_instruction(inst->Dst[0].Register.WriteMask); + + /* temp.x = src.x */ + /* note: no need to mask out the high bits */ + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP1_MOV; + alu.dst.chan = 0; + alu.dst.sel = ctx->temp_reg; + alu.dst.write = 1; + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + + /* temp.y = src.x >> 16 */ + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP2_LSHR_INT; + alu.dst.chan = 1; + alu.dst.sel = ctx->temp_reg; + alu.dst.write = 1; + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = 16; + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + + /* dst.wz = dst.xy = f16_to_f32(temp.xy) */ + for (i = 0; i < lasti + 1; i++) { + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) +