Module: Mesa Branch: main Commit: 6253a23eff4dc13fdb4da9335b21d1942d63bc5c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6253a23eff4dc13fdb4da9335b21d1942d63bc5c
Author: Charmaine Lee <[email protected]> Date: Fri May 6 14:45:55 2022 -0700 svga/tgsi: add translation for DTRUNC DTRUNC is not lowered in glsl-nir-tgsi path. Add the translation in svga_tgsi_vgpu10(). Reviewed-by: Neha Bhende <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16412> --- src/gallium/drivers/svga/svga_tgsi_vgpu10.c | 99 ++++++++++++++++++++++++++++- 1 file changed, 96 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c index 56d2e179284..0064fe291db 100644 --- a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c +++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c @@ -60,7 +60,7 @@ #define INVALID_INDEX 99999 -#define MAX_INTERNAL_TEMPS 3 +#define MAX_INTERNAL_TEMPS 4 #define MAX_SYSTEM_VALUES 4 #define MAX_IMMEDIATE_COUNT \ (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4) @@ -3218,7 +3218,8 @@ alloc_common_immediates(struct svga_shader_emitter_v10 *emit) alloc_immediate_double2(emit, -1.0, -1.0); } - if (emit->info.opcode_count[TGSI_OPCODE_DSQRT] > 0) { + if (emit->info.opcode_count[TGSI_OPCODE_DSQRT] > 0 || + emit->info.opcode_count[TGSI_OPCODE_DTRUNC] > 0) { emit->common_immediate_pos[n++] = alloc_immediate_double2(emit, 0.0, 0.0); emit->common_immediate_pos[n++] = @@ -9470,6 +9471,96 @@ emit_dsqrt(struct svga_shader_emitter_v10 *emit, } +/** + * glsl-nir path does not lower DTRUNC, so we need to + * add the translation here. + * + * frac = DFRAC(src) + * tmp = src - frac + * dst = src >= 0 ? tmp : (tmp + (frac==0 ? 0 : 1)) + */ +static boolean +emit_dtrunc(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + assert(emit->version >= 50); + + check_double_src_swizzle(&inst->Src[0]); + + + + /* frac = DFRAC(src) */ + unsigned frac_index = get_temp_index(emit); + struct tgsi_full_dst_register frac_dst = make_dst_temp_reg(frac_index); + struct tgsi_full_src_register frac_src = make_src_temp_reg(frac_index); + + VGPU10OpcodeToken0 token0; + begin_emit_instruction(emit); + token0.value = 0; + token0.opcodeType = VGPU10_OPCODE_VMWARE; + token0.vmwareOpcodeType = VGPU10_VMWARE_OPCODE_DFRC; + emit_dword(emit, token0.value); + emit_dst_register(emit, &frac_dst); + emit_src_register(emit, &inst->Src[0]); + end_emit_instruction(emit); + + /* tmp = src - frac */ + unsigned tmp_index = get_temp_index(emit); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp_index); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp_index); + struct tgsi_full_src_register negate_frac_src = negate_src(&frac_src); + emit_instruction_opn(emit, VGPU10_OPCODE_DADD, + &tmp_dst, &inst->Src[0], &negate_frac_src, NULL, + inst->Instruction.Saturate, inst->Instruction.Precise); + + /* cond = frac==0 */ + unsigned cond_index = get_temp_index(emit); + struct tgsi_full_dst_register cond_dst = make_dst_temp_reg(cond_index); + struct tgsi_full_src_register cond_src = make_src_temp_reg(cond_index); + struct tgsi_full_src_register zero = + make_immediate_reg_double(emit, 0); + + /* Only use one or two components for double opcode */ + cond_dst = writemask_dst(&cond_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y); + + emit_instruction_opn(emit, VGPU10_OPCODE_DEQ, + &cond_dst, &frac_src, &zero, NULL, + inst->Instruction.Saturate, inst->Instruction.Precise); + + /* tmp2 = cond ? 0 : 1 */ + unsigned tmp2_index = get_temp_index(emit); + struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2_index); + struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2_index); + struct tgsi_full_src_register cond_src_xy = + swizzle_src(&cond_src, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, + PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y); + struct tgsi_full_src_register one = + make_immediate_reg_double(emit, 1.0); + + emit_instruction_opn(emit, VGPU10_OPCODE_DMOVC, + &tmp2_dst, &cond_src_xy, &zero, &one, + inst->Instruction.Saturate, inst->Instruction.Precise); + + /* tmp2 = tmp + tmp2 */ + emit_instruction_opn(emit, VGPU10_OPCODE_DADD, + &tmp2_dst, &tmp_src, &tmp2_src, NULL, + inst->Instruction.Saturate, inst->Instruction.Precise); + + /* cond = src>=0 */ + emit_instruction_opn(emit, VGPU10_OPCODE_DGE, + &cond_dst, &inst->Src[0], &zero, NULL, + inst->Instruction.Saturate, inst->Instruction.Precise); + + /* dst = cond ? tmp : tmp2 */ + emit_instruction_opn(emit, VGPU10_OPCODE_DMOVC, + &inst->Dst[0], &cond_src_xy, &tmp_src, &tmp2_src, + inst->Instruction.Saturate, inst->Instruction.Precise); + + free_temp_indexes(emit); + return TRUE; +} + + static boolean emit_interp_offset(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) @@ -10960,6 +11051,9 @@ emit_instruction(struct svga_shader_emitter_v10 *emit, case TGSI_OPCODE_DFMA: return emit_simple(emit, inst); + case TGSI_OPCODE_DTRUNC: + return emit_dtrunc(emit, inst); + /* The following opcodes should never be seen here. We return zero * for all the PIPE_CAP_TGSI_DROUND_SUPPORTED, DFRACEXP_DLDEXP_SUPPORTED, * LDEXP_SUPPORTED queries. @@ -10968,7 +11062,6 @@ emit_instruction(struct svga_shader_emitter_v10 *emit, case TGSI_OPCODE_DSSG: case TGSI_OPCODE_DFRACEXP: case TGSI_OPCODE_DLDEXP: - case TGSI_OPCODE_DTRUNC: case TGSI_OPCODE_DCEIL: case TGSI_OPCODE_DFLR: debug_printf("Unexpected TGSI opcode %s. "
