Module: Mesa
Branch: main
Commit: 6253a23eff4dc13fdb4da9335b21d1942d63bc5c
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=6253a23eff4dc13fdb4da9335b21d1942d63bc5c

Author: Charmaine Lee <[email protected]>
Date:   Fri May  6 14:45:55 2022 -0700

svga/tgsi: add translation for DTRUNC

DTRUNC is not lowered in glsl-nir-tgsi path. Add the translation
in svga_tgsi_vgpu10().

Reviewed-by: Neha Bhende <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16412>

---

 src/gallium/drivers/svga/svga_tgsi_vgpu10.c | 99 ++++++++++++++++++++++++++++-
 1 file changed, 96 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c 
b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
index 56d2e179284..0064fe291db 100644
--- a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
+++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
@@ -60,7 +60,7 @@
 
 
 #define INVALID_INDEX 99999
-#define MAX_INTERNAL_TEMPS 3
+#define MAX_INTERNAL_TEMPS 4
 #define MAX_SYSTEM_VALUES 4
 #define MAX_IMMEDIATE_COUNT \
         (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4)
@@ -3218,7 +3218,8 @@ alloc_common_immediates(struct svga_shader_emitter_v10 
*emit)
          alloc_immediate_double2(emit, -1.0, -1.0);
    }
 
-   if (emit->info.opcode_count[TGSI_OPCODE_DSQRT] > 0) {
+   if (emit->info.opcode_count[TGSI_OPCODE_DSQRT] > 0 ||
+       emit->info.opcode_count[TGSI_OPCODE_DTRUNC] > 0) {
       emit->common_immediate_pos[n++] =
          alloc_immediate_double2(emit, 0.0, 0.0);
       emit->common_immediate_pos[n++] =
@@ -9470,6 +9471,96 @@ emit_dsqrt(struct svga_shader_emitter_v10 *emit,
 }
 
 
+/**
+ * glsl-nir path does not lower DTRUNC, so we need to
+ * add the translation here.
+ *
+ * frac = DFRAC(src)
+ * tmp = src - frac
+ * dst = src >= 0 ? tmp : (tmp + (frac==0 ? 0 : 1))
+ */
+static boolean
+emit_dtrunc(struct svga_shader_emitter_v10 *emit,
+            const struct tgsi_full_instruction *inst)
+{
+   assert(emit->version >= 50);
+
+   check_double_src_swizzle(&inst->Src[0]);
+
+
+
+   /* frac = DFRAC(src) */
+   unsigned frac_index = get_temp_index(emit);
+   struct tgsi_full_dst_register frac_dst = make_dst_temp_reg(frac_index);
+   struct tgsi_full_src_register frac_src = make_src_temp_reg(frac_index);
+
+   VGPU10OpcodeToken0 token0;
+   begin_emit_instruction(emit);
+   token0.value = 0;
+   token0.opcodeType = VGPU10_OPCODE_VMWARE;
+   token0.vmwareOpcodeType = VGPU10_VMWARE_OPCODE_DFRC;
+   emit_dword(emit, token0.value);
+   emit_dst_register(emit, &frac_dst);
+   emit_src_register(emit, &inst->Src[0]);
+   end_emit_instruction(emit);
+
+   /* tmp = src - frac */
+   unsigned tmp_index = get_temp_index(emit);
+   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp_index);
+   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp_index);
+   struct tgsi_full_src_register negate_frac_src = negate_src(&frac_src);
+   emit_instruction_opn(emit, VGPU10_OPCODE_DADD,
+                        &tmp_dst, &inst->Src[0], &negate_frac_src, NULL,
+                        inst->Instruction.Saturate, inst->Instruction.Precise);
+
+   /* cond = frac==0 */
+   unsigned cond_index = get_temp_index(emit);
+   struct tgsi_full_dst_register cond_dst = make_dst_temp_reg(cond_index);
+   struct tgsi_full_src_register cond_src = make_src_temp_reg(cond_index);
+   struct tgsi_full_src_register zero =
+               make_immediate_reg_double(emit, 0);
+
+   /* Only use one or two components for double opcode */
+   cond_dst = writemask_dst(&cond_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y);
+
+   emit_instruction_opn(emit, VGPU10_OPCODE_DEQ,
+                        &cond_dst, &frac_src, &zero, NULL,
+                        inst->Instruction.Saturate, inst->Instruction.Precise);
+
+   /* tmp2 = cond ? 0 : 1 */
+   unsigned tmp2_index = get_temp_index(emit);
+   struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2_index);
+   struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2_index);
+   struct tgsi_full_src_register cond_src_xy =
+      swizzle_src(&cond_src, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
+                            PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y);
+   struct tgsi_full_src_register one =
+               make_immediate_reg_double(emit, 1.0);
+
+   emit_instruction_opn(emit, VGPU10_OPCODE_DMOVC,
+                        &tmp2_dst, &cond_src_xy, &zero, &one,
+                        inst->Instruction.Saturate, inst->Instruction.Precise);
+
+   /* tmp2 = tmp + tmp2 */
+   emit_instruction_opn(emit, VGPU10_OPCODE_DADD,
+                        &tmp2_dst, &tmp_src, &tmp2_src, NULL,
+                        inst->Instruction.Saturate, inst->Instruction.Precise);
+
+   /* cond = src>=0 */
+   emit_instruction_opn(emit, VGPU10_OPCODE_DGE,
+                        &cond_dst, &inst->Src[0], &zero, NULL,
+                        inst->Instruction.Saturate, inst->Instruction.Precise);
+
+   /* dst = cond ? tmp : tmp2 */
+   emit_instruction_opn(emit, VGPU10_OPCODE_DMOVC,
+                        &inst->Dst[0], &cond_src_xy, &tmp_src, &tmp2_src,
+                        inst->Instruction.Saturate, inst->Instruction.Precise);
+
+   free_temp_indexes(emit);
+   return TRUE;
+}
+
+
 static boolean
 emit_interp_offset(struct svga_shader_emitter_v10 *emit,
                    const struct tgsi_full_instruction *inst)
@@ -10960,6 +11051,9 @@ emit_instruction(struct svga_shader_emitter_v10 *emit,
    case TGSI_OPCODE_DFMA:
       return emit_simple(emit, inst);
 
+   case TGSI_OPCODE_DTRUNC:
+      return emit_dtrunc(emit, inst);
+
    /* The following opcodes should never be seen here.  We return zero
     * for all the PIPE_CAP_TGSI_DROUND_SUPPORTED, DFRACEXP_DLDEXP_SUPPORTED,
     * LDEXP_SUPPORTED queries.
@@ -10968,7 +11062,6 @@ emit_instruction(struct svga_shader_emitter_v10 *emit,
    case TGSI_OPCODE_DSSG:
    case TGSI_OPCODE_DFRACEXP:
    case TGSI_OPCODE_DLDEXP:
-   case TGSI_OPCODE_DTRUNC:
    case TGSI_OPCODE_DCEIL:
    case TGSI_OPCODE_DFLR:
       debug_printf("Unexpected TGSI opcode %s.  "

Reply via email to