This way we can set the destination type to double for all of these new opcodes, avoiding the optimizer confusion that was happening before.
Signed-off-by: Samuel Iglesias Gonsálvez <sigles...@igalia.com> --- This patch is going to be placed just after patch 17. Please discard patch 19. src/intel/compiler/brw_eu_defines.h | 4 +++- src/intel/compiler/brw_shader.cpp | 8 ++++++-- src/intel/compiler/brw_vec4.cpp | 12 +++++++++--- src/intel/compiler/brw_vec4_copy_propagation.cpp | 4 +++- src/intel/compiler/brw_vec4_generator.cpp | 23 +++++++++++++++++++++-- src/intel/compiler/brw_vec4_nir.cpp | 24 ++++++++++++++++++------ src/intel/compiler/brw_vec4_reg_allocate.cpp | 4 +++- 7 files changed, 63 insertions(+), 16 deletions(-) diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index f0b0d5c2a06..13a70f6f6a1 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -447,7 +447,9 @@ enum opcode { VEC4_OPCODE_MOV_BYTES, VEC4_OPCODE_PACK_BYTES, VEC4_OPCODE_UNPACK_UNIFORM, - VEC4_OPCODE_FROM_DOUBLE, + VEC4_OPCODE_DOUBLE_TO_F32, + VEC4_OPCODE_DOUBLE_TO_D32, + VEC4_OPCODE_DOUBLE_TO_U32, VEC4_OPCODE_TO_DOUBLE, VEC4_OPCODE_PICK_LOW_32BIT, VEC4_OPCODE_PICK_HIGH_32BIT, diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index bfaa5e7bfe2..4d9d3990a63 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ -317,8 +317,12 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) return "pack_bytes"; case VEC4_OPCODE_UNPACK_UNIFORM: return "unpack_uniform"; - case VEC4_OPCODE_FROM_DOUBLE: - return "double_to_single"; + case VEC4_OPCODE_DOUBLE_TO_F32: + return "double_to_f32"; + case VEC4_OPCODE_DOUBLE_TO_D32: + return "double_to_d32"; + case VEC4_OPCODE_DOUBLE_TO_U32: + return "double_to_u32"; case VEC4_OPCODE_TO_DOUBLE: return "single_to_double"; case VEC4_OPCODE_PICK_LOW_32BIT: diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp index b26f8035811..36855a8cff6 100644 --- a/src/intel/compiler/brw_vec4.cpp +++ b/src/intel/compiler/brw_vec4.cpp 
@@ -260,7 +260,9 @@ vec4_instruction::can_do_writemask(const struct gen_device_info *devinfo) { switch (opcode) { case SHADER_OPCODE_GEN4_SCRATCH_READ: - case VEC4_OPCODE_FROM_DOUBLE: + case VEC4_OPCODE_DOUBLE_TO_F32: + case VEC4_OPCODE_DOUBLE_TO_D32: + case VEC4_OPCODE_DOUBLE_TO_U32: case VEC4_OPCODE_TO_DOUBLE: case VEC4_OPCODE_PICK_LOW_32BIT: case VEC4_OPCODE_PICK_HIGH_32BIT: @@ -521,7 +523,9 @@ vec4_visitor::opt_reduce_swizzle() break; case VEC4_OPCODE_TO_DOUBLE: - case VEC4_OPCODE_FROM_DOUBLE: + case VEC4_OPCODE_DOUBLE_TO_F32: + case VEC4_OPCODE_DOUBLE_TO_D32: + case VEC4_OPCODE_DOUBLE_TO_U32: case VEC4_OPCODE_PICK_LOW_32BIT: case VEC4_OPCODE_PICK_HIGH_32BIT: case VEC4_OPCODE_SET_LOW_32BIT: @@ -2255,7 +2259,9 @@ static bool is_align1_df(vec4_instruction *inst) { switch (inst->opcode) { - case VEC4_OPCODE_FROM_DOUBLE: + case VEC4_OPCODE_DOUBLE_TO_F32: + case VEC4_OPCODE_DOUBLE_TO_D32: + case VEC4_OPCODE_DOUBLE_TO_U32: case VEC4_OPCODE_TO_DOUBLE: case VEC4_OPCODE_PICK_LOW_32BIT: case VEC4_OPCODE_PICK_HIGH_32BIT: diff --git a/src/intel/compiler/brw_vec4_copy_propagation.cpp b/src/intel/compiler/brw_vec4_copy_propagation.cpp index e7f6f93f8bd..c1ae32a2936 100644 --- a/src/intel/compiler/brw_vec4_copy_propagation.cpp +++ b/src/intel/compiler/brw_vec4_copy_propagation.cpp @@ -293,7 +293,9 @@ static bool is_align1_opcode(unsigned opcode) { switch (opcode) { - case VEC4_OPCODE_FROM_DOUBLE: + case VEC4_OPCODE_DOUBLE_TO_F32: + case VEC4_OPCODE_DOUBLE_TO_D32: + case VEC4_OPCODE_DOUBLE_TO_U32: case VEC4_OPCODE_TO_DOUBLE: case VEC4_OPCODE_PICK_LOW_32BIT: case VEC4_OPCODE_PICK_HIGH_32BIT: diff --git a/src/intel/compiler/brw_vec4_generator.cpp b/src/intel/compiler/brw_vec4_generator.cpp index d0fd694901f..26de5c12e8e 100644 --- a/src/intel/compiler/brw_vec4_generator.cpp +++ b/src/intel/compiler/brw_vec4_generator.cpp @@ -1944,9 +1944,28 @@ generate_code(struct brw_codegen *p, break; } - case VEC4_OPCODE_FROM_DOUBLE: { + case VEC4_OPCODE_DOUBLE_TO_F32: + case 
VEC4_OPCODE_DOUBLE_TO_D32: + case VEC4_OPCODE_DOUBLE_TO_U32: { assert(type_sz(src[0].type) == 8); - assert(type_sz(dst.type) == 4); + assert(type_sz(dst.type) == 8); + + brw_reg_type dst_type; + + switch (inst->opcode) { + case VEC4_OPCODE_DOUBLE_TO_F32: + dst_type = BRW_REGISTER_TYPE_F; + break; + case VEC4_OPCODE_DOUBLE_TO_D32: + dst_type = BRW_REGISTER_TYPE_D; + break; + case VEC4_OPCODE_DOUBLE_TO_U32: + dst_type = BRW_REGISTER_TYPE_UD; + break; + default: + unreachable("Not supported conversion"); + } + dst = retype(dst, dst_type); brw_set_default_access_mode(p, BRW_ALIGN_1); diff --git a/src/intel/compiler/brw_vec4_nir.cpp b/src/intel/compiler/brw_vec4_nir.cpp index 45d2c9f4a93..a4257e45b60 100644 --- a/src/intel/compiler/brw_vec4_nir.cpp +++ b/src/intel/compiler/brw_vec4_nir.cpp @@ -1183,16 +1183,28 @@ vec4_visitor::emit_conversion_from_double(dst_reg dst, src_reg src, return; } + enum opcode op; + switch (dst.type) { + case BRW_REGISTER_TYPE_D: + op = VEC4_OPCODE_DOUBLE_TO_D32; + break; + case BRW_REGISTER_TYPE_UD: + op = VEC4_OPCODE_DOUBLE_TO_U32; + break; + case BRW_REGISTER_TYPE_F: + op = VEC4_OPCODE_DOUBLE_TO_F32; + break; + default: + unreachable("Unknown conversion"); + } + dst_reg temp = dst_reg(this, glsl_type::dvec4_type); emit(MOV(temp, src)); - dst_reg temp2 = dst_reg(this, glsl_type::dvec4_type); - temp2 = retype(temp2, dst.type); - emit(VEC4_OPCODE_FROM_DOUBLE, temp2, src_reg(temp)) - ->size_written = 2 * REG_SIZE; + emit(op, temp2, src_reg(temp)); - emit(VEC4_OPCODE_PICK_LOW_32BIT, temp2, src_reg(retype(temp2, BRW_REGISTER_TYPE_DF))); - vec4_instruction *inst = emit(MOV(dst, src_reg(temp2))); + emit(VEC4_OPCODE_PICK_LOW_32BIT, retype(temp2, dst.type), src_reg(temp2)); + vec4_instruction *inst = emit(MOV(dst, src_reg(retype(temp2, dst.type)))); inst->saturate = saturate; } diff --git a/src/intel/compiler/brw_vec4_reg_allocate.cpp b/src/intel/compiler/brw_vec4_reg_allocate.cpp index e3b46cc2f7f..f46aca8f49d 100644 --- 
a/src/intel/compiler/brw_vec4_reg_allocate.cpp +++ b/src/intel/compiler/brw_vec4_reg_allocate.cpp @@ -456,7 +456,9 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill) * dst we see a 32-bit destination and emit a scratch write that * allocates a single spill register. */ - if (inst->opcode == VEC4_OPCODE_FROM_DOUBLE) + if (inst->opcode == VEC4_OPCODE_DOUBLE_TO_F32 || + inst->opcode == VEC4_OPCODE_DOUBLE_TO_D32 || + inst->opcode == VEC4_OPCODE_DOUBLE_TO_U32) no_spill[inst->dst.nr] = true; /* We can't spill registers that mix 32-bit and 64-bit access (that -- 2.11.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev