Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/compiler/brw_fs_nir.cpp | 70 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-)
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 2a32b1449a..aff592c354 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -1662,7 +1662,75 @@ fs_visitor::get_nir_alu_dest(const nir_alu_instr *instr) * one component per register. */ const bool pad_components_to_full_register = true; - return get_nir_dest(instr->dest.dest, pad_components_to_full_register); + + switch (instr->op) { + case nir_op_flt: + case nir_op_fge: + case nir_op_feq: + case nir_op_fne: { + assert(instr->dest.dest.is_ssa); + + if (nir_src_bit_size(instr->src[0].src) > 16) + return get_nir_dest(instr->dest.dest); + + assert(nir_src_bit_size(instr->src[0].src) == 16 && + nir_src_bit_size(instr->src[1].src) == 16); + + /* Destination type for comparison operations is boolean which NIR + * treats as having 32-bit size. If, however, sources are 16-bit + * hardware will produce 16-bit result (0xFFFF/0x0000). Therefore set + * the destination type accordingly. + */ + nir_ssa_values[instr->dest.dest.ssa.index] = + bld.vgrf(BRW_REGISTER_TYPE_HF, + instr->dest.dest.ssa.num_components, + pad_components_to_full_register); + return nir_ssa_values[instr->dest.dest.ssa.index]; + } + case nir_op_inot: + case nir_op_ixor: + case nir_op_ior: + case nir_op_iand: { + assert(instr->dest.dest.is_ssa); + + const fs_reg src0 = get_nir_src(instr->src[0].src); + const fs_reg src1 = get_nir_src(instr->src[0].src); + + /* TODO: This specifically prepares for mixed precision operations which + * in principle shouldn't happen. There is, however, corner case + * when this is possible. As NIR doesn't consider how booleans + * are produced, we may end up here with one source operand + * produced from an operation with 32-bit sources and another from + * 16-bits. + * This is handled by marking this operation as producing 16-bits + * and relying on nir_emit_alu() to adjust the 32-bit source + * operand to 16-bits with stride == 2. Recall that 32-bit + * booleans are just 0xFFFFFFFF/0x00000000 and it suffices to read + * only the lower 16-bits. + * WARN: This blindly assumes that mixed precision integer source + * operands represent boolean values. There is no way of checking + * if that holds. + */ + if (brw_reg_type_to_size(src0.type) > 2 && + brw_reg_type_to_size(src1.type) > 2) + return get_nir_dest(instr->dest.dest); + + /* Translation from GLSL to NIR produces logical operations with + * integer operands even when operands are booleans. See handling + * of ir_binop_bit_*. + * As hardware will produce 16-bit results when the sources are 16-bit + * set the destination type accordingly. + */ + nir_ssa_values[instr->dest.dest.ssa.index] = + bld.vgrf(BRW_REGISTER_TYPE_W, + instr->dest.dest.ssa.num_components, + pad_components_to_full_register); + return nir_ssa_values[instr->dest.dest.ssa.index]; + } + default: + return get_nir_dest(instr->dest.dest, + pad_components_to_full_register); + } } fs_reg -- 2.11.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev