--- src/amd/common/ac_nir_to_llvm.c | 32 ++-- src/gallium/auxiliary/nir/tgsi_to_nir.c | 8 +- .../drivers/freedreno/ir3/ir3_compiler_nir.c | 148 ++++++++---------- src/gallium/drivers/vc4/vc4_program.c | 8 +- src/intel/compiler/brw_fs_nir.cpp | 78 +++------ src/intel/compiler/brw_vec4_nir.cpp | 39 ++--- 6 files changed, 124 insertions(+), 189 deletions(-)
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index c950b81dca2..07c999edf5c 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -857,58 +857,44 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) src[i] = ac_to_integer(&ctx->ac, src[i]); result = ac_build_gather_values(&ctx->ac, src, num_components); break; - case nir_op_f2i16: - case nir_op_f2i32: - case nir_op_f2i64: + case nir_op_f2i: src[0] = ac_to_float(&ctx->ac, src[0]); result = LLVMBuildFPToSI(ctx->ac.builder, src[0], def_type, ""); break; - case nir_op_f2u16: - case nir_op_f2u32: - case nir_op_f2u64: + case nir_op_f2u: src[0] = ac_to_float(&ctx->ac, src[0]); result = LLVMBuildFPToUI(ctx->ac.builder, src[0], def_type, ""); break; - case nir_op_i2f16: - case nir_op_i2f32: - case nir_op_i2f64: + case nir_op_i2f: src[0] = ac_to_integer(&ctx->ac, src[0]); result = LLVMBuildSIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); break; - case nir_op_u2f16: - case nir_op_u2f32: - case nir_op_u2f64: + case nir_op_u2f: src[0] = ac_to_integer(&ctx->ac, src[0]); result = LLVMBuildUIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); break; - case nir_op_f2f16_rtz: + case nir_op_f2f_rtz: src[0] = ac_to_float(&ctx->ac, src[0]); LLVMValueRef param[2] = { src[0], ctx->ac.f32_0 }; result = ac_build_cvt_pkrtz_f16(&ctx->ac, param); result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, ""); break; - case nir_op_f2f16_rtne: - case nir_op_f2f16: - case nir_op_f2f32: - case nir_op_f2f64: + case nir_op_f2f_rtne: + case nir_op_f2f: src[0] = ac_to_float(&ctx->ac, src[0]); if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type)) result = LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); else result = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); break; - case nir_op_u2u16: - 
case nir_op_u2u32: - case nir_op_u2u64: + case nir_op_u2u: src[0] = ac_to_integer(&ctx->ac, src[0]); if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type)) result = LLVMBuildZExt(ctx->ac.builder, src[0], def_type, ""); else result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, ""); break; - case nir_op_i2i16: - case nir_op_i2i32: - case nir_op_i2i64: + case nir_op_i2i: src[0] = ac_to_integer(&ctx->ac, src[0]); if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type)) result = LLVMBuildSExt(ctx->ac.builder, src[0], def_type, ""); diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index 0ad274b535a..508d7a96130 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -1449,7 +1449,7 @@ static const nir_op op_trans[TGSI_OPCODE_LAST] = { [TGSI_OPCODE_DDY_FINE] = nir_op_fddy_fine, [TGSI_OPCODE_CEIL] = nir_op_fceil, - [TGSI_OPCODE_I2F] = nir_op_i2f32, + [TGSI_OPCODE_I2F] = nir_op_i2f, [TGSI_OPCODE_NOT] = nir_op_inot, [TGSI_OPCODE_TRUNC] = nir_op_ftrunc, [TGSI_OPCODE_SHL] = nir_op_ishl, @@ -1480,7 +1480,7 @@ static const nir_op op_trans[TGSI_OPCODE_LAST] = { [TGSI_OPCODE_END] = 0, - [TGSI_OPCODE_F2I] = nir_op_f2i32, + [TGSI_OPCODE_F2I] = nir_op_f2i, [TGSI_OPCODE_IDIV] = nir_op_idiv, [TGSI_OPCODE_IMAX] = nir_op_imax, [TGSI_OPCODE_IMIN] = nir_op_imin, @@ -1488,8 +1488,8 @@ static const nir_op op_trans[TGSI_OPCODE_LAST] = { [TGSI_OPCODE_ISGE] = nir_op_ige, [TGSI_OPCODE_ISHR] = nir_op_ishr, [TGSI_OPCODE_ISLT] = nir_op_ilt, - [TGSI_OPCODE_F2U] = nir_op_f2u32, - [TGSI_OPCODE_U2F] = nir_op_u2f32, + [TGSI_OPCODE_F2U] = nir_op_f2u, + [TGSI_OPCODE_U2F] = nir_op_u2f, [TGSI_OPCODE_UADD] = nir_op_iadd, [TGSI_OPCODE_UDIV] = nir_op_udiv, [TGSI_OPCODE_UMAD] = 0, diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 0c7a722aa0c..e42a3f52a8b 100644 --- 
a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -881,21 +881,16 @@ ir3_n2b(struct ir3_block *block, struct ir3_instruction *instr) static struct ir3_instruction * create_cov(struct ir3_context *ctx, struct ir3_instruction *src, - unsigned src_bitsize, nir_op op) + unsigned src_bitsize, unsigned dst_bitsize, nir_op op) { type_t src_type, dst_type; switch (op) { - case nir_op_f2f32: - case nir_op_f2f16_rtne: - case nir_op_f2f16_rtz: - case nir_op_f2f16: - case nir_op_f2i32: - case nir_op_f2i16: - case nir_op_f2i8: - case nir_op_f2u32: - case nir_op_f2u16: - case nir_op_f2u8: + case nir_op_f2f: + case nir_op_f2f_rtne: + case nir_op_f2f_rtz: + case nir_op_f2i: + case nir_op_f2u: switch (src_bitsize) { case 32: src_type = TYPE_F32; @@ -908,11 +903,8 @@ create_cov(struct ir3_context *ctx, struct ir3_instruction *src, } break; - case nir_op_i2f32: - case nir_op_i2f16: - case nir_op_i2i32: - case nir_op_i2i16: - case nir_op_i2i8: + case nir_op_i2f: + case nir_op_i2i: switch (src_bitsize) { case 32: src_type = TYPE_S32; @@ -928,11 +920,8 @@ create_cov(struct ir3_context *ctx, struct ir3_instruction *src, } break; - case nir_op_u2f32: - case nir_op_u2f16: - case nir_op_u2u32: - case nir_op_u2u16: - case nir_op_u2u8: + case nir_op_u2f: + case nir_op_u2u: switch (src_bitsize) { case 32: src_type = TYPE_U32; @@ -953,49 +942,56 @@ create_cov(struct ir3_context *ctx, struct ir3_instruction *src, } switch (op) { - case nir_op_f2f32: - case nir_op_i2f32: - case nir_op_u2f32: - dst_type = TYPE_F32; - break; - - case nir_op_f2f16_rtne: - case nir_op_f2f16_rtz: - case nir_op_f2f16: + case nir_op_f2f: + case nir_op_i2f: + case nir_op_u2f: + case nir_op_f2f_rtne: + case nir_op_f2f_rtz: /* TODO how to handle rounding mode? 
*/ - case nir_op_i2f16: - case nir_op_u2f16: - dst_type = TYPE_F16; - break; - - case nir_op_f2i32: - case nir_op_i2i32: - dst_type = TYPE_S32; - break; - - case nir_op_f2i16: - case nir_op_i2i16: - dst_type = TYPE_S16; - break; - - case nir_op_f2i8: - case nir_op_i2i8: - dst_type = TYPE_S8; - break; - - case nir_op_f2u32: - case nir_op_u2u32: - dst_type = TYPE_U32; + switch (dst_bitsize) { + case 32: + dst_type = TYPE_F32; + break; + case 16: + dst_type = TYPE_F16; + break; + default: + compile_error(ctx, "invalid dst bit size: %u", dst_bitsize); + } break; - case nir_op_f2u16: - case nir_op_u2u16: - dst_type = TYPE_U16; + case nir_op_f2i: + case nir_op_i2i: + switch (dst_bitsize) { + case 32: + dst_type = TYPE_S32; + break; + case 16: + dst_type = TYPE_S16; + break; + case 8: + dst_type = TYPE_S8; + break; + default: + compile_error(ctx, "invalid dst bit size: %u", dst_bitsize); + } break; - case nir_op_f2u8: - case nir_op_u2u8: - dst_type = TYPE_U8; + case nir_op_f2u: + case nir_op_u2u: + switch (dst_bitsize) { + case 32: + dst_type = TYPE_U32; + break; + case 16: + dst_type = TYPE_U16; + break; + case 8: + dst_type = TYPE_U8; + break; + default: + compile_error(ctx, "invalid dst bit size: %u", dst_bitsize); + } break; default: @@ -1012,7 +1008,7 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu) struct ir3_instruction **dst, *src[info->num_inputs]; unsigned bs[info->num_inputs]; /* bit size */ struct ir3_block *b = ctx->block; - unsigned dst_sz, wrmask; + unsigned dst_sz, dst_bs, wrmask; if (alu->dest.dest.is_ssa) { dst_sz = alu->dest.dest.ssa.num_components; @@ -1081,29 +1077,19 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu) compile_assert(ctx, src[i]); } + dst_bs = nir_dest_bit_size(alu->dest.dest); switch (alu->op) { - case nir_op_f2f32: - case nir_op_f2f16_rtne: - case nir_op_f2f16_rtz: - case nir_op_f2f16: - case nir_op_f2i32: - case nir_op_f2i16: - case nir_op_f2i8: - case nir_op_f2u32: - case nir_op_f2u16: - case nir_op_f2u8: - case 
nir_op_i2f32: - case nir_op_i2f16: - case nir_op_i2i32: - case nir_op_i2i16: - case nir_op_i2i8: - case nir_op_u2f32: - case nir_op_u2f16: - case nir_op_u2u32: - case nir_op_u2u16: - case nir_op_u2u8: - dst[0] = create_cov(ctx, src[0], bs[0], alu->op); + case nir_op_f2f: + case nir_op_f2f_rtne: + case nir_op_f2f_rtz: + case nir_op_f2i: + case nir_op_f2u: + case nir_op_i2f: + case nir_op_i2i: + case nir_op_u2f: + case nir_op_u2u: + dst[0] = create_cov(ctx, src[0], bs[0], dst_bs, alu->op); break; case nir_op_f2b: dst[0] = ir3_CMPS_F(b, src[0], 0, create_immed(b, fui(0.0)), 0); diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index bc9bd76ae95..615124e3562 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -1200,12 +1200,12 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr) result = qir_FMAX(c, src[0], src[1]); break; - case nir_op_f2i32: - case nir_op_f2u32: + case nir_op_f2i: + case nir_op_f2u: result = qir_FTOI(c, src[0]); break; - case nir_op_i2f32: - case nir_op_u2f32: + case nir_op_i2f: + case nir_op_u2f: result = qir_ITOF(c, src[0]); break; case nir_op_b2f: diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 2b36171136e..649b4dc545c 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -654,9 +654,9 @@ emit_find_msb_using_lzd(const fs_builder &bld, static brw_rnd_mode brw_rnd_mode_from_nir_op (const nir_op op) { switch (op) { - case nir_op_f2f16_rtz: + case nir_op_f2f_rtz: return BRW_RND_MODE_RTZ; - case nir_op_f2f16_rtne: + case nir_op_f2f_rtne: return BRW_RND_MODE_RTNE; default: unreachable("Operation doesn't support rounding mode"); @@ -758,47 +758,33 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) } switch (instr->op) { - case nir_op_i2f32: - case nir_op_u2f32: - if (optimize_extract_to_float(instr, result)) + case nir_op_i2f: + case nir_op_i2i: + case nir_op_u2f: 
+ case nir_op_u2u: + case nir_op_f2i: + case nir_op_f2u: + case nir_op_f2f: + case nir_op_f2f_rtne: + case nir_op_f2f_rtz: + case nir_op_b2f: + case nir_op_b2i: + if ((instr->op == nir_op_i2f || instr->op == nir_op_u2f) && + nir_dest_bit_size(instr->dest.dest) == 32 && + optimize_extract_to_float(instr, result)) return; - inst = bld.MOV(result, op[0]); - inst->saturate = instr->dest.saturate; - break; - - case nir_op_f2f16_rtne: - case nir_op_f2f16_rtz: - bld.emit(SHADER_OPCODE_RND_MODE, bld.null_reg_ud(), - brw_imm_d(brw_rnd_mode_from_nir_op(instr->op))); - /* fallthrough */ - /* In theory, it would be better to use BRW_OPCODE_F32TO16. Depending - * on the HW gen, it is a special hw opcode or just a MOV, and - * brw_F32TO16 (at brw_eu_emit) would do the work to chose. - * - * But if we want to use that opcode, we need to provide support on - * different optimizations and lowerings. As right now HF support is - * only for gen8+, it will be better to use directly the MOV, and use - * BRW_OPCODE_F32TO16 when/if we work for HF support on gen7. 
- */ + if (instr->op == nir_op_f2f_rtne || instr->op == nir_op_f2f_rtz) { + assert(nir_dest_bit_size(instr->dest.dest) == 16); + bld.emit(SHADER_OPCODE_RND_MODE, bld.null_reg_ud(), + brw_imm_d(brw_rnd_mode_from_nir_op(instr->op))); + } - case nir_op_f2f16: - inst = bld.MOV(result, op[0]); - inst->saturate = instr->dest.saturate; - break; + if (nir_op_infos[instr->op].input_types[0] == nir_type_bool32) { + op[0].type = BRW_REGISTER_TYPE_D; + op[0].negate = !op[0].negate; + } - case nir_op_b2i: - case nir_op_b2f: - op[0].type = BRW_REGISTER_TYPE_D; - op[0].negate = !op[0].negate; - /* fallthrough */ - case nir_op_f2f64: - case nir_op_f2i64: - case nir_op_f2u64: - case nir_op_i2f64: - case nir_op_i2i64: - case nir_op_u2f64: - case nir_op_u2u64: /* CHV PRM, vol07, 3D Media GPGPU Engine, Register Region Restrictions: * * "When source or destination is 64b (...), regioning in Align1 @@ -822,20 +808,6 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) inst->saturate = instr->dest.saturate; break; } - /* fallthrough */ - case nir_op_f2f32: - case nir_op_f2i32: - case nir_op_f2u32: - case nir_op_f2i16: - case nir_op_f2u16: - case nir_op_i2i32: - case nir_op_u2u32: - case nir_op_i2i16: - case nir_op_u2u16: - case nir_op_i2f16: - case nir_op_u2f16: - case nir_op_i2i8: - case nir_op_u2u8: inst = bld.MOV(result, op[0]); inst->saturate = instr->dest.saturate; break; diff --git a/src/intel/compiler/brw_vec4_nir.cpp b/src/intel/compiler/brw_vec4_nir.cpp index 564be7e5eee..6799dff03bc 100644 --- a/src/intel/compiler/brw_vec4_nir.cpp +++ b/src/intel/compiler/brw_vec4_nir.cpp @@ -1154,27 +1154,28 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) case nir_op_vec4: unreachable("not reached: should be handled by lower_vec_to_movs()"); - case nir_op_i2f32: - case nir_op_u2f32: - inst = emit(MOV(dst, op[0])); - inst->saturate = instr->dest.saturate; - break; + case nir_op_i2f: + case nir_op_i2i: + case nir_op_u2f: + case nir_op_u2u: + case nir_op_f2f: + case 
nir_op_f2i: + case nir_op_f2u: + case nir_op_b2i: + case nir_op_b2f: + if (nir_op_infos[instr->op].input_types[0] == nir_type_bool32) { + assert(op[0].type == BRW_REGISTER_TYPE_D); + op[0].negate = true; + } - case nir_op_f2f32: - case nir_op_f2i32: - case nir_op_f2u32: if (nir_src_bit_size(instr->src[0].src) == 64) emit_conversion_from_double(dst, op[0], instr->dest.saturate); + else if (nir_dest_bit_size(instr->dest.dest) == 64) + emit_conversion_to_double(dst, op[0], instr->dest.saturate); else inst = emit(MOV(dst, op[0])); break; - case nir_op_f2f64: - case nir_op_i2f64: - case nir_op_u2f64: - emit_conversion_to_double(dst, op[0], instr->dest.saturate); - break; - case nir_op_iadd: assert(nir_dest_bit_size(instr->dest.dest) < 64); /* fall through */ @@ -1538,16 +1539,6 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) emit(AND(dst, op[0], op[1])); break; - case nir_op_b2i: - case nir_op_b2f: - if (nir_dest_bit_size(instr->dest.dest) > 32) { - assert(dst.type == BRW_REGISTER_TYPE_DF); - emit_conversion_to_double(dst, negate(op[0]), false); - } else { - emit(MOV(dst, negate(op[0]))); - } - break; - case nir_op_f2b: if (nir_src_bit_size(instr->src[0].src) == 64) { /* We use a MOV with conditional_mod to check if the provided value is -- 2.19.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev