Enables the support of 16-bit types on load_input and store_outputs intrinsics intra-stages.
The approach was based on re-using the 32-bit URB read and writes between stages, shuffling pairs of 16-bit values into 32-bit values at load_store intrinsic and un-shuffling the values at load_inputs. v2: Minor changes after rebase against recent master (Jose Maria Casanova) v3: - Remove unnecessary retypes (Topi Pohjolainen) - Rebase needed changes as now get_nir_src doesn't returns a 32-bit type, it returns a bitsized integer. Previous implementation of this patch assumed 32-bit type for get_nir_src. (Jose María Casanova) - Move 32-16 shuffle-unshuffle helpers to independent patch. (Jose María Casanova) --- src/intel/compiler/brw_fs_nir.cpp | 69 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 67 insertions(+), 2 deletions(-) diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index eb45b5df27..b85aa17114 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -2209,12 +2209,17 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst, first_component = first_component / 2; } + if (type_sz(dst.type) == 2) { + num_components = DIV_ROUND_UP(num_components, 2); + tmp_dst = bld.vgrf(BRW_REGISTER_TYPE_F, num_components); + } + for (unsigned iter = 0; iter < num_iterations; iter++) { if (offset_const) { /* Constant indexing - use global offset. */ if (first_component != 0) { unsigned read_components = num_components + first_component; - fs_reg tmp = bld.vgrf(dst.type, read_components); + fs_reg tmp = bld.vgrf(tmp_dst.type, read_components); inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp, icp_handle); inst->size_written = read_components * tmp.component_size(inst->exec_size); @@ -2264,6 +2269,11 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst, bld.MOV(offset(dst, bld, iter * 2 + c), offset(tmp_dst, bld, c)); } + if (type_sz(dst.type) == 2) { + shuffle_32bit_load_result_to_16bit_data(bld, dst, tmp_dst, + orig_num_components); + } + if (num_iterations > 1) { num_components = orig_num_components - 2; if(offset_const) { @@ -2605,6 +2615,11 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld, dst = tmp; } + if (type_sz(dst.type) == 2) { + num_components = DIV_ROUND_UP(num_components, 2); + dst = bld.vgrf(BRW_REGISTER_TYPE_F, num_components); + } + for (unsigned iter = 0; iter < num_iterations; iter++) { if (indirect_offset.file == BAD_FILE) { /* Constant indexing - use global offset. */ @@ -2660,6 +2675,11 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld, } } + if (type_sz(orig_dst.type) == 2) { + shuffle_32bit_load_result_to_16bit_data( + bld, orig_dst, dst, instr->num_components); + } + /* Copy the temporary to the destination to deal with writemasking. * * Also attempt to deal with gl_PointSize being in the .w component. @@ -2750,6 +2770,8 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld, fs_reg value = get_nir_src(instr->src[0]); bool is_64bit = (instr->src[0].is_ssa ? instr->src[0].ssa->bit_size : instr->src[0].reg.reg->bit_size) == 64; + bool is_16bit = (instr->src[0].is_ssa ? + instr->src[0].ssa->bit_size : instr->src[0].reg.reg->bit_size) == 16; fs_reg indirect_offset = get_indirect_offset(instr); unsigned imm_offset = instr->const_index[0]; unsigned mask = instr->const_index[1]; @@ -2779,6 +2801,11 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld, num_iterations = 2; iter_components = 2; } + } else { + if (is_16bit) { + iter_components = DIV_ROUND_UP(num_components, 2); + value = retype (value, BRW_REGISTER_TYPE_D); + } } mask = mask << first_component; @@ -2824,6 +2851,13 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld, continue; if (!is_64bit) { + if (is_16bit) { + shuffle_16bit_data_for_32bit_write(bld, + retype(offset(value,bld, i), BRW_REGISTER_TYPE_F), + retype(offset(value,bld, i), BRW_REGISTER_TYPE_HF), + 2); + value = retype (value, BRW_REGISTER_TYPE_D); + } srcs[header_regs + i + first_component] = offset(value, bld, i); } else { /* We need to shuffle the 64-bit data to match the layout @@ -2967,6 +3001,11 @@ fs_visitor::nir_emit_tes_intrinsic(const fs_builder &bld, dest = tmp; } + if (type_sz(dest.type) == 2) { + num_components = DIV_ROUND_UP(num_components, 2); + dest = bld.vgrf(BRW_REGISTER_TYPE_F, num_components); + } + for (unsigned iter = 0; iter < num_iterations; iter++) { const fs_reg srcs[] = { retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD), @@ -3009,6 +3048,11 @@ fs_visitor::nir_emit_tes_intrinsic(const fs_builder &bld, } } + if (type_sz(dest.type) == 2) { + shuffle_32bit_load_result_to_16bit_data(bld, orig_dest, + dest, num_components); + } + /* If we are loading double data and we need a second read message * adjust the offset */ @@ -3362,6 +3406,13 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, num_components *= 2; } + fs_reg orig_dest = dest; + if (nir_dest_bit_size(instr->dest) == 16) { + type = BRW_REGISTER_TYPE_F; + num_components = DIV_ROUND_UP(num_components, 2); + dest = bld.vgrf(type, num_components); + } + for (unsigned int i = 0; i < num_components; i++) { struct brw_reg interp = interp_reg(base, component + i); interp = suboffset(interp, 3); @@ -3375,6 +3426,12 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, retype(dest, type), instr->num_components); } + if (nir_dest_bit_size(instr->dest) == 16) { + shuffle_32bit_load_result_to_16bit_data(bld, + orig_dest, + dest, + instr->num_components); + } break; } @@ -4279,9 +4336,17 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr unsigned num_components = instr->num_components; unsigned first_component = nir_intrinsic_component(instr); - if (nir_src_bit_size(instr->src[0]) == 64) { + unsigned bit_size = nir_src_bit_size(instr->src[0]); + + if (bit_size == 64) { src = shuffle_64bit_data_for_32bit_write(bld, src, num_components); num_components *= 2; + } else if (bit_size == 16) { + fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_D, + DIV_ROUND_UP(num_components, 2)); + shuffle_16bit_data_for_32bit_write(bld, tmp, src, num_components); + src = tmp; + num_components = DIV_ROUND_UP(num_components, 2); } fs_reg new_dest = retype(offset(outputs[instr->const_index[0]], bld, -- 2.14.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev