Module: Mesa Branch: main Commit: a64534754d9cce8c3c9da3421bed6d91646feef3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a64534754d9cce8c3c9da3421bed6d91646feef3
Author: Alyssa Rosenzweig <[email protected]> Date: Thu Feb 17 18:40:59 2022 -0500 pan/bi: Handle vectorized u2f16/i2f16 Will be useful when we enable int16, I guess... No shader-db changes. Signed-off-by: Alyssa Rosenzweig <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14576> --- src/panfrost/bifrost/bifrost_compile.c | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index c2ee1668c09..cd45895e67c 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -1929,6 +1929,34 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr) return; } + /* While we do not have a direct V2U32_TO_V2F16 instruction, lowering to + * MKVEC.v2i16 + V2U16_TO_V2F16 is more efficient on Bifrost than + * scalarizing due to scheduling (equal cost on Valhall). Additionally + * if the source is replicated the MKVEC.v2i16 can be optimized out. + */ + case nir_op_u2f16: + case nir_op_i2f16: { + if (!(src_sz == 32 && comps == 2)) + break; + + nir_alu_src *src = &instr->src[0]; + bi_index idx = bi_src_index(&src->src); + bi_index s0 = bi_word(idx, src->swizzle[0]); + bi_index s1 = bi_word(idx, src->swizzle[1]); + + bi_index t = (src->swizzle[0] == src->swizzle[1]) ? + bi_half(s0, false) : + bi_mkvec_v2i16(b, bi_half(s0, false), + bi_half(s1, false)); + + if (instr->op == nir_op_u2f16) + bi_v2u16_to_v2f16_to(b, dst, t, BI_ROUND_NONE); + else + bi_v2s16_to_v2f16_to(b, dst, t, BI_ROUND_NONE); + + return; + } + case nir_op_i2i8: case nir_op_u2u8: { @@ -3306,8 +3334,6 @@ bi_vectorize_filter(const nir_instr *instr, void *data) case nir_op_ushr: case nir_op_f2i16: case nir_op_f2u16: - case nir_op_i2f16: - case nir_op_u2f16: return false; default: return true;
