Render Target Message payloads for 16-bit values fit in only one register.
From Intel PRM vol07, page 249 "Render Target Messages" / "Message Data Payloads": "The half precision Render Target Write messages have data payloads that can pack a full SIMD16 payload into 1 register instead of two. The half-precision packed format is used for RGBA and Source 0 Alpha, but Source Depth data payload is always supplied in full precision." So when 16-bit data is uploaded to the payload, it will use 1 register regardless of whether it is SIMD16 or SIMD8. This change implies that we need to replicate the approach in the copy propagation of the load_payload operations. v2: By default 16-bit sources should be packed (Jason Ekstrand); include changes in copy_propagation of load_payload (Chema Casanova) --- src/intel/compiler/brw_fs.cpp | 5 ++++- src/intel/compiler/brw_fs_copy_propagation.cpp | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 449588c484..9d0b30e6e8 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -3523,7 +3523,10 @@ fs_visitor::lower_load_payload() for (uint8_t i = inst->header_size; i < inst->sources; i++) { if (inst->src[i].file != BAD_FILE) ibld.MOV(retype(dst, inst->src[i].type), inst->src[i]); - dst = offset(dst, ibld, 1); + if (type_sz(inst->src[i].type) == 2) + dst = byte_offset(dst, REG_SIZE); + else + dst = offset(dst, ibld, 1); } inst->remove(block); diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp index 92cc0a8de5..b714182fec 100644 --- a/src/intel/compiler/brw_fs_copy_propagation.cpp +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp @@ -829,7 +829,7 @@ fs_visitor::opt_copy_propagation_local(void *copy_prop_ctx, bblock_t *block, int offset = 0; for (int i = 0; i < inst->sources; i++) { int effective_width = i < inst->header_size ? 
8 : inst->exec_size; - assert(effective_width * type_sz(inst->src[i].type) % REG_SIZE == 0); + assert(effective_width * MAX2(4, type_sz(inst->src[i].type)) % REG_SIZE == 0); const unsigned size_written = effective_width * type_sz(inst->src[i].type); if (inst->src[i].file == VGRF) { @@ -845,7 +845,7 @@ fs_visitor::opt_copy_propagation_local(void *copy_prop_ctx, bblock_t *block, ralloc_free(entry); } } - offset += size_written; + offset += type_sz(inst->src[i].type) == 2 ? REG_SIZE : size_written; } } } -- 2.14.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev