For Yang Rong: this patch needs to be applied ahead of the utest patch to avoid a regression. Thanks.
Luo Xionghu Best Regards -----Original Message----- From: Luo, Xionghu Sent: Tuesday, September 22, 2015 2:51 PM To: beignet@lists.freedesktop.org Cc: Luo, Xionghu Subject: [PATCH v2 3/3] add bswap64 for gen7/gen75 and gen8 seperately. From: Luo Xionghu <xionghu....@intel.com> as the long type data layout is not continous on platform gen7/gen75, the indirect address access pattern is a bit different than gen8. Signed-off-by: Luo Xionghu <xionghu....@intel.com> --- backend/src/backend/gen8_context.cpp | 64 ++++++++++++++++++++ backend/src/backend/gen_context.cpp | 110 +++++++++++++++++++++++++++++++++++ 2 files changed, 174 insertions(+) diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp index dd5b4ca..5eb7866 100644 --- a/backend/src/backend/gen8_context.cpp +++ b/backend/src/backend/gen8_context.cpp @@ -246,6 +246,70 @@ namespace gbe p->pop(); p->MOV(dst, tmp); + }else if (src.type == GEN_TYPE_UL || src.type == GEN_TYPE_L) { + bool uniform_src = (src.hstride == GEN_HORIZONTAL_STRIDE_0); + GBE_ASSERT(uniform_src || src.subnr == 0); + GBE_ASSERT(dst.subnr == 0); + GBE_ASSERT(tmp.subnr == 0); + GBE_ASSERT(start_addr >= 0); + new_a0[0] = start_addr + 7; + new_a0[1] = start_addr + 6; + new_a0[2] = start_addr + 5; + new_a0[3] = start_addr + 4; + new_a0[4] = start_addr + 3; + new_a0[5] = start_addr + 2; + new_a0[6] = start_addr + 1; + new_a0[7] = start_addr; + if(!uniform_src) { + new_a0[8] = start_addr + 15; + new_a0[9] = start_addr + 14; + new_a0[10] = start_addr + 13; + new_a0[11] = start_addr + 12; + new_a0[12] = start_addr + 11; + new_a0[13] = start_addr + 10; + new_a0[14] = start_addr + 9; + new_a0[15] = start_addr + 8; + } else { + new_a0[8] = start_addr + 7; + new_a0[9] = start_addr + 6; + new_a0[10] = start_addr + 5; + new_a0[11] = start_addr + 4; + new_a0[12] = start_addr + 3; + new_a0[13] = start_addr + 2; + new_a0[14] = start_addr + 1; + new_a0[15] = start_addr; + } + this->setA0Content(new_a0, 56); + + p->push(); + 
p->curr.execWidth = 16; + p->curr.predicate = GEN_PREDICATE_NONE; + p->curr.noMask = 1; + GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), new_a0[0], 0); + p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), ind_src); + if(!uniform_src) + ind_src.addr_imm += 16; + p->MOV(GenRegister::offset(GenRegister::retype(tmp, GEN_TYPE_UB), 0, 16), ind_src); + for (int i = 0; i < 2; i++) { + if(!uniform_src) + ind_src.addr_imm += 16; + p->MOV(GenRegister::offset(GenRegister::retype(tmp, GEN_TYPE_UB), 1, 16*i), ind_src); + } + if (simd == 16) { + for (int i = 0; i < 2; i++) { + if(!uniform_src) + ind_src.addr_imm += 16; + p->MOV(GenRegister::offset(GenRegister::retype(tmp, GEN_TYPE_UB), 2, 16*i), ind_src); + } + for (int i = 0; i < 2; i++) { + if(!uniform_src) + ind_src.addr_imm += 16; + p->MOV(GenRegister::offset(GenRegister::retype(tmp, GEN_TYPE_UB), 3, 16*i), ind_src); + } + } + p->pop(); + + p->MOV(dst, tmp); } else { GBE_ASSERT(0); } diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index 8ee65ee..7fd43bb 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -437,6 +437,116 @@ namespace gbe p->pop(); p->MOV(dst, tmp); + }else if (src.type == GEN_TYPE_UL || src.type == GEN_TYPE_L) { + bool uniform_src = (src.hstride == GEN_HORIZONTAL_STRIDE_0); + GBE_ASSERT(uniform_src || src.subnr == 0); + GBE_ASSERT(dst.subnr == 0); + GBE_ASSERT(tmp.subnr == 0); + GBE_ASSERT(start_addr >= 0); + if (!uniform_src) { + new_a0[0] = start_addr + 3; + new_a0[1] = start_addr + 2; + new_a0[2] = start_addr + 1; + new_a0[3] = start_addr; + new_a0[4] = start_addr + 7; + new_a0[5] = start_addr + 6; + new_a0[6] = start_addr + 5; + new_a0[7] = start_addr + 4; + } else { + new_a0[0] = start_addr + 7; + new_a0[1] = start_addr + 6; + new_a0[2] = start_addr + 5; + new_a0[3] = start_addr + 4; + new_a0[4] = start_addr + 3; + new_a0[5] = start_addr + 2; + new_a0[6] = start_addr + 1; + new_a0[7] = 
start_addr; + } + this->setA0Content(new_a0, 56); + + if (!uniform_src) { + p->push(); + p->curr.execWidth = 8; + p->curr.predicate = GEN_PREDICATE_NONE; + p->curr.noMask = 1; + GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), new_a0[0], 0); + p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), ind_src); + for (int i = 1; i < 4; i++) { + if (!uniform_src) + ind_src.addr_imm += 8; + p->MOV(GenRegister::offset(GenRegister::retype(tmp, GEN_TYPE_UB), 0, 8*i), ind_src); + } + for (int i = 0; i < 4; i++) { + if (!uniform_src) + ind_src.addr_imm += 8; + p->MOV(GenRegister::offset(GenRegister::retype(tmp, GEN_TYPE_UB), 1, 8*i), ind_src); + } + if (simd == 16) { + for (int i = 0; i < 4; i++) { + if (!uniform_src) + ind_src.addr_imm += 8; + p->MOV(GenRegister::offset(GenRegister::retype(tmp, GEN_TYPE_UB), 2, 8*i), ind_src); + } + for (int i = 0; i < 4; i++) { + if (!uniform_src) + ind_src.addr_imm += 8; + p->MOV(GenRegister::offset(GenRegister::retype(tmp, GEN_TYPE_UB), 3, 8*i), ind_src); + } + } + p->pop(); + + p->push(); + p->curr.execWidth = 8; + p->curr.predicate = GEN_PREDICATE_NONE; + p->curr.noMask = 1; + if (simd == 8) { + p->MOV(GenRegister::offset(GenRegister::retype(dst, GEN_TYPE_D), 1, 0), + GenRegister::offset(GenRegister::retype(tmp, GEN_TYPE_D), 0, 0)); + p->MOV(GenRegister::offset(GenRegister::retype(dst, GEN_TYPE_D), 0, 0), + GenRegister::offset(GenRegister::retype(tmp, GEN_TYPE_D), 1, 0)); + }else if(simd == 16) { + p->MOV(GenRegister::offset(GenRegister::retype(dst, GEN_TYPE_D), 2, 0), + GenRegister::offset(GenRegister::retype(tmp, GEN_TYPE_D), 0, 0)); + p->MOV(GenRegister::offset(GenRegister::retype(dst, GEN_TYPE_D), 3, 0), + GenRegister::offset(GenRegister::retype(tmp, GEN_TYPE_D), 1, 0)); + p->MOV(GenRegister::offset(GenRegister::retype(dst, GEN_TYPE_D), 0, 0), + GenRegister::offset(GenRegister::retype(tmp, GEN_TYPE_D), 2, 0)); + p->MOV(GenRegister::offset(GenRegister::retype(dst, GEN_TYPE_D), 1, 0), + 
GenRegister::offset(GenRegister::retype(tmp, GEN_TYPE_D), 3, 0)); + } + p->pop(); + } else { + p->push(); + p->curr.execWidth = 8; + p->curr.predicate = GEN_PREDICATE_NONE; + p->curr.noMask = 1; + GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), new_a0[0], 0); + p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), ind_src); + p->pop(); + + p->push(); + p->curr.execWidth = 8; + p->curr.predicate = GEN_PREDICATE_NONE; + p->curr.noMask = 1; + GenRegister x = GenRegister::ud1grf(tmp.nr, 0); + GenRegister y = GenRegister::ud1grf(tmp.nr, 1); + GenRegister dst_ = dst; + dst_.type = GEN_TYPE_UD; + dst_.hstride = GEN_HORIZONTAL_STRIDE_1; + dst_.width = GEN_WIDTH_8; + dst_.vstride = GEN_VERTICAL_STRIDE_8; + + if (simd == 8) { + p->MOV(GenRegister::offset(GenRegister::retype(dst_, GEN_TYPE_D), 0, 0), x); + p->MOV(GenRegister::offset(GenRegister::retype(dst_, GEN_TYPE_D), 1, 0), y); + }else if(simd == 16) { + p->MOV(GenRegister::offset(GenRegister::retype(dst_, GEN_TYPE_D), 0, 0), x); + p->MOV(GenRegister::offset(GenRegister::retype(dst_, GEN_TYPE_D), 1, 0), x); + p->MOV(GenRegister::offset(GenRegister::retype(dst_, GEN_TYPE_D), 2, 0), y); + p->MOV(GenRegister::offset(GenRegister::retype(dst_, GEN_TYPE_D), 3, 0), y); + } + p->pop(); + } } else { GBE_ASSERT(0); } -- 1.9.1 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet