If the dst is a byte vector and every src is either a byte vector with
the same horizontal stride as the dst or a scalar (horizontal stride 0),
we don't need to split the instruction.
Thus the following instructions:
( 269) (-f1) sel(8) g95<2>:B g100<16,8,2>:B 0W {
align1 WE_normal 1Q };
( 271) (-f1) sel(8) g95.16<2>:B g100.16<16,8,2>:B 0W {
align1 WE_normal 2Q };
could be optimized to one SIMD16 instruction:
( 263) (-f1) sel(16) g95<2>:B g100<16,8,2>:B 0W {
align1 WE_normal 1H };
Signed-off-by: Zhigang Gong <[email protected]>
---
backend/src/backend/gen_encoder.cpp | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/backend/src/backend/gen_encoder.cpp
b/backend/src/backend/gen_encoder.cpp
index 51e999e..094c894 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -69,6 +69,10 @@ namespace gbe
INLINE bool needToSplitAlu1(GenEncoder *p, GenRegister dst, GenRegister src)
{
if (p->curr.execWidth != 16 || src.hstride == GEN_HORIZONTAL_STRIDE_0)
return false;
+ if (isVectorOfBytes(dst) == true &&
+ ((isVectorOfBytes(src) == true && src.hstride == dst.hstride)
+ || src.hstride == GEN_HORIZONTAL_STRIDE_0))
+ return false;
if (isVectorOfBytes(dst) == true) return true;
if (isVectorOfBytes(src) == true) return true;
return false;
@@ -79,7 +83,13 @@ namespace gbe
(src0.hstride == GEN_HORIZONTAL_STRIDE_0 &&
src1.hstride == GEN_HORIZONTAL_STRIDE_0))
return false;
- if (isVectorOfBytes(dst) == true) return true;
+ if (isVectorOfBytes(dst) == true &&
+ ((isVectorOfBytes(src0) == true && src0.hstride == dst.hstride) ||
+ src0.hstride == GEN_HORIZONTAL_STRIDE_0) &&
+ ((isVectorOfBytes(src1) == true && src1.hstride == dst.hstride) ||
+ src1.hstride == GEN_HORIZONTAL_STRIDE_0))
+ return false;
+ if (isVectorOfBytes(dst) == true ) return true;
if (isVectorOfBytes(src0) == true) return true;
if (isVectorOfBytes(src1) == true) return true;
return false;
--
1.8.3.2
_______________________________________________
Beignet mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/beignet