The previous implementation generated code like the following, in which the extractelement instructions are emitted before the load that defines %32: %33 = extractelement <4 x i8> %32, i32 0 %34 = extractelement <4 x i8> %32, i32 1 %35 = extractelement <4 x i8> %32, i32 2 %36 = extractelement <4 x i8> %32, i32 3 %32 = load <4 x i8> addrspace(1)* %31, align 4, !tbaa !3
This may cause problems in subsequent optimization passes. Fix it by placing the extractelement instructions after the load instruction: %32 = load <4 x i8> addrspace(1)* %31, align 4, !tbaa !3 %33 = extractelement <4 x i8> %32, i32 0 %34 = extractelement <4 x i8> %32, i32 1 %35 = extractelement <4 x i8> %32, i32 2 %36 = extractelement <4 x i8> %32, i32 3 Signed-off-by: Zhigang Gong <[email protected]> --- backend/src/llvm/llvm_scalarize.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/backend/src/llvm/llvm_scalarize.cpp b/backend/src/llvm/llvm_scalarize.cpp index 6394909..8e06328 100644 --- a/backend/src/llvm/llvm_scalarize.cpp +++ b/backend/src/llvm/llvm_scalarize.cpp @@ -222,6 +222,12 @@ namespace gbe { return GetComponentCount(value->getType()); } + /* set to insert new instructions after the specified instruction.*/ + void setAppendPoint(Instruction *insn) { + BasicBlock::iterator next(insn); + builder->SetInsertPoint(++next); + } + DenseMap<Value*, VectorValues> vectorVals; Module* module; IRBuilder<>* builder; @@ -632,6 +638,7 @@ namespace gbe { CallSite CS(call); CallSite::arg_iterator CI = CS.arg_begin() + 3; + setAppendPoint(call); switch (it->second) { default: break; case GEN_OCL_READ_IMAGE0: @@ -677,6 +684,7 @@ namespace gbe { bool Scalarize::scalarizeBitCast(BitCastInst* bt) { + setAppendPoint(bt); if(bt->getOperand(0)->getType()->isVectorTy()) bt->setOperand(0, InsertToVector(bt, bt->getOperand(0))); if(bt->getType()->isVectorTy()) @@ -686,6 +694,7 @@ namespace gbe { bool Scalarize::scalarizeLoad(LoadInst* ld) { + setAppendPoint(ld); extractFromVector(ld); return false; } -- 1.7.9.5 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
