LGTM, pushed, thanks.
On Fri, Feb 27, 2015 at 02:33:08PM +0800, Ruiling Song wrote: > Although opencl does not allow unaligned load/store of dword/qword, > LLVM still may generate such kind of instructions, especially > large integer load/store is legalized into load/store of qword with > possible unaligned address. The implementation is simple: > for store, bitcast d/q word to vector of bytes before writing out, > for load, load vector of bytes and then bitcast them to d/q word. > > Signed-off-by: Ruiling Song <[email protected]> > --- > backend/src/llvm/llvm_gen_backend.cpp | 76 > +++++++++++++++++++++++++++++++++ > 1 file changed, 76 insertions(+) > > diff --git a/backend/src/llvm/llvm_gen_backend.cpp > b/backend/src/llvm/llvm_gen_backend.cpp > index de846cb..c7bf153 100644 > --- a/backend/src/llvm/llvm_gen_backend.cpp > +++ b/backend/src/llvm/llvm_gen_backend.cpp > @@ -651,6 +651,8 @@ namespace gbe > Value *llvmValue, const ir::Register ptr, > const ir::AddressSpace addrSpace, Type * elemType, bool > isLoad, ir::BTI bti, > bool dwAligned); > + // handle load of dword/qword with unaligned address > + void emitUnalignedDQLoadStore(Value *llvmPtr, Value *llvmValues, > ir::AddressSpace addrSpace, ir::BTI &binding, bool isLoad, bool dwAligned); > void visitInstruction(Instruction &I) {NOT_SUPPORTED;} > private: > ir::ImmediateIndex processConstantImmIndexImpl(Constant *CPV, int32_t > index = 0u); > @@ -3931,6 +3933,67 @@ error: > } > GBE_ASSERT(bti.count <= MAX_MIXED_POINTER); > } > + // handle load of dword/qword with unaligned address > + void GenWriter::emitUnalignedDQLoadStore(Value *llvmPtr, Value > *llvmValues, ir::AddressSpace addrSpace, ir::BTI &binding, bool isLoad, bool > dwAligned) > + { > + Type *llvmType = llvmValues->getType(); > + const ir::Type type = getType(ctx, llvmType); > + unsigned byteSize = getTypeByteSize(unit, llvmType); > + const ir::Register ptr = this->getRegister(llvmPtr); > + > + Type *elemType = llvmType; > + unsigned elemNum = 1; > + if (!isScalarType(llvmType)) { > + VectorType *vectorType = cast<VectorType>(llvmType); > + elemType = vectorType->getElementType(); > + elemNum = vectorType->getNumElements(); > + } > + > + vector<ir::Register> tupleData; > + for (uint32_t elemID = 0; elemID < elemNum; ++elemID) { > + ir::Register reg; > + if(regTranslator.isUndefConst(llvmValues, elemID)) { > + Value *v = Constant::getNullValue(elemType); > + reg = this->getRegister(v); > + } else > + reg = this->getRegister(llvmValues, elemID); > + > + tupleData.push_back(reg); > + } > + const ir::Tuple tuple = ctx.arrayTuple(&tupleData[0], elemNum); > + > + vector<ir::Register> byteTupleData; > + for (uint32_t elemID = 0; elemID < byteSize; ++elemID) { > + byteTupleData.push_back(ctx.reg(ir::FAMILY_BYTE)); > + } > + const ir::Tuple byteTuple = ctx.arrayTuple(&byteTupleData[0], byteSize); > + > + if (isLoad) { > + ctx.LOAD(ir::TYPE_U8, byteTuple, ptr, addrSpace, byteSize, dwAligned, > binding); > + ctx.BITCAST(type, ir::TYPE_U8, tuple, byteTuple, elemNum, byteSize); > + } else { > + ctx.BITCAST(ir::TYPE_U8, type, byteTuple, tuple, byteSize, elemNum); > + // FIXME: byte scatter does not handle correctly vector store, after > fix that, > + // we can directly use on store instruction like: > + // ctx.STORE(ir::TYPE_U8, byteTuple, ptr, addrSpace, byteSize, > dwAligned, binding); > + const ir::RegisterFamily pointerFamily = ctx.getPointerFamily(); > + for (uint32_t elemID = 0; elemID < byteSize; elemID++) { > + const ir::Register reg = byteTupleData[elemID]; > + ir::Register addr; > + if (elemID == 0) > + addr = ptr; > + else { > + const ir::Register offset = ctx.reg(pointerFamily); > + ir::ImmediateIndex immIndex; > + immIndex = ctx.newImmediate(int32_t(elemID)); > + addr = ctx.reg(pointerFamily); > + ctx.LOADI(ir::TYPE_S32, offset, immIndex); > + ctx.ADD(ir::TYPE_S32, addr, ptr, offset); > + } > + ctx.STORE(type, addr, addrSpace, dwAligned, binding, reg); > + } > + } > + } > > extern int OCL_SIMD_WIDTH; > template <bool isLoad, typename T> > @@ -3946,6 +4009,19 @@ error: > ir::BTI binding; > gatherBTI(&I, binding); > > + Type *scalarType = llvmType; > + if (!isScalarType(llvmType)) { > + VectorType *vectorType = cast<VectorType>(llvmType); > + scalarType = vectorType->getElementType(); > + } > + > + if (!dwAligned > + && (scalarType == IntegerType::get(I.getContext(), 64) > + || scalarType == IntegerType::get(I.getContext(), 32)) > + ) { > + emitUnalignedDQLoadStore(llvmPtr, llvmValues, addrSpace, binding, > isLoad, dwAligned); > + return; > + } > // Scalar is easy. We neednot build register tuples > if (isScalarType(llvmType) == true) { > const ir::Type type = getType(ctx, llvmType); > -- > 1.7.10.4 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
