According to the OpenCL spec, the size of a vector3 is aligned to that of a vector4, and for memory loads/stores clang already aligns it to a vector4. If we do not calculate the private/local memory size as for a vector4, out-of-range memory accesses will occur.
This fixes the failure of the OpenCV 3.0 test case OCL_Arithm/MeanStdDev.Mat_Mask. v2: vec3 constant data should be aligned to vec4. Signed-off-by: Ruiling Song <[email protected]> --- backend/src/llvm/llvm_gen_backend.cpp | 3 +++ backend/src/llvm/llvm_passes.cpp | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index d2e0c87..0fbf941 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -693,8 +693,11 @@ namespace gbe case Type::TypeID::VectorTyID: { const ConstantDataSequential *cds = dyn_cast<ConstantDataSequential>(c); + const VectorType *vecTy = cast<VectorType>(type); GBE_ASSERT(cds); getSequentialData(cds, mem, offset); + if(vecTy->getNumElements() == 3) // OCL spec require align to vec4 + offset += getTypeByteSize(unit, vecTy->getElementType()); break; } case Type::TypeID::IntegerTyID: diff --git a/backend/src/llvm/llvm_passes.cpp b/backend/src/llvm/llvm_passes.cpp index b8ab844..1a38a0c 100644 --- a/backend/src/llvm/llvm_passes.cpp +++ b/backend/src/llvm/llvm_passes.cpp @@ -181,7 +181,9 @@ namespace gbe case Type::VectorTyID: { const VectorType* VecTy = cast<VectorType>(Ty); - return VecTy->getNumElements() * getTypeBitSize(unit, VecTy->getElementType()); + uint32_t numElem = VecTy->getNumElements(); + if(numElem == 3) numElem = 4; // OCL spec + return numElem * getTypeBitSize(unit, VecTy->getElementType()); } case Type::ArrayTyID: { -- 1.7.10.4 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
