The patch simply store a 'align' for each kernel argument. Then the runtime could align the kernel argument address to 'align'. This patch works for constant and local address space.
Signed-off-by: Ruiling Song <ruiling.s...@intel.com> --- backend/src/backend/context.cpp | 2 ++ backend/src/backend/program.cpp | 10 ++++++++ backend/src/backend/program.h | 4 +++ backend/src/backend/program.hpp | 4 +++ backend/src/ir/context.cpp | 4 +-- backend/src/ir/context.hpp | 2 +- backend/src/ir/function.hpp | 5 ++-- backend/src/llvm/llvm_gen_backend.cpp | 20 ++++++++------- src/cl_command_queue_gen7.c | 43 +++++++++++++++++++++------------ 9 files changed, 65 insertions(+), 29 deletions(-) diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp index cbeef24..2609e7a 100644 --- a/backend/src/backend/context.cpp +++ b/backend/src/backend/context.cpp @@ -534,6 +534,8 @@ namespace gbe kernel->args = NULL; for (uint32_t argID = 0; argID < kernel->argNum; ++argID) { const auto &arg = fn.getArg(argID); + + kernel->args[argID].align = arg.align; switch (arg.type) { case ir::FunctionArgument::VALUE: case ir::FunctionArgument::STRUCTURE: diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp index 25be484..e692186 100644 --- a/backend/src/backend/program.cpp +++ b/backend/src/backend/program.cpp @@ -229,6 +229,7 @@ namespace gbe { KernelArgument& arg = args[i]; OUT_UPDATE_SZ(arg.type); OUT_UPDATE_SZ(arg.size); + OUT_UPDATE_SZ(arg.align); OUT_UPDATE_SZ(arg.bufSize); } @@ -315,6 +316,7 @@ namespace gbe { KernelArgument& arg = args[i]; IN_UPDATE_SZ(arg.type); IN_UPDATE_SZ(arg.size); + IN_UPDATE_SZ(arg.align); IN_UPDATE_SZ(arg.bufSize); } @@ -422,6 +424,7 @@ namespace gbe { outs << spaces_nl << " Arg " << i << ":\n"; outs << spaces_nl << " type value: "<< arg.type << "\n"; outs << spaces_nl << " size: "<< arg.size << "\n"; + outs << spaces_nl << " align: "<< arg.align << "\n"; outs << spaces_nl << " bufSize: "<< arg.bufSize << "\n"; } @@ -696,6 +699,11 @@ namespace gbe { return kernel->getArgSize(argID); } + static uint32_t kernelGetArgAlign(gbe_kernel genKernel, uint32_t argID) { + if (genKernel == NULL) return 0u; + const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel; + return kernel->getArgAlign(argID); + } static gbe_arg_type kernelGetArgType(gbe_kernel genKernel, uint32_t argID) { if (genKernel == NULL) return GBE_ARG_INVALID; const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel; @@ -803,6 +811,7 @@ GBE_EXPORT_SYMBOL gbe_kernel_get_code_size_cb *gbe_kernel_get_code_size = NULL; GBE_EXPORT_SYMBOL gbe_kernel_get_arg_num_cb *gbe_kernel_get_arg_num = NULL; GBE_EXPORT_SYMBOL gbe_kernel_get_arg_size_cb *gbe_kernel_get_arg_size = NULL; GBE_EXPORT_SYMBOL gbe_kernel_get_arg_type_cb *gbe_kernel_get_arg_type = NULL; +GBE_EXPORT_SYMBOL gbe_kernel_get_arg_align_cb *gbe_kernel_get_arg_align = NULL; GBE_EXPORT_SYMBOL gbe_kernel_get_simd_width_cb *gbe_kernel_get_simd_width = NULL; GBE_EXPORT_SYMBOL gbe_kernel_get_curbe_offset_cb *gbe_kernel_get_curbe_offset = NULL; GBE_EXPORT_SYMBOL gbe_kernel_get_curbe_size_cb *gbe_kernel_get_curbe_size = NULL; @@ -838,6 +847,7 @@ namespace gbe gbe_kernel_get_arg_num = gbe::kernelGetArgNum; gbe_kernel_get_arg_size = gbe::kernelGetArgSize; gbe_kernel_get_arg_type = gbe::kernelGetArgType; + gbe_kernel_get_arg_align = gbe::kernelGetArgAlign; gbe_kernel_get_simd_width = gbe::kernelGetSIMDWidth; gbe_kernel_get_curbe_offset = gbe::kernelGetCurbeOffset; gbe_kernel_get_curbe_size = gbe::kernelGetCurbeSize; diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h index 10fcc49..b0149da 100644 --- a/backend/src/backend/program.h +++ b/backend/src/backend/program.h @@ -185,6 +185,10 @@ extern gbe_kernel_get_arg_size_cb *gbe_kernel_get_arg_size; typedef enum gbe_arg_type (gbe_kernel_get_arg_type_cb)(gbe_kernel, uint32_t argID); extern gbe_kernel_get_arg_type_cb *gbe_kernel_get_arg_type; +/*! Get the align of the given argument */ +typedef uint32_t (gbe_kernel_get_arg_align_cb)(gbe_kernel, uint32_t argID); +extern gbe_kernel_get_arg_align_cb *gbe_kernel_get_arg_align; + /*! Get the simd width for the kernel */ typedef uint32_t (gbe_kernel_get_simd_width_cb)(gbe_kernel); extern gbe_kernel_get_simd_width_cb *gbe_kernel_get_simd_width; diff --git a/backend/src/backend/program.hpp b/backend/src/backend/program.hpp index 9b33b7c..dd76210 100644 --- a/backend/src/backend/program.hpp +++ b/backend/src/backend/program.hpp @@ -47,6 +47,7 @@ namespace gbe { struct KernelArgument { gbe_arg_type type; //!< Pointer, structure, image, regular value? uint32_t size; //!< Size of the argument + uint32_t align; //!< addr alignment of the argument uint32_t bufSize; //!< Contant buffer size }; @@ -88,6 +89,9 @@ namespace gbe { INLINE uint32_t getArgSize(uint32_t argID) const { return argID >= argNum ? 0u : args[argID].size; } + INLINE uint32_t getArgAlign(uint32_t argID) const { + return argID >= argNum ? 0u : args[argID].align; + } /*! Return the type of the given argument */ INLINE gbe_arg_type getArgType(uint32_t argID) const { return argID >= argNum ? GBE_ARG_INVALID : args[argID].type; diff --git a/backend/src/ir/context.cpp b/backend/src/ir/context.cpp index 400a2a0..d6815e1 100644 --- a/backend/src/ir/context.cpp +++ b/backend/src/ir/context.cpp @@ -105,10 +105,10 @@ namespace ir { return index; } - void Context::input(const std::string &name, FunctionArgument::Type type, Register reg, uint32_t elementSize) { + void Context::input(const std::string &name, FunctionArgument::Type type, Register reg, uint32_t elementSize, uint32_t align) { GBE_ASSERTM(fn != NULL, "No function currently defined"); GBE_ASSERTM(reg < fn->file.regNum(), "Out-of-bound register"); - FunctionArgument *arg = GBE_NEW(FunctionArgument, type, reg, elementSize, name); + FunctionArgument *arg = GBE_NEW(FunctionArgument, type, reg, elementSize, name, align); fn->args.push_back(arg); } diff --git a/backend/src/ir/context.hpp b/backend/src/ir/context.hpp index 242beaa..adeaf6f 100644 --- a/backend/src/ir/context.hpp +++ b/backend/src/ir/context.hpp @@ -107,7 +107,7 @@ namespace ir { /*! Create a new label for the current function */ LabelIndex label(void); /*! Append a new input register for the function */ - void input(const std::string &name, FunctionArgument::Type type, Register reg, uint32_t elemSz = 0u); + void input(const std::string &name, FunctionArgument::Type type, Register reg, uint32_t elemSz = 0u, uint32_t align = 0); /*! Append a new output register for the function */ void output(Register reg); /*! Get the immediate value */ diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp index 3d4733d..84d2504 100644 --- a/backend/src/ir/function.hpp +++ b/backend/src/ir/function.hpp @@ -106,11 +106,12 @@ namespace ir { SAMPLER = 6 }; /*! Create a function input argument */ - INLINE FunctionArgument(Type type, Register reg, uint32_t size, const std::string &name) : - type(type), reg(reg), size(size), name(name) {} + INLINE FunctionArgument(Type type, Register reg, uint32_t size, const std::string &name, uint32_t align) : + type(type), reg(reg), size(size), align(align), name(name) {} Type type; //!< Gives the type of argument we have Register reg; //!< Holds the argument uint32_t size; //!< == sizeof(void*) for ptr, sizeof(elem) for the rest + uint32_t align; //!< address alignment for the argument const std::string name; //!< Holds the function name for IR output GBE_STRUCT(FunctionArgument); // Use custom allocator }; diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index a104df4..cb3511f 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -692,7 +692,8 @@ namespace gbe void* mem = malloc(size); uint32_t offset = 0; getConstantData(c, mem, offset); - unit.newConstant((char *)mem, name, size, sizeof(unsigned)); + uint32_t alignment = getAlignmentByte(unit, type); + unit.newConstant((char *)mem, name, size, alignment); free(mem); } } @@ -1079,7 +1080,7 @@ namespace gbe const uint32_t elemSize = getTypeByteSize(unit, elemType); const uint32_t elemNum = vectorType->getNumElements(); //vector's elemType always scalar type - ctx.input(argName, ir::FunctionArgument::VALUE, reg, elemNum*elemSize); + ctx.input(argName, ir::FunctionArgument::VALUE, reg, elemNum*elemSize, getAlignmentByte(unit, type)); ir::Function& fn = ctx.getFunction(); for(uint32_t i=1; i < elemNum; i++) { @@ -1094,37 +1095,38 @@ namespace gbe "vector type in the function argument is not supported yet"); const ir::Register reg = regTranslator.newScalar(I); if (type->isPointerTy() == false) - ctx.input(argName, ir::FunctionArgument::VALUE, reg, getTypeByteSize(unit, type)); + ctx.input(argName, ir::FunctionArgument::VALUE, reg, getTypeByteSize(unit, type), getAlignmentByte(unit, type)); else { PointerType *pointerType = dyn_cast<PointerType>(type); + Type *pointed = pointerType->getElementType(); // By value structure #if LLVM_VERSION_MINOR <= 1 if (PAL.paramHasAttr(argID+1, Attribute::ByVal)) { #else if (I->hasByValAttr()) { #endif /* LLVM_VERSION_MINOR <= 1 */ - Type *pointed = pointerType->getElementType(); const size_t structSize = getTypeByteSize(unit, pointed); - ctx.input(argName, ir::FunctionArgument::STRUCTURE, reg, structSize); + ctx.input(argName, ir::FunctionArgument::STRUCTURE, reg, structSize, getAlignmentByte(unit, type)); } // Regular user provided pointer (global, local or constant) else { const uint32_t addr = pointerType->getAddressSpace(); const ir::AddressSpace addrSpace = addressSpaceLLVMToGen(addr); const uint32_t ptrSize = getTypeByteSize(unit, type); + const uint32_t align = getAlignmentByte(unit, pointed); switch (addrSpace) { case ir::MEM_GLOBAL: - ctx.input(argName, ir::FunctionArgument::GLOBAL_POINTER, reg, ptrSize); + ctx.input(argName, ir::FunctionArgument::GLOBAL_POINTER, reg, ptrSize, align); break; case ir::MEM_LOCAL: - ctx.input(argName, ir::FunctionArgument::LOCAL_POINTER, reg, ptrSize); + ctx.input(argName, ir::FunctionArgument::LOCAL_POINTER, reg, ptrSize, align); ctx.getFunction().setUseSLM(true); break; case ir::MEM_CONSTANT: - ctx.input(argName, ir::FunctionArgument::CONSTANT_POINTER, reg, ptrSize); + ctx.input(argName, ir::FunctionArgument::CONSTANT_POINTER, reg, ptrSize, align); break; case ir::IMAGE: - ctx.input(argName, ir::FunctionArgument::IMAGE, reg, ptrSize); + ctx.input(argName, ir::FunctionArgument::IMAGE, reg, ptrSize, align); ctx.getFunction().getImageSet()->append(reg, &ctx); break; default: GBE_ASSERT(addrSpace != ir::MEM_PRIVATE); diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c index fb7a8a3..0788975 100644 --- a/src/cl_command_queue_gen7.c +++ b/src/cl_command_queue_gen7.c @@ -98,37 +98,47 @@ error: static void cl_upload_constant_buffer(cl_command_queue queue, cl_kernel ker) { - /* calculate constant buffer size */ + /* calculate constant buffer size + * we need raw_size & aligned_size + */ GET_QUEUE_THREAD_GPGPU(queue); int32_t arg; - size_t offset; + size_t offset = 0; + uint32_t raw_size = 0, aligned_size =0; gbe_program prog = ker->program->opaque; const int32_t arg_n = gbe_kernel_get_arg_num(ker->opaque); size_t global_const_size = gbe_program_get_global_constant_size(prog); - uint32_t constant_buf_size = 0; + aligned_size = raw_size = global_const_size; + /* Reserve 8 bytes to get rid of 0 address */ + if(global_const_size == 0) aligned_size = 8; + for (arg = 0; arg < arg_n; ++arg) { const enum gbe_arg_type type = gbe_kernel_get_arg_type(ker->opaque, arg); if (type == GBE_ARG_CONSTANT_PTR && ker->args[arg].mem) { + uint32_t alignment = gbe_kernel_get_arg_align(ker->opaque, arg); + assert(alignment != 0); cl_mem mem = ker->args[arg].mem; - constant_buf_size += ALIGN(mem->size, 4); + raw_size += mem->size; + aligned_size = ALIGN(aligned_size, alignment); + aligned_size += mem->size; } } - if(global_const_size == 0 && constant_buf_size == 0) + if(raw_size == 0) return; - cl_buffer bo = cl_gpgpu_alloc_constant_buffer(gpgpu, constant_buf_size + global_const_size + 4); + cl_buffer bo = cl_gpgpu_alloc_constant_buffer(gpgpu, aligned_size); cl_buffer_map(bo, 1); char * cst_addr = cl_buffer_get_virtual(bo); - offset = 0; + + /* upload the global constant data */ if (global_const_size > 0) { - /* Write the global constant arrays */ gbe_program_get_global_constant_data(prog, (char*)(cst_addr+offset)); + offset += global_const_size; } - offset += ALIGN(global_const_size, 4); + /* reserve 8 bytes to get rid of 0 address */ if(global_const_size == 0) { - /* reserve 4 bytes to get rid of 0 address */ - offset += 4; + offset = 8; } /* upload constant buffer argument */ @@ -137,7 +147,8 @@ cl_upload_constant_buffer(cl_command_queue queue, cl_kernel ker) const enum gbe_arg_type type = gbe_kernel_get_arg_type(ker->opaque, arg); if (type == GBE_ARG_CONSTANT_PTR && ker->args[arg].mem) { cl_mem mem = ker->args[arg].mem; - + uint32_t alignment = gbe_kernel_get_arg_align(ker->opaque, arg); + offset = ALIGN(offset, alignment); curbe_offset = gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_KERNEL_ARGUMENT, arg); assert(curbe_offset >= 0); *(uint32_t *) (ker->curbe + curbe_offset) = offset; @@ -146,7 +157,7 @@ cl_upload_constant_buffer(cl_command_queue queue, cl_kernel ker) void * addr = cl_buffer_get_virtual(mem->bo); memcpy(cst_addr + offset, addr, mem->size); cl_buffer_unmap(mem->bo); - offset += ALIGN(mem->size, 4); + offset += mem->size; } } cl_buffer_unmap(bo); @@ -201,12 +212,14 @@ cl_curbe_fill(cl_kernel ker, } /* Handle the various offsets to SLM */ const int32_t arg_n = gbe_kernel_get_arg_num(ker->opaque); - /* align so that we kernel argument get good alignment */ - int32_t arg, slm_offset = ALIGN(gbe_kernel_get_slm_size(ker->opaque), 32); + int32_t arg, slm_offset = gbe_kernel_get_slm_size(ker->opaque); for (arg = 0; arg < arg_n; ++arg) { const enum gbe_arg_type type = gbe_kernel_get_arg_type(ker->opaque, arg); if (type != GBE_ARG_LOCAL_PTR) continue; + uint32_t align = gbe_kernel_get_arg_align(ker->opaque, arg); + assert(align != 0); + slm_offset = ALIGN(slm_offset, align); offset = gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_KERNEL_ARGUMENT, arg); assert(offset >= 0); uint32_t *slmptr = (uint32_t *) (ker->curbe + offset); -- 1.7.9.5 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet