The spec only required the intel_reqd_sub_group_size in the return value CL_DEVICE_SUB_GROUP_SIZES_INTEL. If some applications use the hard code sub_group_size, such as 32, then the program will exit in the GBE, I think it does not make sense.
> -----Original Message----- > From: Pan, Xiuli > Sent: Tuesday, June 13, 2017 17:04 > To: Yang, Rong R <rong.r.y...@intel.com>; beignet@lists.freedesktop.org > Subject: RE: [Beignet] [PATCH 1/3] Backend: Add intel_reqd_sub_group_size > support > > The spec has required the subgroup size to be 8 or 16, and I think we may > need to fail the build in some other place. > > -----Original Message----- > From: Yang, Rong R > Sent: Tuesday, June 13, 2017 16:44 > To: Pan, Xiuli <xiuli....@intel.com>; beignet@lists.freedesktop.org > Cc: Pan, Xiuli <xiuli....@intel.com> > Subject: RE: [Beignet] [PATCH 1/3] Backend: Add intel_reqd_sub_group_size > support > > > > > -----Original Message----- > > From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf > > Of Xiuli Pan > > Sent: Monday, June 5, 2017 16:28 > > To: beignet@lists.freedesktop.org > > Cc: Pan, Xiuli <xiuli....@intel.com> > > Subject: [Beignet] [PATCH 1/3] Backend: Add intel_reqd_sub_group_size > > support > > > > From: Pan Xiuli <xiuli....@intel.com> > > > > If we get intel_reqd_sub_group_size attribute from frontend then set > > it to backend. > > > > Signed-off-by: Pan Xiuli <xiuli....@intel.com> > > --- > > backend/src/backend/context.cpp | 6 +----- > > backend/src/backend/gen_program.cpp | 28 ++++++++++++++++++++-- > -- > > ---- > > backend/src/llvm/llvm_gen_backend.cpp | 20 ++++++++++++++++++++ > > 3 files changed, 41 insertions(+), 13 deletions(-) > > > > diff --git a/backend/src/backend/context.cpp > > b/backend/src/backend/context.cpp index e9ddd17..c9500c8 100644 > > --- a/backend/src/backend/context.cpp > > +++ b/backend/src/backend/context.cpp > > @@ -340,7 +340,6 @@ namespace gbe > > > > /////////////////////////////////////////////////////////////////////////// > > // Generic Context (shared by the simulator and the HW context) > > > > ////////////////////////////////////////////////////////////////////// > > ///// > > - IVAR(OCL_SIMD_WIDTH, 8, 15, 16); > > > > Context::Context(const ir::Unit &unit, const std::string &name) : > > unit(unit), fn(*unit.getFunction(name)), name(name), > > liveness(NULL), dag(NULL), useDWLabel(false) @@ -361,10 +360,7 @@ > namespace gbe > > } > > > > void Context::startNewCG(uint32_t simdWidth) { > > - if (simdWidth == 0 || OCL_SIMD_WIDTH != 15) > > - this->simdWidth = nextHighestPowerOf2(OCL_SIMD_WIDTH); > > - else > > - this->simdWidth = simdWidth; > > + this->simdWidth = simdWidth; > > GBE_SAFE_DELETE(this->registerAllocator); > > GBE_SAFE_DELETE(this->scratchAllocator); > > GBE_ASSERT(dag != NULL && liveness != NULL); diff --git > > a/backend/src/backend/gen_program.cpp > > b/backend/src/backend/gen_program.cpp > > index c1827b1..26b646a 100644 > > --- a/backend/src/backend/gen_program.cpp > > +++ b/backend/src/backend/gen_program.cpp > > @@ -59,6 +59,7 @@ > > #include <clang/CodeGen/CodeGenAction.h> #endif > > > > +#include "sys/cvar.hpp" > > #include <cstring> > > #include <sstream> > > #include <memory> > > @@ -138,17 +139,24 @@ namespace gbe { > > } > > > > /*! We must avoid spilling at all cost with Gen */ > > - static const struct CodeGenStrategy { > > + struct CodeGenStrategy { > > uint32_t simdWidth; > > uint32_t reservedSpillRegs; > > bool limitRegisterPressure; > > - } codeGenStrategy[] = { > > + }; > > + static const struct CodeGenStrategy codeGenStrategyDefault[] = { > > {16, 0, false}, > > {8, 0, false}, > > {8, 8, false}, > > {8, 16, false}, > > }; > > + static const struct CodeGenStrategy codeGenStrategySimd16[] = { > > + {16, 0, false}, > > + {16, 8, false}, > > + {16, 16, false}, > > + }; > > > > + IVAR(OCL_SIMD_WIDTH, 8, 15, 16); > > Kernel *GenProgram::compileKernel(const ir::Unit &unit, const > > std::string &name, > > bool relaxMath, int profiling) { > > #ifdef GBE_COMPILER_AVAILABLE @@ -156,19 +164,23 @@ namespace > gbe { > > // when the function already provides the simd width we need to use > (i.e. > > // non zero) > > const ir::Function *fn = unit.getFunction(name); > > + const struct CodeGenStrategy* codeGenStrategy = > > + codeGenStrategyDefault; > > if(fn == NULL) > > GBE_ASSERT(0); > > - uint32_t codeGenNum = sizeof(codeGenStrategy) / > > sizeof(codeGenStrategy[0]); > > + uint32_t codeGenNum = sizeof(codeGenStrategyDefault) / > > + sizeof(codeGenStrategyDefault[0]); > > uint32_t codeGen = 0; > > GenContext *ctx = NULL; > > - if (fn->getSimdWidth() == 8) { > > + if ( fn->getSimdWidth() != 0 && OCL_SIMD_WIDTH != 15) { > > + GBE_ASSERTM(0, "unsupported SIMD width!"); > > + }else if (fn->getSimdWidth() == 8 || OCL_SIMD_WIDTH == 8) { > > codeGen = 1; > > - } else if (fn->getSimdWidth() == 16) { > > - codeGenNum = 1; > > - } else if (fn->getSimdWidth() == 0) { > > + } else if (fn->getSimdWidth() == 16 || OCL_SIMD_WIDTH == 16){ > > + codeGenStrategy = codeGenStrategySimd16; > > + codeGenNum = 3; > Don't hard the codeGenStrategySimd16's array size here. > > > + } else if (fn->getSimdWidth() == 0 && OCL_SIMD_WIDTH == 15) { > > codeGen = 0; > > } else > > - GBE_ASSERT(0); > > + GBE_ASSERTM(0, "unsupported SIMD width!"); > > Kernel *kernel = NULL; > > > > // Stop when compilation is successful diff --git > > a/backend/src/llvm/llvm_gen_backend.cpp > > b/backend/src/llvm/llvm_gen_backend.cpp > > index c8e29c5..b2d9b1b 100644 > > --- a/backend/src/llvm/llvm_gen_backend.cpp > > +++ b/backend/src/llvm/llvm_gen_backend.cpp > > @@ -2125,6 +2125,7 @@ namespace gbe > > // Loop over the kernel metadatas to set the required work group size. > > size_t reqd_wg_sz[3] = {0, 0, 0}; > > size_t hint_wg_sz[3] = {0, 0, 0}; > > + size_t reqd_sg_sz = 0; > > ir::FunctionArgument::InfoFromLLVM llvmInfo; > > MDNode *addrSpaceNode = NULL; > > MDNode *typeNameNode = NULL; > > @@ -2220,6 +2221,23 @@ namespace gbe > > functionAttributes += buffer; > > functionAttributes += " "; > > } > > + if ((attrNode = F.getMetadata("intel_reqd_sub_group_size"))) { > > + GBE_ASSERT(attrNode->getNumOperands() == 1); > > + ConstantInt *sz = mdconst::extract<ConstantInt>(attrNode- > > >getOperand(0)); > > + GBE_ASSERT(sz); > > + reqd_sg_sz = sz->getZExtValue(); > > + GBE_ASSERT(reqd_sg_sz == 8 || reqd_sg_sz == 16); > I think fail the program building is better then assert. > > > > + functionAttributes += "intel_reqd_sub_group_size"; > > + std::stringstream param; > > + char buffer[100] = {0}; > > + param << "("; > > + param << reqd_sg_sz; > > + param << ")"; > > + param >> buffer; > > + functionAttributes += buffer; > > + functionAttributes += " "; > > + } > > + > > #else > > /* First find the meta data belong to this function. */ > > MDNode *node = getKernelFunctionMetadata(&F); @@ -2345,6 +2363,8 > > @@ namespace gbe #endif /* LLVM 3.9 Function metadata */ > > > > ctx.getFunction().setCompileWorkGroupSize(reqd_wg_sz[0], > > reqd_wg_sz[1], reqd_wg_sz[2]); > > + if (reqd_sg_sz) > > + ctx.setSimdWidth(reqd_sg_sz); > > > > ctx.getFunction().setFunctionAttributes(functionAttributes); > > // Loop over the arguments and output registers for them > > -- > > 2.7.4 > > > > _______________________________________________ > > Beignet mailing list > > Beignet@lists.freedesktop.org > > https://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet