Yes, I also plan to change GenContext to a pure virtual class, and I think it is better to do this when optimizing the long operations in BDW.
-----Original Message----- From: He Junyan [mailto:junyan...@inbox.com] Sent: Thursday, October 9, 2014 13:09 To: Yang, Rong R Cc: beignet@lists.freedesktop.org Subject: Re: [Beignet] [PATCH 5/5] BDW: Add class Gen8Context. This patchset is OK and will not cause regression on previous platform. In this patch set, the GenEncoder will be a pure virtual class and all platform encoders will derive from it. But the GenContext still represents the Gen7 context. I think it is better to follow the same way as the encoder to make the architecture clearer. On 一, 2014-09-29 at 13:37 +0800, Yang Rong wrote: > Now Gen8Context is almost same as Gen75Context, but still derive Gen8Context > from GenContext for clearly. > > Signed-off-by: Yang Rong <rong.r.y...@intel.com> > --- > backend/src/CMakeLists.txt | 2 + > backend/src/backend/gen8_context.cpp | 113 > +++++++++++++++++++++++++++++++++++ > backend/src/backend/gen8_context.hpp | 63 +++++++++++++++++++ > backend/src/backend/gen_program.cpp | 3 + > 4 files changed, 181 insertions(+) > create mode 100644 backend/src/backend/gen8_context.cpp > create mode 100644 backend/src/backend/gen8_context.hpp > > diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt > index 2daa630..c5d388e 100644 > --- a/backend/src/CMakeLists.txt > +++ b/backend/src/CMakeLists.txt > @@ -96,6 +96,8 @@ set (GBE_SRC > backend/gen_context.cpp > backend/gen75_context.hpp > backend/gen75_context.cpp > + backend/gen8_context.hpp > + backend/gen8_context.cpp > backend/gen_program.cpp > backend/gen_program.hpp > backend/gen_program.h > diff --git a/backend/src/backend/gen8_context.cpp > b/backend/src/backend/gen8_context.cpp > new file mode 100644 > index 0000000..a9914f6 > --- /dev/null > +++ b/backend/src/backend/gen8_context.cpp > @@ -0,0 +1,113 @@ > +/* > + * Copyright © 2012 Intel Corporation > + * > + * This library is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as 
published by the Free Software Foundation; either > + * version 2 of the License, or (at your option) any later version. > + * > + * This library is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with this library. If not, see > <http://www.gnu.org/licenses/>. > + * > + */ > + > +/** > + * \file gen8_context.cpp > + */ > + > +#include "backend/gen8_context.hpp" > +#include "backend/gen8_encoder.hpp" > +#include "backend/gen_program.hpp" > +#include "backend/gen_defs.hpp" > +#include "backend/gen_encoder.hpp" > +#include "backend/gen_insn_selection.hpp" > +#include "backend/gen_insn_scheduling.hpp" > +#include "backend/gen_reg_allocation.hpp" > +#include "sys/cvar.hpp" > +#include "ir/function.hpp" > +#include "ir/value.hpp" > +#include <cstring> > + > +namespace gbe > +{ > + void Gen8Context::emitSLMOffset(void) { > + if(kernel->getUseSLM() == false) > + return; > + > + const GenRegister slm_offset = > ra->genReg(GenRegister::ud1grf(ir::ocl::slmoffset)); > + const GenRegister slm_index = GenRegister::ud1grf(0, 0); > + //the slm index is hold in r0.0 24-27 bit, in 4K unit, shift left 12 to > get byte unit > + p->push(); > + p->curr.execWidth = 1; > + p->curr.predicate = GEN_PREDICATE_NONE; > + p->SHR(slm_offset, slm_index, GenRegister::immud(12)); > + p->pop(); > + } > + > + void Gen8Context::allocSLMOffsetCurbe(void) { > + if(fn.getUseSLM()) > + allocCurbeReg(ir::ocl::slmoffset, GBE_CURBE_SLM_OFFSET); } > + > + uint32_t Gen8Context::alignScratchSize(uint32_t size){ > + if(size == 0) > + return 0; > + uint32_t i = 2048; > + while(i < size) i *= 2; > + return i; > + } > + > + void Gen8Context::emitStackPointer(void) { > + using namespace ir; > + > + // Only emit stack pointer 
computation if we use a stack > + if (kernel->getCurbeOffset(GBE_CURBE_STACK_POINTER, 0) <= 0) > + return; > + > + // Check that everything is consistent in the kernel code > + const uint32_t perLaneSize = kernel->getStackSize(); > + const uint32_t perThreadSize = perLaneSize * this->simdWidth; > + GBE_ASSERT(perLaneSize > 0); > + GBE_ASSERT(isPowerOf<2>(perLaneSize) == true); > + GBE_ASSERT(isPowerOf<2>(perThreadSize) == true); > + > + // Use shifts rather than muls which are limited to 32x16 bit sources > + const uint32_t perLaneShift = logi2(perLaneSize); > + const uint32_t perThreadShift = logi2(perThreadSize); > + const GenRegister selStatckPtr = this->simdWidth == 8 ? > + GenRegister::ud8grf(ir::ocl::stackptr) : > + GenRegister::ud16grf(ir::ocl::stackptr); > + const GenRegister stackptr = ra->genReg(selStatckPtr); > + const GenRegister selStackBuffer = > GenRegister::ud1grf(ir::ocl::stackbuffer); > + const GenRegister bufferptr = ra->genReg(selStackBuffer); > + > + // We compute the per-lane stack pointer here > + p->push(); > + p->curr.execWidth = 1; > + p->curr.predicate = GEN_PREDICATE_NONE; > + //p->AND(GenRegister::ud1grf(126,0), GenRegister::ud1grf(0,5), > GenRegister::immud(0x1ff)); > + p->AND(GenRegister::ud1grf(126,0), GenRegister::ud1grf(0,5), > GenRegister::immud(0x7f)); > + p->AND(GenRegister::ud1grf(126,4), GenRegister::ud1grf(0,5), > GenRegister::immud(0x180)); > + p->SHR(GenRegister::ud1grf(126,4), GenRegister::ud1grf(126, 4), > GenRegister::immud(7)); > + p->curr.execWidth = this->simdWidth; > + p->SHL(stackptr, stackptr, GenRegister::immud(perLaneShift)); > + p->curr.execWidth = 1; > + p->SHL(GenRegister::ud1grf(126,0), GenRegister::ud1grf(126,0), > GenRegister::immud(2)); > + p->ADD(GenRegister::ud1grf(126,0), GenRegister::ud1grf(126,0), > GenRegister::ud1grf(126, 4)); > + p->SHL(GenRegister::ud1grf(126,0), GenRegister::ud1grf(126,0), > GenRegister::immud(perThreadShift)); > + p->curr.execWidth = this->simdWidth; > + p->ADD(stackptr, 
stackptr, bufferptr); > + p->ADD(stackptr, stackptr, GenRegister::ud1grf(126,0)); > + p->pop(); > + } > + > + void Gen8Context::newSelection(void) { > + this->sel = GBE_NEW(Selection75, *this); > + } > +} > + > diff --git a/backend/src/backend/gen8_context.hpp > b/backend/src/backend/gen8_context.hpp > new file mode 100644 > index 0000000..3269eeb > --- /dev/null > +++ b/backend/src/backend/gen8_context.hpp > @@ -0,0 +1,63 @@ > +/* > + * Copyright © 2012 Intel Corporation > + * > + * This library is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2 of the License, or (at your option) any later version. > + * > + * This library is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with this library. If not, see > <http://www.gnu.org/licenses/>. > + * > + */ > + > +/** > + * \file gen8_context.hpp > + */ > +#ifndef __GBE_GEN8_CONTEXT_HPP__ > +#define __GBE_GEN8_CONTEXT_HPP__ > + > +#include "backend/gen_context.hpp" > +#include "backend/gen8_encoder.hpp" > + > +namespace gbe > +{ > + /* This class is used to implement the HSW > + specific logic for context. */ > + class Gen8Context : public GenContext > + { > + public: > + virtual ~Gen8Context(void) { } > + Gen8Context(const ir::Unit &unit, const std::string &name, uint32_t > deviceID, bool relaxMath = false) > + : GenContext(unit, name, deviceID, relaxMath) { > + }; > + /*! device's max srcatch buffer size */ > + #define GEN8_SCRATCH_SIZE (2 * KB * KB) > + /*! Emit the per-lane stack pointer computation */ > + virtual void emitStackPointer(void); > + /*! 
Align the scratch size to the device's scratch unit size */ > + virtual uint32_t alignScratchSize(uint32_t size); > + /*! Get the device's max srcatch size */ > + virtual uint32_t getScratchSize(void) { > + //Because the allocate is use uint16_t, so clamp it, need refine > + return std::min(GEN8_SCRATCH_SIZE, 0x7fff); > + } > + > + protected: > + virtual GenEncoder* generateEncoder(void) { > + return GBE_NEW(Gen8Encoder, this->simdWidth, 8, deviceID); > + } > + > + private: > + virtual void emitSLMOffset(void); > + virtual void allocSLMOffsetCurbe(void); > + virtual void newSelection(void); > + }; > +} > +#endif /* __GBE_GEN8_CONTEXT_HPP__ */ > + > diff --git a/backend/src/backend/gen_program.cpp > b/backend/src/backend/gen_program.cpp > index 44f7e33..71a69dd 100644 > --- a/backend/src/backend/gen_program.cpp > +++ b/backend/src/backend/gen_program.cpp > @@ -51,6 +51,7 @@ > #include "backend/gen_program.hpp" > #include "backend/gen_context.hpp" > #include "backend/gen75_context.hpp" > +#include "backend/gen8_context.hpp" > #include "backend/gen_defs.hpp" > #include "backend/gen/gen_mesa_disasm.h" > #include "backend/gen_reg_allocation.hpp" > @@ -164,6 +165,8 @@ namespace gbe { > ctx = GBE_NEW(GenContext, unit, name, deviceID, relaxMath); > } else if (IS_HASWELL(deviceID)) { > ctx = GBE_NEW(Gen75Context, unit, name, deviceID, relaxMath); > + } else if (IS_BROADWELL(deviceID)) { > + ctx = GBE_NEW(Gen8Context, unit, name, deviceID, relaxMath); > } > GBE_ASSERTM(ctx != NULL, "Fail to create the gen context\n"); > _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet