From: Junyan He <[email protected]> We add this function to generate a standard ELF format binary. All the verbose information we need at runtime will be stored in the .note.gpu_info section. This lets us cleanly separate the runtime from the compiler.
Signed-off-by: Junyan He <[email protected]> --- backend/src/backend/gen_program.hpp | 7 + backend/src/backend/gen_program_elf.cpp | 357 ++++++++++++++++++++++++++++++++ backend/src/backend/program.hpp | 2 + backend/src/gbe_bin_interpreter.cpp | 1 + 4 files changed, 367 insertions(+) diff --git a/backend/src/backend/gen_program.hpp b/backend/src/backend/gen_program.hpp index ff756e0..8963c38 100644 --- a/backend/src/backend/gen_program.hpp +++ b/backend/src/backend/gen_program.hpp @@ -33,6 +33,8 @@ struct GenInstruction; namespace gbe { + class GenProgramElfContext; + /*! Describe a compiled kernel */ class GenKernel : public Kernel { @@ -58,6 +60,9 @@ namespace gbe /*! Describe a compiled program */ class GenProgram : public Program { + protected: + GenProgramElfContext* elf_ctx; + public: /*! Create an empty program */ GenProgram(uint32_t deviceID, const void* mod = NULL, const void* ctx = NULL, const char* asm_fname = NULL, uint32_t fast_relaxed_math = 0) : @@ -70,6 +75,8 @@ namespace gbe virtual void CleanLlvmResource(void); /*! Implements base class */ virtual Kernel *compileKernel(const ir::Unit &unit, const std::string &name, bool relaxMath, int profiling); + /*! Generate binary format */ + virtual void *toBinaryFormat(size_t &ret_size); /*! Allocate an empty kernel. 
*/ virtual Kernel *allocateKernel(const std::string &name) { return GBE_NEW(GenKernel, name, deviceID); diff --git a/backend/src/backend/gen_program_elf.cpp b/backend/src/backend/gen_program_elf.cpp index efd45fe..0440e81 100644 --- a/backend/src/backend/gen_program_elf.cpp +++ b/backend/src/backend/gen_program_elf.cpp @@ -22,6 +22,7 @@ #include <algorithm> #include <sstream> #include <streambuf> +using namespace std; namespace gbe { @@ -121,4 +122,360 @@ protected: return count; } }; + +using namespace ELFIO; + +class GenProgramElfContext +{ +public: + enum { // 0, 1, 2 already have meanings + GEN_NOTE_TYPE_GPU_INFO = 3, + GEN_NOTE_TYPE_CL_ARG_INFO = 4, + GEN_NOTE_TYPE_CL_WORKGROUP_SIZE = 5, + }; + + GenProgram &genProg; + void emitOneKernel(GenKernel &kernel); + elfio writer; + section *text_sec; + section *sym_sec; + section *strtab_sec; + section *ker_info_sec; + section *rodata_sec; + symbol_section_accessor *syma; + string_section_accessor *stra; + note_section_accessor *note_writer; + Elf32_Word sym_num; + Elf64_Word bitcode_offset; + + ~GenProgramElfContext(void) + { + if (syma) + GBE_DELETE(syma); + if (stra) + GBE_DELETE(stra); + if (note_writer) + GBE_DELETE(note_writer); + } + + GenProgramElfContext(GenProgram &prog) : genProg(prog), text_sec(NULL), sym_sec(NULL), + strtab_sec(NULL), ker_info_sec(NULL), rodata_sec(NULL), + syma(NULL), stra(NULL), note_writer(NULL), sym_num(0), + bitcode_offset(0) + { + writer.create(ELFCLASS64, ELFDATA2LSB); + writer.set_os_abi(ELFOSABI_LINUX); + writer.set_type(ET_REL); + writer.set_machine(EM_INTEL205); // TODO: Some value of Intel GPU; + + // Create code section + text_sec = writer.sections.add(".text"); + text_sec->set_type(SHT_PROGBITS); + text_sec->set_flags(SHF_ALLOC | SHF_EXECINSTR); + text_sec->set_addr_align(4); + + // Create string table section + strtab_sec = writer.sections.add(".strtab"); + strtab_sec->set_type(SHT_STRTAB); + strtab_sec->set_addr_align(1); + + // Create symbol table section + sym_sec = 
writer.sections.add(".symtab"); + sym_sec->set_type(SHT_SYMTAB); + sym_sec->set_addr_align(0x4); + sym_sec->set_entry_size(writer.get_default_entry_size(SHT_SYMTAB)); + sym_sec->set_link(strtab_sec->get_index()); + sym_sec->set_info(0x01); + + // Create kernel info section + ker_info_sec = writer.sections.add(".note.gpu_info"); + ker_info_sec->set_type(SHT_NOTE); + text_sec->set_flags(SHF_ALLOC); + ker_info_sec->set_addr_align(0x04); + + // Create string table writer + stra = GBE_NEW(string_section_accessor, strtab_sec); + // Create symbol table writer + syma = GBE_NEW(symbol_section_accessor, writer, sym_sec); + // Create note writer + note_writer = GBE_NEW(note_section_accessor, writer, ker_info_sec); + } +}; + +void GenProgramElfContext::emitOneKernel(GenKernel &kernel) +{ + assert(text_sec != NULL); + assert(sym_sec != NULL); + assert(text_sec != NULL); + assert(syma != NULL); + assert(stra != NULL); + + sym_num++; + + // Add the kernel's bitcode to .text section + text_sec->append_data(kernel.getCode(), kernel.getCodeSize()); + // Add the kernel func as a symbol + syma->add_symbol(*stra, kernel.getName(), bitcode_offset, kernel.getCodeSize(), + STB_GLOBAL, STT_FUNC, 0, text_sec->get_index()); + bitcode_offset += kernel.getCodeSize(); + + void *kernel_info = GBE_MALLOC(8 * 1024); + void *ptr = kernel_info; + Elf32_Word *p_simd = static_cast<Elf32_Word *>(ptr); + Elf32_Word *p_slm_sz = static_cast<Elf32_Word *>(ptr) + 1; + Elf32_Word *p_scratch_sz = static_cast<Elf32_Word *>(ptr) + 2; + Elf32_Word *p_stack_sz = static_cast<Elf32_Word *>(ptr) + 3; + Elf32_Word *p_barrier_slm_used = static_cast<Elf32_Word *>(ptr) + 4; + Elf32_Word *p_arg_num = static_cast<Elf32_Word *>(ptr) + 5; + *p_simd = kernel.getSIMDWidth(); + *p_slm_sz = kernel.getSLMSize(); + *p_scratch_sz = kernel.getScratchSize(); + *p_stack_sz = kernel.getStackSize(); + *p_barrier_slm_used = kernel.getUseSLM(); + *p_arg_num = kernel.getArgNum(); + + ptr = static_cast<char *>(ptr) + 6 * 
sizeof(Elf32_Word); + + size_t sampler_data_sz = kernel.getSamplerSize(); + uint32_t *sampler_data = NULL; + if (sampler_data_sz) { + sampler_data = static_cast<uint32_t *>(GBE_MALLOC(sampler_data_sz)); + ::memset(sampler_data, 0, sampler_data_sz); + kernel.getSamplerData(sampler_data); + } + + size_t image_data_sz = kernel.getImageSize(); + ImageInfo *image_data = NULL; + if (image_data_sz) { + image_data = static_cast<ImageInfo *>(GBE_MALLOC(image_data_sz)); + ::memset(image_data, 0, image_data_sz); + kernel.getImageData(image_data); + } + + for (unsigned int i = 0; i < *p_arg_num; i++) { + Elf32_Word *arg_index = static_cast<Elf32_Word *>(ptr); + Elf32_Word *arg_size = static_cast<Elf32_Word *>(ptr) + 1; + Elf32_Word *arg_type = static_cast<Elf32_Word *>(ptr) + 2; + Elf32_Word *arg_offset = static_cast<Elf32_Word *>(ptr) + 3; + Elf32_Word *arg_space = static_cast<Elf32_Word *>(ptr) + 4; + Elf32_Word *arg_align = static_cast<Elf32_Word *>(ptr) + 5; + Elf32_Word *arg_info = static_cast<Elf32_Word *>(ptr) + 6; + + *arg_index = i; + *arg_size = kernel.getArgSize(i); + *arg_type = kernel.getArgType(i); + *arg_align = kernel.getArgAlign(i); +#if 0 + *arg_space = kernel.getArgAddressSpace(i); + + if (*arg_type == GBE_ARG_TYPE_POINTER && *arg_space == GBE_ADDRESS_SPACE_GLOBAL) { + *arg_info = kernel.getArgBTI(i); + } else if (*arg_type == GBE_ARG_TYPE_IMAGE) { + assert(image_data_sz > 0); + for (size_t j = 0; j < image_data_sz / sizeof(ImageInfo); j++) { + if (image_data[j].arg_idx == static_cast<int32_t>(i)) { + *arg_info = static_cast<Elf32_Word>(image_data[j].idx); + break; + } + } + } else if (*arg_type == GBE_ARG_TYPE_SAMPLER) { + assert(sampler_data_sz > 0); + for (size_t j = 0; j < sampler_data_sz / sizeof(uint32_t); j++) { + if (((sampler_data[i] & __CLK_SAMPLER_ARG_MASK) >> __CLK_SAMPLER_ARG_BASE) == + static_cast<uint32_t>(i)) { + *arg_info = static_cast<Elf32_Word>(j); + break; + } + } + } +#endif + *arg_offset = 
kernel.getCurbeOffset(GBE_CURBE_KERNEL_ARGUMENT, i); + ptr = static_cast<char *>(ptr) + 7 * sizeof(Elf32_Word); + } + + /* Store all the sampler info */ + *(static_cast<Elf32_Word *>(ptr)) = + static_cast<Elf32_Word>(sampler_data_sz / sizeof(uint32_t)); // Samper number + ptr = static_cast<char *>(ptr) + sizeof(Elf32_Word); + if (sampler_data_sz > 0) { + ::memcpy(ptr, sampler_data, sampler_data_sz); + GBE_FREE(sampler_data); + ptr = static_cast<char *>(ptr) + sampler_data_sz; + } + + /* Store all the Image info */ + *(static_cast<Elf32_Word *>(ptr)) = + static_cast<Elf32_Word>(image_data_sz / sizeof(ImageInfo)); // Image number + ptr = static_cast<char *>(ptr) + sizeof(Elf32_Word); + if (image_data_sz > 0) { + std::sort(image_data, image_data + image_data_sz / sizeof(ImageInfo), + [](ImageInfo &a, ImageInfo &b) { return a.idx < b.idx; }); + + /* Store all the image info by index */ + for (size_t i = 0; i < image_data_sz / sizeof(ImageInfo); i++) { + Elf32_Word *bti = static_cast<Elf32_Word *>(ptr); + Elf32_Word *width = static_cast<Elf32_Word *>(ptr) + 1; + Elf32_Word *height = static_cast<Elf32_Word *>(ptr) + 2; + Elf32_Word *depth = static_cast<Elf32_Word *>(ptr) + 3; + Elf32_Word *data_type = static_cast<Elf32_Word *>(ptr) + 4; + Elf32_Word *channel_order = static_cast<Elf32_Word *>(ptr) + 5; + Elf32_Word *dim_order = static_cast<Elf32_Word *>(ptr) + 6; + + *bti = image_data[i].idx; + *width = image_data[i].wSlot; + *height = image_data[i].hSlot; + *depth = image_data[i].depthSlot; + *data_type = image_data[i].dataTypeSlot; + *channel_order = image_data[i].channelOrderSlot; + *dim_order = image_data[i].dimOrderSlot; + + ptr = static_cast<char *>(ptr) + 7 * sizeof(Elf32_Word); + } + + GBE_FREE(image_data); + } + + Elf32_Word *p_virt_phy_num = static_cast<Elf32_Word *>(ptr); + ptr = static_cast<char *>(ptr) + sizeof(Elf32_Word); + unsigned int virt_phy_num = 0; + +/*Store the special vitrual register map */ +#define STORE_CURB_MAP(curbe_enum, data_type, uniform) \ 
+ if (kernel.getCurbeOffset(curbe_enum, 0) >= 0) { \ + *static_cast<Elf32_Word *>(ptr) = curbe_enum; \ + ptr = static_cast<char *>(ptr) + sizeof(Elf32_Word); \ + *static_cast<Elf32_Word *>(ptr) = kernel.getCurbeOffset(curbe_enum, 0); \ + ptr = static_cast<char *>(ptr) + sizeof(Elf32_Word); \ + *static_cast<Elf32_Word *>(ptr) = \ + uniform ? sizeof(data_type) : sizeof(data_type) * kernel.getSIMDWidth(); \ + ptr = static_cast<char *>(ptr) + sizeof(Elf32_Word); \ + virt_phy_num++; \ + } + STORE_CURB_MAP(GBE_CURBE_LOCAL_ID_X, Elf32_Word, 0); + STORE_CURB_MAP(GBE_CURBE_LOCAL_ID_Y, Elf32_Word, 0); + STORE_CURB_MAP(GBE_CURBE_LOCAL_ID_Z, Elf32_Word, 0); + STORE_CURB_MAP(GBE_CURBE_LOCAL_SIZE_X, Elf32_Word, 1); + STORE_CURB_MAP(GBE_CURBE_LOCAL_SIZE_Y, Elf32_Word, 1); + STORE_CURB_MAP(GBE_CURBE_LOCAL_SIZE_Z, Elf32_Word, 1); + STORE_CURB_MAP(GBE_CURBE_ENQUEUED_LOCAL_SIZE_X, Elf32_Word, 1); + STORE_CURB_MAP(GBE_CURBE_ENQUEUED_LOCAL_SIZE_Y, Elf32_Word, 1); + STORE_CURB_MAP(GBE_CURBE_ENQUEUED_LOCAL_SIZE_Z, Elf32_Word, 1); + STORE_CURB_MAP(GBE_CURBE_GLOBAL_SIZE_X, Elf32_Word, 1); + STORE_CURB_MAP(GBE_CURBE_GLOBAL_SIZE_Y, Elf32_Word, 1); + STORE_CURB_MAP(GBE_CURBE_GLOBAL_SIZE_Z, Elf32_Word, 1); + STORE_CURB_MAP(GBE_CURBE_GLOBAL_OFFSET_X, Elf32_Word, 1); + STORE_CURB_MAP(GBE_CURBE_GLOBAL_OFFSET_Y, Elf32_Word, 1); + STORE_CURB_MAP(GBE_CURBE_GLOBAL_OFFSET_Z, Elf32_Word, 1); + STORE_CURB_MAP(GBE_CURBE_GROUP_NUM_X, Elf32_Word, 1); + STORE_CURB_MAP(GBE_CURBE_GROUP_NUM_Y, Elf32_Word, 1); + STORE_CURB_MAP(GBE_CURBE_GROUP_NUM_Z, Elf32_Word, 1); + STORE_CURB_MAP(GBE_CURBE_WORK_DIM, Elf32_Word, 1); + STORE_CURB_MAP(GBE_CURBE_BLOCK_IP, Elf32_Half, 0); + STORE_CURB_MAP(GBE_CURBE_DW_BLOCK_IP, Elf32_Word, 0); + STORE_CURB_MAP(GBE_CURBE_THREAD_NUM, Elf32_Word, 1); + STORE_CURB_MAP(GBE_CURBE_THREAD_ID, Elf32_Word, 1); + STORE_CURB_MAP(GBE_CURBE_CONSTANT_ADDRSPACE, Elf64_Word, 1); + STORE_CURB_MAP(GBE_CURBE_STACK_SIZE, Elf64_Word, 1); +#undef STORE_CURB_MAP + if 
(kernel.getCurbeOffset(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER) >= 0) { + *static_cast<Elf32_Word *>(ptr) = GBE_CURBE_EXTRA_ARGUMENT; + ptr = static_cast<char *>(ptr) + sizeof(Elf32_Word); + *static_cast<Elf32_Word *>(ptr) = + kernel.getCurbeOffset(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER); + ptr = static_cast<char *>(ptr) + sizeof(Elf32_Word); + *static_cast<Elf32_Word *>(ptr) = sizeof(Elf64_Word); + ptr = static_cast<char *>(ptr) + sizeof(Elf32_Word); + virt_phy_num++; + } + *p_virt_phy_num = virt_phy_num; + + Elf_Word total_sz = static_cast<char *>(ptr) - static_cast<char *>(kernel_info); + note_writer->add_note(GEN_NOTE_TYPE_GPU_INFO, kernel.getName(), kernel_info, total_sz); + +#if 0 + for (int i = 0; i < (int)total_sz; i++) { + if (i % 16 == 0) + printf("\n"); + if (i % 2 == 0) + printf(" "); + printf("%2.2x", ((unsigned char *)kernel_info)[i]); + } + printf("\n"); + for (int i = 0; i < (int)total_sz / 4; i++) { + printf(" %d", ((unsigned int *)kernel_info)[i]); + } +#endif + + GBE_FREE(kernel_info); +} + +/* The format for one Gen Kernel function is following note section format + ------------------------ + | Function Name size:4 | + ------------------------ + | Desc size:4 | + ---------------- + | Type:4 | + -------------------- + | The name(strlen) | + ----------------------------------------------------------------------------------------------- + | SIMD:4 | Local Mem Size:4 | Scratch Size:4 | Stack Size :4 | Barrier/SLM Used:4 | Arg Num:4 | + ----------------------------------------------------------------------------------------------- + Then the format for each argument is + -------------------------------------------------------------------------------------------------------------------------- + | Index:4 | Size:4 | Type:4 | Offset:4 | Addr Space:4 | Align(if is ptr) | BTI(if buffer):4 / Index(sampler and image):4 | + -------------------------------------------------------------------------------------------------------------------------- + Then 
all sampler info + ----------------------------------- + | Number:4 | SamperInfo:4 | ......| + ----------------------------------- + Then all image info + ---------------------------------------------------------------------------------------------------------- + | Number:4 | BTI:4 | Width:4 | Height:4 | Depth:4 | Data Type:4 | Channel Order:4 | Dim Order:4 | .......| + ---------------------------------------------------------------------------------------------------------- + Last is the map table of special virtual register and phy register + -------------------------------------------------------- + | Number:4 | Virt Reg:4 | Phy Reg:4 | Size:4 |.........| + -------------------------------------------------------- */ +void * +GenProgram::toBinaryFormat(size_t &ret_size) +{ + ret_size = 0; + assert(elf_ctx == NULL); + elf_ctx = GBE_NEW(GenProgramElfContext, *this); + + if (getGlobalConstantSize() > 0) { + elf_ctx->rodata_sec = elf_ctx->writer.sections.add(".rodata"); + elf_ctx->rodata_sec->set_type(SHT_PROGBITS); + elf_ctx->rodata_sec->set_flags(SHF_ALLOC); + elf_ctx->rodata_sec->set_addr_align(1); + + char *const_data = static_cast<char *>(GBE_MALLOC(getGlobalConstantSize())); + getGlobalConstantData(const_data); + elf_ctx->rodata_sec->set_data(const_data, getGlobalConstantSize()); + GBE_FREE(const_data); + } + + for (map<std::string, Kernel *>::const_iterator it = kernels.begin(); + it != kernels.end(); ++it) { + GenKernel *k = static_cast<GenKernel *>(it->second); + elf_ctx->emitOneKernel(*k); + } + + wmemstreambuf membuf(4096); + std::ostream oss(&membuf); + elf_ctx->writer.save(oss); + GBE_DELETE(elf_ctx); + + size_t elf_size = 0; + char *elf_mem = membuf.getcontent(elf_size); + if (elf_size == 0) + return NULL; + + void *p_elf_ret = ::malloc(elf_size); + ::memcpy(p_elf_ret, elf_mem, elf_size); + ret_size = elf_size; + return p_elf_ret; +} + } /* namespace gbe */ diff --git a/backend/src/backend/program.hpp b/backend/src/backend/program.hpp index 
1aff8b9..1eaaa11 100644 --- a/backend/src/backend/program.hpp +++ b/backend/src/backend/program.hpp @@ -341,6 +341,8 @@ namespace gbe { /*! Compile a kernel */ virtual Kernel *compileKernel(const ir::Unit &unit, const std::string &name, bool relaxMath, int profiling) = 0; + /*! Generate binary format */ + virtual void *toBinaryFormat(size_t &ret_size) = 0; /*! Allocate an empty kernel. */ virtual Kernel *allocateKernel(const std::string &name) = 0; /*! Kernels sorted by their name */ diff --git a/backend/src/gbe_bin_interpreter.cpp b/backend/src/gbe_bin_interpreter.cpp index 64cacd9..6b14dcd 100644 --- a/backend/src/gbe_bin_interpreter.cpp +++ b/backend/src/gbe_bin_interpreter.cpp @@ -30,6 +30,7 @@ #undef GBE_COMPILER_AVAILABLE #include "backend/program.cpp" #include "backend/gen_program.cpp" +#include "backend/gen_program_elf.cpp" #include "ir/sampler.cpp" #include "ir/image.cpp" -- 2.7.4 _______________________________________________ Beignet mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/beignet
