From: Junyan He <[email protected]> 1. Modify the typed write (state write) to use GEN_SFID_DATAPORT_DATA_CACHE. 2. Add the channel select to the surface state setup. 3. Correct the slot setting in the send message descriptor.
Signed-off-by: Junyan He <[email protected]> --- backend/src/backend/gen75_encoder.cpp | 116 +++++++++++++++++++++++++++++++++ backend/src/backend/gen75_encoder.hpp | 4 ++ backend/src/backend/gen_defs.hpp | 3 +- backend/src/backend/gen_encoder.hpp | 14 ++-- src/intel/intel_defines.h | 7 ++ src/intel/intel_gpgpu.c | 73 ++++++++++++++++----- src/intel/intel_structs.h | 11 +++- 7 files changed, 200 insertions(+), 28 deletions(-) diff --git a/backend/src/backend/gen75_encoder.cpp b/backend/src/backend/gen75_encoder.cpp index bb6d622..d1d1292 100644 --- a/backend/src/backend/gen75_encoder.cpp +++ b/backend/src/backend/gen75_encoder.cpp @@ -27,8 +27,39 @@ #include "backend/gen75_encoder.hpp" +static const uint32_t untypedRWMask[] = { + GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE|GEN_UNTYPED_GREEN|GEN_UNTYPED_RED, + GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE|GEN_UNTYPED_GREEN, + GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE, + GEN_UNTYPED_ALPHA, + 0 +}; + namespace gbe { + void Gen75Encoder::setHeader(GenInstruction *insn) { + if (this->curr.execWidth == 8) + insn->header.execution_size = GEN_WIDTH_8; + else if (this->curr.execWidth == 16) + insn->header.execution_size = GEN_WIDTH_16; + else if (this->curr.execWidth == 1) + insn->header.execution_size = GEN_WIDTH_1; + else if (this->curr.execWidth == 4) + insn->header.execution_size = GEN_WIDTH_4; + else + NOT_IMPLEMENTED; + insn->header.acc_wr_control = this->curr.accWrEnable; + insn->header.quarter_control = this->curr.quarterControl; + insn->bits1.ia1.nib_ctrl = this->curr.nibControl; + insn->header.mask_control = this->curr.noMask; + insn->bits2.ia1.flag_reg_nr = this->curr.flag; + insn->bits2.ia1.flag_sub_reg_nr = this->curr.subFlag; + if (this->curr.predicate != GEN_PREDICATE_NONE) { + insn->header.predicate_control = this->curr.predicate; + insn->header.predicate_inverse = this->curr.inversePredicate; + } + insn->header.saturate = this->curr.saturate; + } void Gen75Encoder::setDPUntypedRW(GenInstruction *insn, uint32_t bti, @@ -62,6 +93,91 @@ 
namespace gbe insn->bits3.gen7_typed_rw.slot = 1; } + void Gen75Encoder::ATOMIC(GenRegister dst, uint32_t function, GenRegister src, uint32_t bti, uint32_t srcNum) { + GenInstruction *insn = this->next(GEN_OPCODE_SEND); + uint32_t msg_length = 0; + uint32_t response_length = 0; + + if (this->curr.execWidth == 8) { + msg_length = srcNum; + response_length = 1; + } else if (this->curr.execWidth == 16) { + msg_length = 2*srcNum; + response_length = 2; + } else + NOT_IMPLEMENTED; + + this->setHeader(insn); + this->setDst(insn, GenRegister::uw16grf(dst.nr, 0)); + this->setSrc0(insn, GenRegister::ud8grf(src.nr, 0)); + this->setSrc1(insn, GenRegister::immud(0)); + + const GenMessageTarget sfid = GEN_SFID_DATAPORT1_DATA_CACHE; + setMessageDescriptor(insn, sfid, msg_length, response_length); + insn->bits3.gen7_atomic_op.msg_type = GEN75_P1_TYPED_ATOMIC_OP; + insn->bits3.gen7_atomic_op.bti = bti; + insn->bits3.gen7_atomic_op.return_data = 1; + insn->bits3.gen7_atomic_op.aop_type = function; + + if (this->curr.execWidth == 8) + insn->bits3.gen7_atomic_op.simd_mode = GEN_ATOMIC_SIMD8; + else if (this->curr.execWidth == 16) + insn->bits3.gen7_atomic_op.simd_mode = GEN_ATOMIC_SIMD16; + else + NOT_SUPPORTED; + } + + void Gen75Encoder::UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum) { + GenInstruction *insn = this->next(GEN_OPCODE_SEND); + assert(elemNum >= 1 || elemNum <= 4); + uint32_t msg_length = 0; + uint32_t response_length = 0; + if (this->curr.execWidth == 8) { + msg_length = 1; + response_length = elemNum; + } else if (this->curr.execWidth == 16) { + msg_length = 2; + response_length = 2*elemNum; + } else + NOT_IMPLEMENTED; + + this->setHeader(insn); + this->setDst(insn, GenRegister::uw16grf(dst.nr, 0)); + this->setSrc0(insn, GenRegister::ud8grf(src.nr, 0)); + this->setSrc1(insn, GenRegister::immud(0)); + setDPUntypedRW(insn, + bti, + untypedRWMask[elemNum], + GEN75_P1_UNTYPED_READ, + msg_length, + response_length); + } + + void 
Gen75Encoder::UNTYPED_WRITE(GenRegister msg, uint32_t bti, uint32_t elemNum) { + GenInstruction *insn = this->next(GEN_OPCODE_SEND); + assert(elemNum >= 1 || elemNum <= 4); + uint32_t msg_length = 0; + uint32_t response_length = 0; + this->setHeader(insn); + if (this->curr.execWidth == 8) { + this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD)); + msg_length = 1+elemNum; + } else if (this->curr.execWidth == 16) { + this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UW)); + msg_length = 2*(1+elemNum); + } + else + NOT_IMPLEMENTED; + this->setSrc0(insn, GenRegister::ud8grf(msg.nr, 0)); + this->setSrc1(insn, GenRegister::immud(0)); + setDPUntypedRW(insn, + bti, + untypedRWMask[elemNum], + GEN75_P1_UNTYPED_SURFACE_WRITE, + msg_length, + response_length); + } + void Gen75Encoder::patchJMPI(uint32_t insnID, int32_t jumpDistance) { GenInstruction &insn = this->store[insnID]; GBE_ASSERT(insnID < this->store.size()); diff --git a/backend/src/backend/gen75_encoder.hpp b/backend/src/backend/gen75_encoder.hpp index bdd294a..1bbdd2c 100644 --- a/backend/src/backend/gen75_encoder.hpp +++ b/backend/src/backend/gen75_encoder.hpp @@ -32,6 +32,10 @@ namespace gbe { public: Gen75Encoder(uint32_t simdWidth, uint32_t gen) : GenEncoder(simdWidth, gen) { }; + virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, uint32_t bti, uint32_t srcNum); + virtual void UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum); + virtual void UNTYPED_WRITE(GenRegister src, uint32_t bti, uint32_t elemNum); + virtual void setHeader(GenInstruction *insn); virtual void setDPUntypedRW(GenInstruction *insn, uint32_t bti, uint32_t rgba, uint32_t msg_type, uint32_t msg_length, uint32_t response_length); virtual void setTypedWriteMessage(GenInstruction *insn, unsigned char bti, diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp index f74b82c..3e0e8fb 100644 --- a/backend/src/backend/gen_defs.hpp +++ 
b/backend/src/backend/gen_defs.hpp @@ -857,8 +857,7 @@ struct GenInstruction struct { uint32_t bti:8; uint32_t chan_mask:4; - uint32_t pad:1; - uint32_t slot:1; + uint32_t slot:2; uint32_t msg_type:4; uint32_t pad2:1; uint32_t header_present:1; diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp index cd3dfdd..c82d7c6 100644 --- a/backend/src/backend/gen_encoder.hpp +++ b/backend/src/backend/gen_encoder.hpp @@ -145,15 +145,15 @@ namespace gbe /*! Wait instruction (used for the barrier) */ void WAIT(void); /*! Atomic instructions */ - void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, uint32_t bti, uint32_t srcNum); + virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, uint32_t bti, uint32_t srcNum); /*! Read 64-bits float/int arrays */ void READ64(GenRegister dst, GenRegister tmp, GenRegister addr, GenRegister src, uint32_t bti, uint32_t elemNum); /*! Write 64-bits float/int arrays */ void WRITE64(GenRegister src, GenRegister data, uint32_t bti, uint32_t elemNum, bool is_scalar); /*! Untyped read (upto 4 channels) */ - void UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum); + virtual void UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum); /*! Untyped write (upto 4 channels) */ - void UNTYPED_WRITE(GenRegister src, uint32_t bti, uint32_t elemNum); + virtual void UNTYPED_WRITE(GenRegister src, uint32_t bti, uint32_t elemNum); /*! Byte gather (for unaligned bytes, shorts and ints) */ void BYTE_GATHER(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemSize); /*! Byte scatter (for unaligned bytes, shorts and ints) */ @@ -176,9 +176,9 @@ namespace gbe uint32_t return_format); /*! TypedWrite instruction for texture */ - void TYPED_WRITE(GenRegister header, - bool header_present, - unsigned char bti); + virtual void TYPED_WRITE(GenRegister header, + bool header_present, + unsigned char bti); /*! 
Extended math function (2 sources) */ void MATH(GenRegister dst, uint32_t function, GenRegister src0, GenRegister src1); /*! Extended math function (1 source) */ @@ -190,7 +190,7 @@ namespace gbe //////////////////////////////////////////////////////////////////////// // Helper functions to encode //////////////////////////////////////////////////////////////////////// - void setHeader(GenInstruction *insn); + virtual void setHeader(GenInstruction *insn); virtual void setDPUntypedRW(GenInstruction *insn, uint32_t bti, uint32_t rgba, uint32_t msg_type, uint32_t msg_length, uint32_t response_length); diff --git a/src/intel/intel_defines.h b/src/intel/intel_defines.h index e5015ec..5139e43 100644 --- a/src/intel/intel_defines.h +++ b/src/intel/intel_defines.h @@ -288,6 +288,13 @@ #define I965_TILEWALK_XMAJOR 0 #define I965_TILEWALK_YMAJOR 1 +#define I965_SURCHAN_SELECT_ZERO 0 +#define I965_SURCHAN_SELECT_ONE 1 +#define I965_SURCHAN_SELECT_RED 4 +#define I965_SURCHAN_SELECT_GREEN 5 +#define I965_SURCHAN_SELECT_BLUE 6 +#define I965_SURCHAN_SELECT_ALPHA 7 + #define URB_SIZE(intel) (IS_IGDNG(intel->device_id) ? 1024 : \ IS_G4X(intel->device_id) ? 384 : 256) diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index 4cb9e0b..2696d68 100644 --- a/src/intel/intel_gpgpu.c +++ b/src/intel/intel_gpgpu.c @@ -629,6 +629,54 @@ intel_gpgpu_bind_image_gen7(intel_gpgpu_t *gpgpu, ss->ss0.render_cache_rw_mode = 1; /* XXX do we need to set it? 
*/ intel_gpgpu_set_buf_reloc_gen7(gpgpu, index, obj_bo, obj_bo_offset); gpgpu->binded_img[index - gpgpu->img_index_base] = obj_bo; + + assert(index < GEN_MAX_SURFACES); +} + +static void +intel_gpgpu_bind_image_gen75(intel_gpgpu_t *gpgpu, + uint32_t index, + dri_bo* obj_bo, + uint32_t obj_bo_offset, + uint32_t format, + cl_mem_object_type type, + int32_t w, + int32_t h, + int32_t depth, + int32_t pitch, + int32_t tiling) +{ + surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset; + gen7_surface_state_t *ss = (gen7_surface_state_t *) heap->surface[index]; +printf ("###################### here\n"); + memset(ss, 0, sizeof(*ss)); + + ss->ss0.surface_type = intel_get_surface_type(type); + ss->ss0.surface_format = format; + ss->ss1.base_addr = obj_bo->offset; + ss->ss2.width = w - 1; + ss->ss2.height = h - 1; + ss->ss3.depth = depth - 1; + ss->ss4.not_str_buf.rt_view_extent = depth - 1; + ss->ss4.not_str_buf.min_array_element = 0; + ss->ss3.pitch = pitch - 1; + ss->ss5.cache_control = cc_llc_l3; + ss->ss7.shader_r = I965_SURCHAN_SELECT_RED; + ss->ss7.shader_g = I965_SURCHAN_SELECT_GREEN; + ss->ss7.shader_b = I965_SURCHAN_SELECT_BLUE; + ss->ss7.shader_a = I965_SURCHAN_SELECT_ALPHA; + if (tiling == GPGPU_TILE_X) { + ss->ss0.tiled_surface = 1; + ss->ss0.tile_walk = I965_TILEWALK_XMAJOR; + } else if (tiling == GPGPU_TILE_Y) { + ss->ss0.tiled_surface = 1; + ss->ss0.tile_walk = I965_TILEWALK_YMAJOR; + } + ss->ss0.render_cache_rw_mode = 1; /* XXX do we need to set it? 
*/ + intel_gpgpu_set_buf_reloc_gen7(gpgpu, index, obj_bo, obj_bo_offset); + gpgpu->binded_img[index - gpgpu->img_index_base] = obj_bo; + + assert(index < GEN_MAX_SURFACES); } static void @@ -668,23 +716,6 @@ intel_gpgpu_set_stack(intel_gpgpu_t *gpgpu, uint32_t offset, uint32_t size, uint } static void -intel_gpgpu_bind_image(intel_gpgpu_t *gpgpu, - uint32_t index, - cl_buffer *obj_bo, - uint32_t obj_bo_offset, - uint32_t format, - cl_mem_object_type type, - int32_t w, - int32_t h, - int32_t depth, - int32_t pitch, - cl_gpgpu_tiling tiling) -{ - intel_gpgpu_bind_image_gen7(gpgpu, index, (drm_intel_bo*) obj_bo, obj_bo_offset, format, type, w, h, depth, pitch, tiling); - assert(index < GEN_MAX_SURFACES); -} - -static void intel_gpgpu_build_idrt(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel) { gen6_interface_descriptor_t *desc; @@ -1071,7 +1102,6 @@ intel_set_gpgpu_callbacks(int device_id) cl_gpgpu_new = (cl_gpgpu_new_cb *) intel_gpgpu_new; cl_gpgpu_delete = (cl_gpgpu_delete_cb *) intel_gpgpu_delete; cl_gpgpu_sync = (cl_gpgpu_sync_cb *) intel_gpgpu_sync; - cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image; cl_gpgpu_bind_buf = (cl_gpgpu_bind_buf_cb *) intel_gpgpu_bind_buf; cl_gpgpu_set_stack = (cl_gpgpu_set_stack_cb *) intel_gpgpu_set_stack; cl_gpgpu_state_init = (cl_gpgpu_state_init_cb *) intel_gpgpu_state_init; @@ -1096,5 +1126,12 @@ intel_set_gpgpu_callbacks(int device_id) cl_gpgpu_event_get_gpu_cur_timestamp = (cl_gpgpu_event_get_gpu_cur_timestamp_cb *)intel_gpgpu_event_get_gpu_cur_timestamp; cl_gpgpu_ref_batch_buf = (cl_gpgpu_ref_batch_buf_cb *)intel_gpgpu_ref_batch_buf; cl_gpgpu_unref_batch_buf = (cl_gpgpu_unref_batch_buf_cb *)intel_gpgpu_unref_batch_buf; + + if (IS_HASWELL(device_id)) + cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen75; + else if (IS_IVYBRIDGE(device_id)) + cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen7; + else + assert(0); } diff --git a/src/intel/intel_structs.h 
b/src/intel/intel_structs.h index 36b5971..59a9810 100644 --- a/src/intel/intel_structs.h +++ b/src/intel/intel_structs.h @@ -234,7 +234,16 @@ typedef struct gen7_surface_state } ss5; uint32_t ss6; /* unused */ - uint32_t ss7; /* unused */ + + struct { + uint32_t min_lod:12; + uint32_t pad0:4; + uint32_t shader_a:3; + uint32_t shader_b:3; + uint32_t shader_g:3; + uint32_t shader_r:3; + uint32_t pad1:4; + } ss7; } gen7_surface_state_t; STATIC_ASSERT(sizeof(gen6_surface_state_t) == sizeof(gen7_surface_state_t)); -- 1.7.9.5 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
