From: Ian Romanick <ian.d.roman...@intel.com> Also expand the message type field by a single bit. This is necessary for this Gen9 message type, but there are also Gen8 message types that need the extra bit (mostly for bindless).
Signed-off-by: Ian Romanick <ian.d.roman...@intel.com> --- src/intel/compiler/brw_disasm.c | 15 +++- src/intel/compiler/brw_eu.h | 11 +++ src/intel/compiler/brw_eu_defines.h | 12 ++- src/intel/compiler/brw_eu_emit.c | 48 ++++++++++ src/intel/compiler/brw_fs.cpp | 23 +++++ src/intel/compiler/brw_fs.h | 4 + src/intel/compiler/brw_fs_copy_propagation.cpp | 2 + src/intel/compiler/brw_fs_dead_code_eliminate.cpp | 2 + src/intel/compiler/brw_fs_generator.cpp | 7 ++ src/intel/compiler/brw_fs_nir.cpp | 103 ++++++++++++++++++++++ src/intel/compiler/brw_fs_surface_builder.cpp | 24 +++++ src/intel/compiler/brw_fs_surface_builder.h | 7 ++ src/intel/compiler/brw_inst.h | 2 +- src/intel/compiler/brw_schedule_instructions.cpp | 1 + src/intel/compiler/brw_shader.cpp | 6 ++ 15 files changed, 264 insertions(+), 3 deletions(-) diff --git a/src/intel/compiler/brw_disasm.c b/src/intel/compiler/brw_disasm.c index e45762afb07..322f4544dfd 100644 --- a/src/intel/compiler/brw_disasm.c +++ b/src/intel/compiler/brw_disasm.c @@ -406,7 +406,7 @@ static const char *const dp_dc0_msg_type_gen7[16] = { [GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE] = "DC untyped surface write", }; -static const char *const dp_dc1_msg_type_hsw[16] = { +static const char *const dp_dc1_msg_type_hsw[32] = { [HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ] = "untyped surface read", [HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP] = "DC untyped atomic op", [HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2] = @@ -421,6 +421,8 @@ static const char *const dp_dc1_msg_type_hsw[16] = { [HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP_SIMD4X2] = "DC 4x2 atomic counter op", [HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE] = "DC typed surface write", + [GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP] = + "DC untyped atomic float op", }; static const char *const aop[16] = { @@ -441,6 +443,12 @@ static const char *const aop[16] = { [BRW_AOP_PREDEC] = "predec", }; +static const char *const aop_float[4] = { + [BRW_AOP_FMAX] = "fmax", + [BRW_AOP_FMIN] = "fmin", + 
[BRW_AOP_FCMPWR] = "fcmpwr", +}; + static const char * const pixel_interpolator_msg_types[4] = { [GEN7_PIXEL_INTERPOLATOR_LOC_SHARED_OFFSET] = "per_message_offset", [GEN7_PIXEL_INTERPOLATOR_LOC_SAMPLE] = "sample_position", @@ -1797,6 +1805,11 @@ brw_disassemble_inst(FILE *file, const struct gen_device_info *devinfo, simd_modes[msg_ctrl >> 4], msg_ctrl & 0xf); break; } + case GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP: + format(file, "SIMD%d,", (msg_ctrl & (1 << 4)) ? 8 : 16); + control(file, "atomic float op", aop_float, msg_ctrl & 0xf, + &space); + break; default: format(file, "0x%x", msg_ctrl); } diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h index 0f07eeb3d6d..b0497806721 100644 --- a/src/intel/compiler/brw_eu.h +++ b/src/intel/compiler/brw_eu.h @@ -481,6 +481,17 @@ brw_untyped_atomic(struct brw_codegen *p, bool response_expected, bool header_present); +void +brw_untyped_atomic_float(struct brw_codegen *p, + struct brw_reg dst, + struct brw_reg payload, + struct brw_reg surface, + unsigned atomic_op, + unsigned msg_length, + bool response_expected, + bool header_present); + + void brw_untyped_surface_read(struct brw_codegen *p, struct brw_reg dst, diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index ee306a6c2ce..ac6dd02330a 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -388,6 +388,8 @@ enum opcode { */ SHADER_OPCODE_UNTYPED_ATOMIC, SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL, + SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT, + SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL, SHADER_OPCODE_UNTYPED_SURFACE_READ, SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL, SHADER_OPCODE_UNTYPED_SURFACE_WRITE, @@ -1153,6 +1155,7 @@ enum brw_message_target { #define HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP 11 #define HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP_SIMD4X2 12 #define HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE 13 +#define GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP 0x1b /* GEN9 */ 
#define GEN9_DATAPORT_RC_RENDER_TARGET_WRITE 12 @@ -1171,7 +1174,9 @@ enum brw_message_target { #define GEN8_BTI_STATELESS_IA_COHERENT 255 #define GEN8_BTI_STATELESS_NON_COHERENT 253 -/* dataport atomic operations. */ +/* Dataport atomic operations for Untyped Atomic Integer Operation message + * (and others). + */ #define BRW_AOP_AND 1 #define BRW_AOP_OR 2 #define BRW_AOP_XOR 3 @@ -1188,6 +1193,11 @@ enum brw_message_target { #define BRW_AOP_CMPWR 14 #define BRW_AOP_PREDEC 15 +/* Dataport atomic operations for Untyped Atomic Float Operation message. */ +#define BRW_AOP_FMAX 1 +#define BRW_AOP_FMIN 2 +#define BRW_AOP_FCMPWR 3 + #define BRW_MATH_FUNCTION_INV 1 #define BRW_MATH_FUNCTION_LOG 2 #define BRW_MATH_FUNCTION_EXP 3 diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index d3246edde44..815482c9b49 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -2937,6 +2937,54 @@ brw_untyped_atomic(struct brw_codegen *p, p, insn, atomic_op, response_expected); } +static void +brw_set_dp_untyped_atomic_float_message(struct brw_codegen *p, + brw_inst *insn, + unsigned atomic_op, + bool response_expected) +{ + const struct gen_device_info *devinfo = p->devinfo; + unsigned msg_control = + atomic_op | /* Atomic Operation Type: BRW_AOP_F* */ + (response_expected ? 
1 << 5 : 0); /* Return data expected */ + + assert(devinfo->gen >= 9); + assert(brw_get_default_access_mode(p) == BRW_ALIGN_1); + + if (brw_get_default_exec_size(p) != BRW_EXECUTE_16) + msg_control |= 1 << 4; /* SIMD8 mode */ + + brw_inst_set_dp_msg_type(devinfo, insn, + GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP); + + brw_inst_set_dp_msg_control(devinfo, insn, msg_control); +} + +void +brw_untyped_atomic_float(struct brw_codegen *p, + struct brw_reg dst, + struct brw_reg payload, + struct brw_reg surface, + unsigned atomic_op, + unsigned msg_length, + bool response_expected, + bool header_present) +{ + const struct gen_device_info *devinfo = p->devinfo; + + assert(devinfo->gen >= 9); + assert(brw_get_default_access_mode(p) == BRW_ALIGN_1); + + const unsigned sfid = HSW_SFID_DATAPORT_DATA_CACHE_1; + struct brw_inst *insn = brw_send_indirect_surface_message( + p, sfid, brw_writemask(dst, WRITEMASK_XYZW), payload, surface, msg_length, + brw_surface_payload_size(p, response_expected, true, true), + header_present); + + brw_set_dp_untyped_atomic_float_message( + p, insn, atomic_op, response_expected); +} + static void brw_set_dp_untyped_surface_read_message(struct brw_codegen *p, struct brw_inst *insn, diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 5c95e260aad..ba3991f7efa 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -242,6 +242,7 @@ fs_inst::is_send_from_grf() const case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: case SHADER_OPCODE_UNTYPED_ATOMIC: + case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT: case SHADER_OPCODE_UNTYPED_SURFACE_READ: case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: case SHADER_OPCODE_BYTE_SCATTERED_WRITE: @@ -806,6 +807,20 @@ fs_inst::components_read(unsigned i) const return 1; } + case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL: { + assert(src[3].file == IMM && + src[4].file == IMM); + const unsigned op = src[4].ud; + /* Surface coordinates. 
*/ + if (i == 0) + return src[3].ud; + /* Surface operation source. */ + else if (i == 1 && op == BRW_AOP_FCMPWR) + return 2; + else + return 1; + } + default: return 1; } @@ -824,6 +839,7 @@ fs_inst::size_read(int arg) const case SHADER_OPCODE_URB_READ_SIMD8: case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT: case SHADER_OPCODE_UNTYPED_ATOMIC: + case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT: case SHADER_OPCODE_UNTYPED_SURFACE_READ: case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: case SHADER_OPCODE_TYPED_ATOMIC: @@ -4855,6 +4871,12 @@ fs_visitor::lower_logical_sends() ibld.sample_mask_reg()); break; + case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL: + lower_surface_logical_send(ibld, inst, + SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT, + ibld.sample_mask_reg()); + break; + case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: lower_surface_logical_send(ibld, inst, SHADER_OPCODE_TYPED_SURFACE_READ, @@ -5333,6 +5355,7 @@ get_lowered_simd_width(const struct gen_device_info *devinfo, return 8; case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: + case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL: case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 17b1368d522..c0923eec8ec 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -226,8 +226,12 @@ public: nir_intrinsic_instr *instr); void nir_emit_ssbo_atomic(const brw::fs_builder &bld, int op, nir_intrinsic_instr *instr); + void nir_emit_ssbo_atomic_float(const brw::fs_builder &bld, + int op, nir_intrinsic_instr *instr); void nir_emit_shared_atomic(const brw::fs_builder &bld, int op, nir_intrinsic_instr *instr); + void nir_emit_shared_atomic_float(const brw::fs_builder &bld, + int op, nir_intrinsic_instr *instr); void nir_emit_texture(const brw::fs_builder &bld, nir_tex_instr *instr); void nir_emit_jump(const brw::fs_builder &bld, diff --git 
a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp index 92cc0a8de58..ab34b63748e 100644 --- a/src/intel/compiler/brw_fs_copy_propagation.cpp +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp @@ -679,6 +679,7 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry) break; case SHADER_OPCODE_UNTYPED_ATOMIC: + case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT: case SHADER_OPCODE_UNTYPED_SURFACE_READ: case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: case SHADER_OPCODE_TYPED_ATOMIC: @@ -720,6 +721,7 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry) case SHADER_OPCODE_TG4_LOGICAL: case SHADER_OPCODE_TG4_OFFSET_LOGICAL: case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: + case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL: case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: diff --git a/src/intel/compiler/brw_fs_dead_code_eliminate.cpp b/src/intel/compiler/brw_fs_dead_code_eliminate.cpp index 7adb4278919..eeb71dd2b92 100644 --- a/src/intel/compiler/brw_fs_dead_code_eliminate.cpp +++ b/src/intel/compiler/brw_fs_dead_code_eliminate.cpp @@ -55,6 +55,8 @@ can_omit_write(const fs_inst *inst) switch (inst->opcode) { case SHADER_OPCODE_UNTYPED_ATOMIC: case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: + case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT: + case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL: case SHADER_OPCODE_TYPED_ATOMIC: case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: return true; diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 09839d0b4da..65eb860386a 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -2228,6 +2228,13 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) inst->header_size); break; + case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT: + assert(src[2].file == BRW_IMMEDIATE_VALUE); + brw_untyped_atomic_float(p, dst, src[0], 
src[1], src[2].ud, + inst->mlen, !inst->dst.is_null(), + inst->header_size); + break; + case SHADER_OPCODE_UNTYPED_SURFACE_READ: assert(!inst->header_size); assert(src[2].file == BRW_IMMEDIATE_VALUE); diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 243b1d2ae75..adcc9f35fa5 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -3651,6 +3651,15 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld, case nir_intrinsic_shared_atomic_comp_swap: nir_emit_shared_atomic(bld, BRW_AOP_CMPWR, instr); break; + case nir_intrinsic_shared_atomic_fmin: + nir_emit_shared_atomic_float(bld, BRW_AOP_FMIN, instr); + break; + case nir_intrinsic_shared_atomic_fmax: + nir_emit_shared_atomic_float(bld, BRW_AOP_FMAX, instr); + break; + case nir_intrinsic_shared_atomic_fcomp_swap: + nir_emit_shared_atomic_float(bld, BRW_AOP_FCMPWR, instr); + break; case nir_intrinsic_load_shared: { assert(devinfo->gen >= 7); @@ -4378,6 +4387,15 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr case nir_intrinsic_ssbo_atomic_comp_swap: nir_emit_ssbo_atomic(bld, BRW_AOP_CMPWR, instr); break; + case nir_intrinsic_ssbo_atomic_fmin: + nir_emit_ssbo_atomic_float(bld, BRW_AOP_FMIN, instr); + break; + case nir_intrinsic_ssbo_atomic_fmax: + nir_emit_ssbo_atomic_float(bld, BRW_AOP_FMAX, instr); + break; + case nir_intrinsic_ssbo_atomic_fcomp_swap: + nir_emit_ssbo_atomic_float(bld, BRW_AOP_FCMPWR, instr); + break; case nir_intrinsic_get_buffer_size: { nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]); @@ -4866,6 +4884,54 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld, bld.MOV(dest, atomic_result); } +void +fs_visitor::nir_emit_ssbo_atomic_float(const fs_builder &bld, + int op, nir_intrinsic_instr *instr) +{ + if (stage == MESA_SHADER_FRAGMENT) + brw_wm_prog_data(prog_data)->has_side_effects = true; + + fs_reg dest; + if (nir_intrinsic_infos[instr->intrinsic].has_dest) 
+ dest = get_nir_dest(instr->dest); + + fs_reg surface; + nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]); + if (const_surface) { + unsigned surf_index = stage_prog_data->binding_table.ssbo_start + + const_surface->u32[0]; + surface = brw_imm_ud(surf_index); + brw_mark_surface_used(prog_data, surf_index); + } else { + surface = vgrf(glsl_type::uint_type); + bld.ADD(surface, get_nir_src(instr->src[0]), + brw_imm_ud(stage_prog_data->binding_table.ssbo_start)); + + /* Assume this may touch any SSBO. This is the same as we do for other + * UBO/SSBO accesses with a non-constant surface. + */ + brw_mark_surface_used(prog_data, + stage_prog_data->binding_table.ssbo_start + + nir->info.num_ssbos - 1); + } + + fs_reg offset = get_nir_src(instr->src[1]); + fs_reg data1 = get_nir_src(instr->src[2]); + fs_reg data2; + if (op == BRW_AOP_FCMPWR) + data2 = get_nir_src(instr->src[3]); + + /* Emit the actual atomic operation */ + + fs_reg atomic_result = emit_untyped_atomic_float(bld, surface, offset, + data1, data2, + 1 /* dims */, 1 /* rsize */, + op, + BRW_PREDICATE_NONE); + dest.type = atomic_result.type; + bld.MOV(dest, atomic_result); +} + void fs_visitor::nir_emit_shared_atomic(const fs_builder &bld, int op, nir_intrinsic_instr *instr) @@ -4903,6 +4969,43 @@ fs_visitor::nir_emit_shared_atomic(const fs_builder &bld, bld.MOV(dest, atomic_result); } +void +fs_visitor::nir_emit_shared_atomic_float(const fs_builder &bld, + int op, nir_intrinsic_instr *instr) +{ + fs_reg dest; + if (nir_intrinsic_infos[instr->intrinsic].has_dest) + dest = get_nir_dest(instr->dest); + + fs_reg surface = brw_imm_ud(GEN7_BTI_SLM); + fs_reg offset; + fs_reg data1 = get_nir_src(instr->src[1]); + fs_reg data2; + if (op == BRW_AOP_FCMPWR) + data2 = get_nir_src(instr->src[2]); + + /* Get the offset */ + nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); + if (const_offset) { + offset = brw_imm_ud(instr->const_index[0] + const_offset->u32[0]); + } else { + offset = 
vgrf(glsl_type::uint_type); + bld.ADD(offset, + retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_UD), + brw_imm_ud(instr->const_index[0])); + } + + /* Emit the actual atomic operation */ + + fs_reg atomic_result = emit_untyped_atomic_float(bld, surface, offset, + data1, data2, + 1 /* dims */, 1 /* rsize */, + op, + BRW_PREDICATE_NONE); + dest.type = atomic_result.type; + bld.MOV(dest, atomic_result); +} + void fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) { diff --git a/src/intel/compiler/brw_fs_surface_builder.cpp b/src/intel/compiler/brw_fs_surface_builder.cpp index 1d147747456..0b8418ca736 100644 --- a/src/intel/compiler/brw_fs_surface_builder.cpp +++ b/src/intel/compiler/brw_fs_surface_builder.cpp @@ -110,6 +110,30 @@ namespace brw { addr, tmp, surface, dims, op, rsize, pred); } + /** + * Emit an untyped surface atomic float opcode. \p dims determines the + * number of components of the address and \p rsize the number of + * components of the returned value (either zero or one). + */ + fs_reg + emit_untyped_atomic_float(const fs_builder &bld, + const fs_reg &surface, const fs_reg &addr, + const fs_reg &src0, const fs_reg &src1, + unsigned dims, unsigned rsize, unsigned op, + brw_predicate pred) + { + /* FINISHME: Factor out this frequently recurring pattern into a + * helper function. + */ + const unsigned n = (src0.file != BAD_FILE) + (src1.file != BAD_FILE); + const fs_reg srcs[] = { src0, src1 }; + const fs_reg tmp = bld.vgrf(src0.type, n); + bld.LOAD_PAYLOAD(tmp, srcs, n, 0); + + return emit_send(bld, SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL, + addr, tmp, surface, dims, op, rsize, pred); + } + /** * Emit a typed surface read opcode. 
\p dims determines the number of * components of the address and \p size the number of components of the diff --git a/src/intel/compiler/brw_fs_surface_builder.h b/src/intel/compiler/brw_fs_surface_builder.h index f0048220d5d..6952df64286 100644 --- a/src/intel/compiler/brw_fs_surface_builder.h +++ b/src/intel/compiler/brw_fs_surface_builder.h @@ -48,6 +48,13 @@ namespace brw { unsigned dims, unsigned rsize, unsigned op, brw_predicate pred = BRW_PREDICATE_NONE); + fs_reg + emit_untyped_atomic_float(const fs_builder &bld, + const fs_reg &surface, const fs_reg &addr, + const fs_reg &src0, const fs_reg &src1, + unsigned dims, unsigned rsize, unsigned op, + brw_predicate pred); + fs_reg emit_typed_read(const fs_builder &bld, const fs_reg &surface, const fs_reg &addr, unsigned dims, unsigned size); diff --git a/src/intel/compiler/brw_inst.h b/src/intel/compiler/brw_inst.h index 8663c1b7f5b..8c19e330b2b 100644 --- a/src/intel/compiler/brw_inst.h +++ b/src/intel/compiler/brw_inst.h @@ -629,7 +629,7 @@ FF(dp_msg_type, -1, -1, -1, -1, -1, -1, /* 6: */ MD(16), MD(13), /* 7: */ MD(17), MD(14), - /* 8: */ MD(17), MD(14)) + /* 8: */ MD(18), MD(14)) FF(dp_msg_control, /* 4: */ MD(11), MD( 8), /* 4.5-5: use dp_read_msg_control or dp_write_msg_control */ -1, -1, -1, -1, diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp index f817142a8b5..f29671859cb 100644 --- a/src/intel/compiler/brw_schedule_instructions.cpp +++ b/src/intel/compiler/brw_schedule_instructions.cpp @@ -369,6 +369,7 @@ schedule_node::set_latency_gen7(bool is_haswell) break; case SHADER_OPCODE_UNTYPED_ATOMIC: + case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT: case SHADER_OPCODE_TYPED_ATOMIC: /* Test code: * mov(8) g112<1>ud 0x00000000ud { align1 WE_all 1Q }; diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index b7fb06ddbd9..8ac8a20aab5 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ 
-274,6 +274,10 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) return "untyped_atomic"; case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: return "untyped_atomic_logical"; + case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT: + return "untyped_atomic_float"; + case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL: + return "untyped_atomic_float_logical"; case SHADER_OPCODE_UNTYPED_SURFACE_READ: return "untyped_surface_read"; case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: @@ -996,6 +1000,8 @@ backend_instruction::has_side_effects() const switch (opcode) { case SHADER_OPCODE_UNTYPED_ATOMIC: case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: + case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT: + case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL: case SHADER_OPCODE_GEN4_SCRATCH_WRITE: case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: -- 2.14.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev