For split indirect sends we have to put the EOT parameter in the
extended descriptor as well as the instruction itself, so just calling
brw_inst_set_eot is insufficient.  Moving the EOT handling into the
brw_send_indirect_[split_]message helpers lets us handle it properly.
---
 src/intel/compiler/brw_eu.h               |  6 ++++--
 src/intel/compiler/brw_eu_emit.c          | 25 ++++++++++++++---------
 src/intel/compiler/brw_fs_generator.cpp   | 11 +++++-----
 src/intel/compiler/brw_vec4_generator.cpp |  6 ++++--
 4 files changed, 29 insertions(+), 19 deletions(-)

diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
index 104cbece9b3..ac8ff69a7e0 100644
--- a/src/intel/compiler/brw_eu.h
+++ b/src/intel/compiler/brw_eu.h
@@ -911,7 +911,8 @@ brw_send_indirect_message(struct brw_codegen *p,
                           struct brw_reg dst,
                           struct brw_reg payload,
                           struct brw_reg desc,
-                          unsigned desc_imm);
+                          unsigned desc_imm,
+                          bool eot);
 
 void
 brw_send_indirect_split_message(struct brw_codegen *p,
@@ -922,7 +923,8 @@ brw_send_indirect_split_message(struct brw_codegen *p,
                                 struct brw_reg desc,
                                 unsigned desc_imm,
                                 struct brw_reg ex_desc,
-                                unsigned ex_desc_imm);
+                                unsigned ex_desc_imm,
+                                bool eot);
 
 void brw_ff_sync(struct brw_codegen *p,
                   struct brw_reg dest,
diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c
index 9be82d1b87c..4440c84760b 100644
--- a/src/intel/compiler/brw_eu_emit.c
+++ b/src/intel/compiler/brw_eu_emit.c
@@ -2481,7 +2481,8 @@ brw_send_indirect_message(struct brw_codegen *p,
                           struct brw_reg dst,
                           struct brw_reg payload,
                           struct brw_reg desc,
-                          unsigned desc_imm)
+                          unsigned desc_imm,
+                          bool eot)
 {
    const struct gen_device_info *devinfo = p->devinfo;
    struct brw_inst *send;
@@ -2518,6 +2519,7 @@ brw_send_indirect_message(struct brw_codegen *p,
    brw_set_dest(p, send, dst);
    brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD));
    brw_inst_set_sfid(devinfo, send, sfid);
+   brw_inst_set_eot(devinfo, send, eot);
 }
 
 void
@@ -2529,7 +2531,8 @@ brw_send_indirect_split_message(struct brw_codegen *p,
                                 struct brw_reg desc,
                                 unsigned desc_imm,
                                 struct brw_reg ex_desc,
-                                unsigned ex_desc_imm)
+                                unsigned ex_desc_imm,
+                                bool eot)
 {
    const struct gen_device_info *devinfo = p->devinfo;
    struct brw_inst *send;
@@ -2574,13 +2577,13 @@ brw_send_indirect_split_message(struct brw_codegen *p,
        * so the caller can specify additional descriptor bits with the
        * desc_imm immediate.
        *
-       * Even though the instruction dispatcher always pulls the SFID from the
-       * instruction itself, the extended descriptor sent to the actual unit
-       * gets the SFID from the extended descriptor which comes from the
-       * address register.  If we don't OR it in, the external unit gets
-       * confused and hangs the GPU.
+       * Even though the instruction dispatcher always pulls the SFID and EOT
+       * fields from the instruction itself, the actual external unit which
+       * processes the message gets the SFID and EOT from the extended
+       * descriptor which comes from the address register.  If we don't OR
+       * those two fields in, the external unit may get confused and hang.
        */
-      brw_OR(p, addr, ex_desc, brw_imm_ud(ex_desc_imm | sfid));
+      brw_OR(p, addr, ex_desc, brw_imm_ud(ex_desc_imm | sfid | eot << 5));
 
       brw_pop_insn_state(p);
       ex_desc = addr;
@@ -2613,6 +2616,7 @@ brw_send_indirect_split_message(struct brw_codegen *p,
    }
 
    brw_inst_set_sfid(devinfo, send, sfid);
+   brw_inst_set_eot(devinfo, send, eot);
 }
 
 static void
@@ -2645,7 +2649,7 @@ brw_send_indirect_surface_message(struct brw_codegen *p,
       surface = addr;
    }
 
-   brw_send_indirect_message(p, sfid, dst, payload, surface, desc_imm);
+   brw_send_indirect_message(p, sfid, dst, payload, surface, desc_imm, false);
 }
 
 static bool
@@ -3164,7 +3168,8 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
                              dest,
                              mrf,
                              vec1(data),
-                             desc);
+                             desc,
+                             false);
 }
 
 void
diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp
index e3b68fa3165..b2f87d02104 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -272,16 +272,16 @@ fs_generator::generate_send(fs_inst *inst,
        * also covers the dual-payload case because ex_mlen goes in ex_desc.
        */
       brw_send_indirect_split_message(p, inst->sfid, dst, payload, payload2,
-                                      desc, desc_imm, ex_desc, ex_desc_imm);
+                                      desc, desc_imm, ex_desc, ex_desc_imm,
+                                      inst->eot);
       if (inst->check_tdr)
          brw_inst_set_opcode(p->devinfo, brw_last_inst, BRW_OPCODE_SENDSC);
    } else {
-      brw_send_indirect_message(p, inst->sfid, dst, payload, desc, desc_imm);
+      brw_send_indirect_message(p, inst->sfid, dst, payload, desc, desc_imm,
+                                   inst->eot);
       if (inst->check_tdr)
          brw_inst_set_opcode(p->devinfo, brw_last_inst, BRW_OPCODE_SENDC);
    }
-
-   brw_inst_set_eot(p->devinfo, brw_last_inst, inst->eot);
 }
 
 void
@@ -1457,7 +1457,8 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
          brw_dp_read_desc(devinfo, 0 /* surface */,
                           BRW_DATAPORT_OWORD_BLOCK_DWORDS(inst->exec_size),
                           GEN7_DATAPORT_DC_OWORD_BLOCK_READ,
-                          BRW_DATAPORT_READ_TARGET_DATA_CACHE));
+                          BRW_DATAPORT_READ_TARGET_DATA_CACHE),
+         false /* EOT */);
 
       brw_pop_insn_state(p);
    }
diff --git a/src/intel/compiler/brw_vec4_generator.cpp b/src/intel/compiler/brw_vec4_generator.cpp
index 93baaef3ab7..e473d3e2425 100644
--- a/src/intel/compiler/brw_vec4_generator.cpp
+++ b/src/intel/compiler/brw_vec4_generator.cpp
@@ -330,7 +330,8 @@ generate_tex(struct brw_codegen *p,
                           0 /* sampler */,
                           msg_type,
                           BRW_SAMPLER_SIMD_MODE_SIMD4X2,
-                          return_format));
+                          return_format),
+         false /* EOT */);
 
       /* visitor knows more than we do about the surface limit required,
        * so has already done marking.
@@ -1400,7 +1401,8 @@ generate_pull_constant_load_gen7(struct brw_codegen *p,
                           0 /* sampler */,
                           GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
                           BRW_SAMPLER_SIMD_MODE_SIMD4X2,
-                          0));
+                          0),
+         false /* EOT */);
    }
 }
 
-- 
2.20.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to