On 02/24/2018 11:36 AM, Karol Herbst wrote:
Currently, while inserting barriers, writes and reads to FILE_FLAGS aren't
considered. This can lead to WaR hazards in some situations.
Together with the previous commit, this fixes shaders with instructions like this:
mad u32 $r2 $r4 $r11 $r2
mad u32 { $r5 $c0 } $r4 $r10 $r6
mad (SUBOP:1) u32 $r3 $r4 $r10 $r2 $c0
Affects OpenCL CTS tests on Maxwell+:
basic/test_basic intmath_long
basic/test_basic intmath_long2
basic/test_basic intmath_long4
v2: only put barriers on instructions which actually read flags
Signed-off-by: Karol Herbst <kher...@redhat.com>
---
.../drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index 96bd276884..fafece81ad 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -3944,6 +3944,7 @@ SchedDataCalculatorGM107::needWrDepBar(const Instruction
*insn) const
for (int d = 0; insn->defExists(d); ++d) {
if (insn->def(d).getFile() == FILE_GPR ||
+ insn->def(d).getFile() == FILE_FLAGS ||
insn->def(d).getFile() == FILE_PREDICATE)
return true;
}
@@ -3983,6 +3984,12 @@ SchedDataCalculatorGM107::findFirstUse(const Instruction
*bari) const
continue;
return insn;
}
+ if (def.getFile() == FILE_FLAGS) {
+ if (insn->src(s).getFile() != FILE_FLAGS ||
+ src->reg.data.id != minGPR)
+ continue;
+ return insn;
+ }
}
}
}
@@ -4002,7 +4009,8 @@ SchedDataCalculatorGM107::findFirstDef(const Instruction
*bari) const
for (int d = 0; insn->defExists(d); ++d) {
const Value *def = insn->def(d).rep();
- if (insn->def(d).getFile() != FILE_GPR)
+ if (insn->def(d).getFile() != FILE_GPR &&
+ insn->def(d).getFile() != FILE_FLAGS)
continue;
minGPR = def->reg.data.id;
@@ -4010,7 +4018,12 @@ SchedDataCalculatorGM107::findFirstDef(const Instruction
*bari) const
for (int s = 0; bari->srcExists(s); ++s) {
const Value *src = bari->src(s).rep();
+ if (bari->src(s).getFile() == FILE_FLAGS &&
+ insn->def(d).getFile() == FILE_FLAGS &&
+ src->reg.data.id == minGPR)
Why don't you check the dst GPR id too?
+ return insn;
if (bari->src(s).getFile() != FILE_GPR ||
+ insn->def(d).getFile() != FILE_GPR ||
src->reg.data.id + src->reg.size / 4 - 1 < minGPR ||
src->reg.data.id > maxGPR)
continue;
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev