Date: Monday, April 3, 2023 @ 20:22:14
Author: heftig
Revision: 1434420
23.0.1-2: add patch for gpu hangs on intel
Added:
lib32-mesa/trunk/0003-intel-fs-fix-scheduling-of-HALT-instructions.patch
Modified:
lib32-mesa/trunk/PKGBUILD
---------------------------------------------------------+
0003-intel-fs-fix-scheduling-of-HALT-instructions.patch | 130 ++++++++++++++
PKGBUILD | 9
2 files changed, 138 insertions(+), 1 deletion(-)
Added: 0003-intel-fs-fix-scheduling-of-HALT-instructions.patch
===================================================================
--- 0003-intel-fs-fix-scheduling-of-HALT-instructions.patch (rev 0)
+++ 0003-intel-fs-fix-scheduling-of-HALT-instructions.patch 2023-04-03 20:22:14 UTC (rev 1434420)
@@ -0,0 +1,130 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Lionel Landwerlin <[email protected]>
+Date: Mon, 3 Apr 2023 14:52:59 +0300
+Subject: [PATCH] intel/fs: fix scheduling of HALT instructions
+
+With the following test :
+
+dEQP-VK.spirv_assembly.instruction.terminate_invocation.terminate.no_out_of_bounds_load
+
+There is a :
+
+shader_start:
+ ... <- no control flow
+ g0 = some_alu
+ g1 = fbl
+ g2 = broadcast g3, g1
+ g4 = get_buffer_size g2
+ ... <- no control flow
+ halt <- on some lanes
+ g5 = send <surface>, g4
+
+eliminate_find_live_channel will remove the fbl/broadcast because it
+assumes lane0 is active at get_buffer_size :
+
+shader_start:
+ ... <- no control flow
+ g0 = some_alu
+ g4 = get_buffer_size g0
+ ... <- no control flow
+ halt <- on some lanes
+ g5 = send <surface>, g4
+
+But then the instruction scheduler will move the get_buffer_size after
+the halt :
+
+shader_start:
+ ... <- no control flow
+ halt <- on some lanes
+ g0 = some_alu
+ g4 = get_buffer_size g0
+ g5 = send <surface>, g4
+
+get_buffer_size pulls the surface index from lane0 in g0 which could
+have been turned off by the halt and we end up accessing an invalid
+surface handle.
+
+Signed-off-by: Lionel Landwerlin <[email protected]>
+Cc: mesa-stable
+---
+ .../compiler/brw_schedule_instructions.cpp | 46 +++++++++++++++++++
+ 1 file changed, 46 insertions(+)
+
+diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp
+index 3286e3f83b96..43f63784b2e8 100644
+--- a/src/intel/compiler/brw_schedule_instructions.cpp
++++ b/src/intel/compiler/brw_schedule_instructions.cpp
+@@ -651,6 +651,7 @@ public:
+ ralloc_free(this->mem_ctx);
+ }
+ void add_barrier_deps(schedule_node *n);
++ void add_cross_lane_deps(schedule_node *n);
+ void add_dep(schedule_node *before, schedule_node *after, int latency);
+ void add_dep(schedule_node *before, schedule_node *after);
+
+@@ -1098,6 +1099,28 @@ is_scheduling_barrier(const backend_instruction *inst)
+ inst->has_side_effects();
+ }
+
++static bool
++has_cross_lane_access(const fs_inst *inst)
++{
++ if (inst->opcode == SHADER_OPCODE_BROADCAST ||
++ inst->opcode == SHADER_OPCODE_READ_SR_REG ||
++ inst->opcode == SHADER_OPCODE_CLUSTER_BROADCAST ||
++ inst->opcode == SHADER_OPCODE_SHUFFLE ||
++ inst->opcode == FS_OPCODE_LOAD_LIVE_CHANNELS ||
++ inst->opcode == SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL ||
++ inst->opcode == SHADER_OPCODE_FIND_LIVE_CHANNEL)
++ return true;
++
++ for (unsigned s = 0; s < inst->sources; s++) {
++ if (inst->src[s].file == VGRF) {
++ if (inst->src[s].stride == 0)
++ return true;
++ }
++ }
++
++ return false;
++}
++
+ /**
+ * Sometimes we really want this node to execute after everything that
+ * was before it and before everything that followed it. This adds
+@@ -1128,6 +1151,25 @@ instruction_scheduler::add_barrier_deps(schedule_node *n)
+ }
+ }
+
++/**
++ * Because some instructions like HALT can disable lanes, scheduling prior to
++ * a cross lane access should not be allowed, otherwise we could end up with
++ * later instructions accessing uninitialized data.
++ */
++void
++instruction_scheduler::add_cross_lane_deps(schedule_node *n)
++{
++ schedule_node *prev = (schedule_node *)n->prev;
++
++ if (prev) {
++ while (!prev->is_head_sentinel()) {
++ if (has_cross_lane_access((fs_inst *)prev->inst))
++ add_dep(prev, n, 0);
++ prev = (schedule_node *)prev->prev;
++ }
++ }
++}
++
+ /* instruction scheduling needs to be aware of when an MRF write
+ * actually writes 2 MRFs.
+ */
+@@ -1165,6 +1207,10 @@ fs_instruction_scheduler::calculate_deps()
+ if (is_scheduling_barrier(inst))
+ add_barrier_deps(n);
+
++ if (inst->opcode == BRW_OPCODE_HALT ||
++ inst->opcode == SHADER_OPCODE_HALT_TARGET)
++ add_cross_lane_deps(n);
++
+ /* read-after-write deps. */
+ for (int i = 0; i < inst->sources; i++) {
+ if (inst->src[i].file == VGRF) {
Modified: PKGBUILD
===================================================================
--- PKGBUILD 2023-04-03 19:58:21 UTC (rev 1434419)
+++ PKGBUILD 2023-04-03 20:22:14 UTC (rev 1434420)
@@ -7,7 +7,7 @@
pkgname=('lib32-vulkan-mesa-layers' 'lib32-opencl-mesa' 'lib32-vulkan-intel' 'lib32-vulkan-radeon' 'lib32-vulkan-virtio' 'lib32-libva-mesa-driver' 'lib32-mesa-vdpau' 'lib32-mesa')
pkgdesc="An open-source implementation of the OpenGL specification (32-bit)"
pkgver=23.0.1
-pkgrel=1
+pkgrel=2
arch=('x86_64')
makedepends=('python-mako' 'lib32-libxml2' 'lib32-expat' 'lib32-libx11' 'xorgproto' 'lib32-libdrm'
'lib32-libxshmfence' 'lib32-libxxf86vm' 'lib32-libxdamage' 'lib32-libvdpau'
@@ -20,16 +20,19 @@
source=(https://mesa.freedesktop.org/archive/mesa-${pkgver}.tar.xz{,.sig}
0001-iris-Retry-DRM_IOCTL_I915_GEM_EXECBUFFER2-on-ENOMEM.patch
0002-Revert-iris-Avoid-abort-if-kernel-can-t-allocate-mem.patch
+ 0003-intel-fs-fix-scheduling-of-HALT-instructions.patch
LICENSE)
sha256sums=('e8e586856b55893abae9bdcdb98b41c081d909bb1faf372e6e7262307bf34adf'
'SKIP'
'99264c77d63d6fa810e295914808cde9f580a64e913e99fa794c1aa25a4f8fb2'
'd6ef8fb1809e8aeae0ec32bfe916adb770c64880bfd3d0f4472a616c9f356a9a'
+ 'dc6790b5be0e80c23e74ae18ca1a2b40f57f4211cc7b645bf22b63af3b790e40'
'7052ba73bb07ea78873a2431ee4e828f4e72bda7d176d07f770fa48373dec537')
b2sums=('50d358e393037381d0d848f868ac3439b0851809c3533432dc428bd77e81bc71bbfd2b598e221b6e8c4c2528ef32e5624aec4fe2e552e01ee98abbcf96a1f5b7'
'SKIP'
'a90bfc47fb3a46eff1ef2455c7aa18c2bb515ec217b423d0a87cc5f3b824a77c0381e1378498464418644108142022dcd3c289e157877c6ae7584beaec1d9987'
'bd52994305fc0fa2f12c46ea3208bbb24f97495d9bad73120d83a6cdcf7e48f5ff0d14ac0055765516b70caacdf024fca4159b70b054e85f2783c78c9218aefe'
+ '37d1d070c45c85bce8abe3524a3f8d9ac9ed6326a3eec653cd89fffce3630b08eb9b96b11aeb495488230449c99f9b508f73a15e53265d2b159286b0e2dda7cc'
'1ecf007b82260710a7bf5048f47dd5d600c168824c02c595af654632326536a6527fbe0738670ee7b921dd85a70425108e0f471ba85a8e1ca47d294ad74b4adb')
validpgpkeys=('8703B6700E7EE06D7A39B8D6EDAE37B02CEB490D' # Emil Velikov <[email protected]>
'946D09B5E4C9845E63075FF1D961C596A7203456' # Andres Gomez <[email protected]>
@@ -45,6 +48,10 @@
# https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20449
patch -Np1 -i ../0001-iris-Retry-DRM_IOCTL_I915_GEM_EXECBUFFER2-on-ENOMEM.patch
patch -Np1 -i ../0002-Revert-iris-Avoid-abort-if-kernel-can-t-allocate-mem.patch
+
+ # https://gitlab.freedesktop.org/mesa/mesa/-/issues/7110
+ # https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20765
+ patch -Np1 -i ../0003-intel-fs-fix-scheduling-of-HALT-instructions.patch
}
build() {