Date: Monday, April 3, 2023 @ 20:22:14
  Author: heftig
Revision: 1434420

23.0.1-2: add patch for gpu hangs on intel

Added:
  lib32-mesa/trunk/0003-intel-fs-fix-scheduling-of-HALT-instructions.patch
Modified:
  lib32-mesa/trunk/PKGBUILD

---------------------------------------------------------+
 0003-intel-fs-fix-scheduling-of-HALT-instructions.patch |  130 ++++++++++++++
 PKGBUILD                                                |    9 
 2 files changed, 138 insertions(+), 1 deletion(-)

Added: 0003-intel-fs-fix-scheduling-of-HALT-instructions.patch
===================================================================
--- 0003-intel-fs-fix-scheduling-of-HALT-instructions.patch    (rev 0)
+++ 0003-intel-fs-fix-scheduling-of-HALT-instructions.patch    2023-04-03 20:22:14 UTC (rev 1434420)
@@ -0,0 +1,130 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Lionel Landwerlin <[email protected]>
+Date: Mon, 3 Apr 2023 14:52:59 +0300
+Subject: [PATCH] intel/fs: fix scheduling of HALT instructions
+
+With the following test :
+
+dEQP-VK.spirv_assembly.instruction.terminate_invocation.terminate.no_out_of_bounds_load
+
+There is a :
+
+shader_start:
+   ...                                 <- no control flow
+   g0 = some_alu
+   g1 = fbl
+   g2 = broadcast g3, g1
+   g4 = get_buffer_size g2
+   ...                                 <- no control flow
+   halt                                <- on some lanes
+   g5 = send <surface>, g4
+
+eliminate_find_live_channel will remove the fbl/broadcast because it
+assumes lane0 is active at get_buffer_size :
+
+shader_start:
+   ...                                 <- no control flow
+   g0 = some_alu
+   g4 = get_buffer_size g0
+   ...                                 <- no control flow
+   halt                                <- on some lanes
+   g5 = send <surface>, g4
+
+But then the instruction scheduler will move the get_buffer_size after
+the halt :
+
+shader_start:
+   ...                                 <- no control flow
+   halt                                <- on some lanes
+   g0 = some_alu
+   g4 = get_buffer_size g0
+   g5 = send <surface>, g4
+
+get_buffer_size pulls the surface index from lane0 in g0 which could
+have been turned off by the halt and we end up accessing an invalid
+surface handle.
+
+Signed-off-by: Lionel Landwerlin <[email protected]>
+Cc: mesa-stable
+---
+ .../compiler/brw_schedule_instructions.cpp    | 46 +++++++++++++++++++
+ 1 file changed, 46 insertions(+)
+
+diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp
+index 3286e3f83b96..43f63784b2e8 100644
+--- a/src/intel/compiler/brw_schedule_instructions.cpp
++++ b/src/intel/compiler/brw_schedule_instructions.cpp
+@@ -651,6 +651,7 @@ public:
+       ralloc_free(this->mem_ctx);
+    }
+    void add_barrier_deps(schedule_node *n);
++   void add_cross_lane_deps(schedule_node *n);
+    void add_dep(schedule_node *before, schedule_node *after, int latency);
+    void add_dep(schedule_node *before, schedule_node *after);
+ 
+@@ -1098,6 +1099,28 @@ is_scheduling_barrier(const backend_instruction *inst)
+           inst->has_side_effects();
+ }
+ 
++static bool
++has_cross_lane_access(const fs_inst *inst)
++{
++   if (inst->opcode == SHADER_OPCODE_BROADCAST ||
++       inst->opcode == SHADER_OPCODE_READ_SR_REG ||
++       inst->opcode == SHADER_OPCODE_CLUSTER_BROADCAST ||
++       inst->opcode == SHADER_OPCODE_SHUFFLE ||
++       inst->opcode == FS_OPCODE_LOAD_LIVE_CHANNELS ||
++       inst->opcode == SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL ||
++       inst->opcode == SHADER_OPCODE_FIND_LIVE_CHANNEL)
++      return true;
++
++   for (unsigned s = 0; s < inst->sources; s++) {
++      if (inst->src[s].file == VGRF) {
++         if (inst->src[s].stride == 0)
++            return true;
++      }
++   }
++
++   return false;
++}
++
+ /**
+  * Sometimes we really want this node to execute after everything that
+  * was before it and before everything that followed it.  This adds
+@@ -1128,6 +1151,25 @@ instruction_scheduler::add_barrier_deps(schedule_node *n)
+    }
+ }
+ 
++/**
++ * Because some instructions like HALT can disable lanes, scheduling prior to
++ * a cross lane access should not be allowed, otherwise we could end up with
++ * later instructions accessing uninitialized data.
++ */
++void
++instruction_scheduler::add_cross_lane_deps(schedule_node *n)
++{
++   schedule_node *prev = (schedule_node *)n->prev;
++
++   if (prev) {
++      while (!prev->is_head_sentinel()) {
++         if (has_cross_lane_access((fs_inst *)prev->inst))
++            add_dep(prev, n, 0);
++         prev = (schedule_node *)prev->prev;
++      }
++   }
++}
++
+ /* instruction scheduling needs to be aware of when an MRF write
+  * actually writes 2 MRFs.
+  */
+@@ -1165,6 +1207,10 @@ fs_instruction_scheduler::calculate_deps()
+       if (is_scheduling_barrier(inst))
+          add_barrier_deps(n);
+ 
++      if (inst->opcode == BRW_OPCODE_HALT ||
++          inst->opcode == SHADER_OPCODE_HALT_TARGET)
++          add_cross_lane_deps(n);
++
+       /* read-after-write deps. */
+       for (int i = 0; i < inst->sources; i++) {
+          if (inst->src[i].file == VGRF) {

Modified: PKGBUILD
===================================================================
--- PKGBUILD    2023-04-03 19:58:21 UTC (rev 1434419)
+++ PKGBUILD    2023-04-03 20:22:14 UTC (rev 1434420)
@@ -7,7 +7,7 @@
 pkgname=('lib32-vulkan-mesa-layers' 'lib32-opencl-mesa' 'lib32-vulkan-intel' 'lib32-vulkan-radeon' 'lib32-vulkan-virtio' 'lib32-libva-mesa-driver' 'lib32-mesa-vdpau' 'lib32-mesa')
 pkgdesc="An open-source implementation of the OpenGL specification (32-bit)"
 pkgver=23.0.1
-pkgrel=1
+pkgrel=2
 arch=('x86_64')
 makedepends=('python-mako' 'lib32-libxml2' 'lib32-expat' 'lib32-libx11' 'xorgproto' 'lib32-libdrm'
              'lib32-libxshmfence' 'lib32-libxxf86vm' 'lib32-libxdamage' 'lib32-libvdpau'
@@ -20,16 +20,19 @@
 source=(https://mesa.freedesktop.org/archive/mesa-${pkgver}.tar.xz{,.sig}
         0001-iris-Retry-DRM_IOCTL_I915_GEM_EXECBUFFER2-on-ENOMEM.patch
         0002-Revert-iris-Avoid-abort-if-kernel-can-t-allocate-mem.patch
+        0003-intel-fs-fix-scheduling-of-HALT-instructions.patch
         LICENSE)
 sha256sums=('e8e586856b55893abae9bdcdb98b41c081d909bb1faf372e6e7262307bf34adf'
             'SKIP'
             '99264c77d63d6fa810e295914808cde9f580a64e913e99fa794c1aa25a4f8fb2'
             'd6ef8fb1809e8aeae0ec32bfe916adb770c64880bfd3d0f4472a616c9f356a9a'
+            'dc6790b5be0e80c23e74ae18ca1a2b40f57f4211cc7b645bf22b63af3b790e40'
             '7052ba73bb07ea78873a2431ee4e828f4e72bda7d176d07f770fa48373dec537')
 
b2sums=('50d358e393037381d0d848f868ac3439b0851809c3533432dc428bd77e81bc71bbfd2b598e221b6e8c4c2528ef32e5624aec4fe2e552e01ee98abbcf96a1f5b7'
         'SKIP'
         
'a90bfc47fb3a46eff1ef2455c7aa18c2bb515ec217b423d0a87cc5f3b824a77c0381e1378498464418644108142022dcd3c289e157877c6ae7584beaec1d9987'
         
'bd52994305fc0fa2f12c46ea3208bbb24f97495d9bad73120d83a6cdcf7e48f5ff0d14ac0055765516b70caacdf024fca4159b70b054e85f2783c78c9218aefe'
+        '37d1d070c45c85bce8abe3524a3f8d9ac9ed6326a3eec653cd89fffce3630b08eb9b96b11aeb495488230449c99f9b508f73a15e53265d2b159286b0e2dda7cc'
         
'1ecf007b82260710a7bf5048f47dd5d600c168824c02c595af654632326536a6527fbe0738670ee7b921dd85a70425108e0f471ba85a8e1ca47d294ad74b4adb')
 validpgpkeys=('8703B6700E7EE06D7A39B8D6EDAE37B02CEB490D'  # Emil Velikov <[email protected]>
               '946D09B5E4C9845E63075FF1D961C596A7203456'  # Andres Gomez <[email protected]>
@@ -45,6 +48,10 @@
   # https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20449
   patch -Np1 -i ../0001-iris-Retry-DRM_IOCTL_I915_GEM_EXECBUFFER2-on-ENOMEM.patch
   patch -Np1 -i ../0002-Revert-iris-Avoid-abort-if-kernel-can-t-allocate-mem.patch
+
+  # https://gitlab.freedesktop.org/mesa/mesa/-/issues/7110
+  # https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20765
+  patch -Np1 -i ../0003-intel-fs-fix-scheduling-of-HALT-instructions.patch
 }
 
 build() {

Reply via email to