Re: [Mesa-dev] [PATCH] radv: add a workaround for DXVK hangs by setting amdgpu-skip-threshold

2018-06-08 Thread Samuel Pitoiset



On 06/08/2018 12:16 PM, Grazvydas Ignotas wrote:

I haven't tried it, but maybe calling LLVMParseCommandLineOptions()
second time would work (after creating the target machine, through
call_once) to avoid all this code duplication?


Maybe, yes.



Gražvydas

On Fri, Jun 8, 2018 at 12:38 PM, Samuel Pitoiset
 wrote:

Workaround for bug in llvm that causes the GPU to hang in presence
of nested loops because there is an exec mask issue. The proper
solution is to fix LLVM but this might require a bunch of work.

This fixes a bunch of GPU hangs that happen with DXVK.

Vega10:
Totals from affected shaders:
SGPRS: 110456 -> 110456 (0.00 %)
VGPRS: 122800 -> 122800 (0.00 %)
Spilled SGPRs: 7478 -> 7478 (0.00 %)
Spilled VGPRs: 36 -> 36 (0.00 %)
Code Size: 9901104 -> 9922928 (0.22 %) bytes
Max Waves: 7143 -> 7143 (0.00 %)

Code size slightly increases because it inserts more branch
instructions but that's expected. I don't see any real performance
changes.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105613
Cc: mesa-sta...@lists.freedesktop.org
Signed-off-by: Samuel Pitoiset 
---
  src/amd/vulkan/radv_shader.c | 79 +++-
  1 file changed, 78 insertions(+), 1 deletion(-)

diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 68f2a55e81..76790a1904 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -36,6 +36,7 @@

  #include <llvm-c/Core.h>
  #include <llvm-c/TargetMachine.h>
+#include <llvm-c/Support.h>

  #include "sid.h"
  #include "gfx9d.h"
@@ -467,6 +468,82 @@ radv_fill_shader_variant(struct radv_device *device,
 memcpy(ptr, binary->code, binary->code_size);
  }

+static void radv_init_llvm_target()
+{
+   LLVMInitializeAMDGPUTargetInfo();
+   LLVMInitializeAMDGPUTarget();
+   LLVMInitializeAMDGPUTargetMC();
+   LLVMInitializeAMDGPUAsmPrinter();
+
+   /* For inline assembly. */
+   LLVMInitializeAMDGPUAsmParser();
+
+   /* Workaround for bug in llvm 4.0 that causes image intrinsics
+* to disappear.
+* https://reviews.llvm.org/D26348
+*
+* Workaround for bug in llvm that causes the GPU to hang in presence
+* of nested loops because there is an exec mask issue. The proper
+* solution is to fix LLVM but this might require a bunch of work.
+* https://bugs.llvm.org/show_bug.cgi?id=37744
+*
+* "mesa" is the prefix for error messages.
+*/
+   const char *argv[3] = { "mesa", "-simplifycfg-sink-common=false",
+   "-amdgpu-skip-threshold=1" };
+   LLVMParseCommandLineOptions(3, argv, NULL);
+}
+
+static once_flag radv_init_llvm_target_once_flag = ONCE_FLAG_INIT;
+
+static LLVMTargetRef radv_get_llvm_target(const char *triple)
+{
+   LLVMTargetRef target = NULL;
+   char *err_message = NULL;
+
+   call_once(&radv_init_llvm_target_once_flag, radv_init_llvm_target);
+
+   if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
+   fprintf(stderr, "Cannot find target for triple %s ", triple);
+   if (err_message) {
+   fprintf(stderr, "%s\n", err_message);
+   }
+   LLVMDisposeMessage(err_message);
+   return NULL;
+   }
+   return target;
+}
+
+static LLVMTargetMachineRef radv_create_target_machine(enum radeon_family 
family,
+  enum 
ac_target_machine_options tm_options,
+  const char **out_triple)
+{
+   assert(family >= CHIP_TAHITI);
+   char features[256];
+   const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? "amdgcn-mesa-mesa3d" : 
"amdgcn--";
+   LLVMTargetRef target = radv_get_llvm_target(triple);
+
+   snprintf(features, sizeof(features),
+
"+DumpCode,+vgpr-spilling,-fp32-denormals,+fp64-denormals%s%s%s%s",
+tm_options & AC_TM_SISCHED ? ",+si-scheduler" : "",
+tm_options & AC_TM_FORCE_ENABLE_XNACK ? ",+xnack" : "",
+tm_options & AC_TM_FORCE_DISABLE_XNACK ? ",-xnack" : "",
+tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? ",-promote-alloca" : 
"");
+
+   LLVMTargetMachineRef tm = LLVMCreateTargetMachine(
+target,
+triple,
+ac_get_llvm_processor_name(family),
+features,
+LLVMCodeGenLevelDefault,
+LLVMRelocDefault,
+LLVMCodeModelDefault);
+
+   if (out_triple)
+   *out_triple = triple;
+   return tm;
+}
+
  static struct radv_shader_variant *
  shader_variant_create(struct radv_device *device,
   struct radv_shader_module *module,
@@ -501,7 +578,7 @@ shader_variant_create(struct radv_device *device,
 tm_options |= 

Re: [Mesa-dev] [PATCH] radv: add a workaround for DXVK hangs by setting amdgpu-skip-threshold

2018-06-08 Thread Grazvydas Ignotas
I haven't tried it, but maybe calling LLVMParseCommandLineOptions()
second time would work (after creating the target machine, through
call_once) to avoid all this code duplication?

Gražvydas

On Fri, Jun 8, 2018 at 12:38 PM, Samuel Pitoiset
 wrote:
> Workaround for bug in llvm that causes the GPU to hang in presence
> of nested loops because there is an exec mask issue. The proper
> solution is to fix LLVM but this might require a bunch of work.
>
> This fixes a bunch of GPU hangs that happen with DXVK.
>
> Vega10:
> Totals from affected shaders:
> SGPRS: 110456 -> 110456 (0.00 %)
> VGPRS: 122800 -> 122800 (0.00 %)
> Spilled SGPRs: 7478 -> 7478 (0.00 %)
> Spilled VGPRs: 36 -> 36 (0.00 %)
> Code Size: 9901104 -> 9922928 (0.22 %) bytes
> Max Waves: 7143 -> 7143 (0.00 %)
>
> Code size slightly increases because it inserts more branch
> instructions but that's expected. I don't see any real performance
> changes.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105613
> Cc: mesa-sta...@lists.freedesktop.org
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_shader.c | 79 +++-
>  1 file changed, 78 insertions(+), 1 deletion(-)
>
> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
> index 68f2a55e81..76790a1904 100644
> --- a/src/amd/vulkan/radv_shader.c
> +++ b/src/amd/vulkan/radv_shader.c
> @@ -36,6 +36,7 @@
>
>  #include <llvm-c/Core.h>
>  #include <llvm-c/TargetMachine.h>
> +#include <llvm-c/Support.h>
>
>  #include "sid.h"
>  #include "gfx9d.h"
> @@ -467,6 +468,82 @@ radv_fill_shader_variant(struct radv_device *device,
> memcpy(ptr, binary->code, binary->code_size);
>  }
>
> +static void radv_init_llvm_target()
> +{
> +   LLVMInitializeAMDGPUTargetInfo();
> +   LLVMInitializeAMDGPUTarget();
> +   LLVMInitializeAMDGPUTargetMC();
> +   LLVMInitializeAMDGPUAsmPrinter();
> +
> +   /* For inline assembly. */
> +   LLVMInitializeAMDGPUAsmParser();
> +
> +   /* Workaround for bug in llvm 4.0 that causes image intrinsics
> +* to disappear.
> +* https://reviews.llvm.org/D26348
> +*
> +* Workaround for bug in llvm that causes the GPU to hang in presence
> +* of nested loops because there is an exec mask issue. The proper
> +* solution is to fix LLVM but this might require a bunch of work.
> +* https://bugs.llvm.org/show_bug.cgi?id=37744
> +*
> +* "mesa" is the prefix for error messages.
> +*/
> +   const char *argv[3] = { "mesa", "-simplifycfg-sink-common=false",
> +   "-amdgpu-skip-threshold=1" };
> +   LLVMParseCommandLineOptions(3, argv, NULL);
> +}
> +
> +static once_flag radv_init_llvm_target_once_flag = ONCE_FLAG_INIT;
> +
> +static LLVMTargetRef radv_get_llvm_target(const char *triple)
> +{
> +   LLVMTargetRef target = NULL;
> +   char *err_message = NULL;
> +
> +   call_once(&radv_init_llvm_target_once_flag, radv_init_llvm_target);
> +
> +   if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
> +   fprintf(stderr, "Cannot find target for triple %s ", triple);
> +   if (err_message) {
> +   fprintf(stderr, "%s\n", err_message);
> +   }
> +   LLVMDisposeMessage(err_message);
> +   return NULL;
> +   }
> +   return target;
> +}
> +
> +static LLVMTargetMachineRef radv_create_target_machine(enum radeon_family 
> family,
> +  enum 
> ac_target_machine_options tm_options,
> +  const char 
> **out_triple)
> +{
> +   assert(family >= CHIP_TAHITI);
> +   char features[256];
> +   const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? 
> "amdgcn-mesa-mesa3d" : "amdgcn--";
> +   LLVMTargetRef target = radv_get_llvm_target(triple);
> +
> +   snprintf(features, sizeof(features),
> +
> "+DumpCode,+vgpr-spilling,-fp32-denormals,+fp64-denormals%s%s%s%s",
> +tm_options & AC_TM_SISCHED ? ",+si-scheduler" : "",
> +tm_options & AC_TM_FORCE_ENABLE_XNACK ? ",+xnack" : "",
> +tm_options & AC_TM_FORCE_DISABLE_XNACK ? ",-xnack" : "",
> +tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? 
> ",-promote-alloca" : "");
> +
> +   LLVMTargetMachineRef tm = LLVMCreateTargetMachine(
> +target,
> +triple,
> +ac_get_llvm_processor_name(family),
> +features,
> +LLVMCodeGenLevelDefault,
> +LLVMRelocDefault,
> +LLVMCodeModelDefault);
> +
> +   if (out_triple)
> +   *out_triple = triple;
> +   return tm;
> +}
> +
>  static struct radv_shader_variant *
>  shader_variant_create(struct radv_device *device,
>  

[Mesa-dev] [PATCH] radv: add a workaround for DXVK hangs by setting amdgpu-skip-threshold

2018-06-08 Thread Samuel Pitoiset
Workaround for bug in llvm that causes the GPU to hang in presence
of nested loops because there is an exec mask issue. The proper
solution is to fix LLVM but this might require a bunch of work.

This fixes a bunch of GPU hangs that happen with DXVK.

Vega10:
Totals from affected shaders:
SGPRS: 110456 -> 110456 (0.00 %)
VGPRS: 122800 -> 122800 (0.00 %)
Spilled SGPRs: 7478 -> 7478 (0.00 %)
Spilled VGPRs: 36 -> 36 (0.00 %)
Code Size: 9901104 -> 9922928 (0.22 %) bytes
Max Waves: 7143 -> 7143 (0.00 %)

Code size slightly increases because it inserts more branch
instructions but that's expected. I don't see any real performance
changes.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105613
Cc: mesa-sta...@lists.freedesktop.org
Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_shader.c | 79 +++-
 1 file changed, 78 insertions(+), 1 deletion(-)

diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 68f2a55e81..76790a1904 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -36,6 +36,7 @@
 
 #include <llvm-c/Core.h>
 #include <llvm-c/TargetMachine.h>
+#include <llvm-c/Support.h>
 
 #include "sid.h"
 #include "gfx9d.h"
@@ -467,6 +468,82 @@ radv_fill_shader_variant(struct radv_device *device,
memcpy(ptr, binary->code, binary->code_size);
 }
 
+static void radv_init_llvm_target()
+{
+   LLVMInitializeAMDGPUTargetInfo();
+   LLVMInitializeAMDGPUTarget();
+   LLVMInitializeAMDGPUTargetMC();
+   LLVMInitializeAMDGPUAsmPrinter();
+
+   /* For inline assembly. */
+   LLVMInitializeAMDGPUAsmParser();
+
+   /* Workaround for bug in llvm 4.0 that causes image intrinsics
+* to disappear.
+* https://reviews.llvm.org/D26348
+*
+* Workaround for bug in llvm that causes the GPU to hang in presence
+* of nested loops because there is an exec mask issue. The proper
+* solution is to fix LLVM but this might require a bunch of work.
+* https://bugs.llvm.org/show_bug.cgi?id=37744
+*
+* "mesa" is the prefix for error messages.
+*/
+   const char *argv[3] = { "mesa", "-simplifycfg-sink-common=false",
+   "-amdgpu-skip-threshold=1" };
+   LLVMParseCommandLineOptions(3, argv, NULL);
+}
+
+static once_flag radv_init_llvm_target_once_flag = ONCE_FLAG_INIT;
+
+static LLVMTargetRef radv_get_llvm_target(const char *triple)
+{
+   LLVMTargetRef target = NULL;
+   char *err_message = NULL;
+
+   call_once(&radv_init_llvm_target_once_flag, radv_init_llvm_target);
+
+   if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
+   fprintf(stderr, "Cannot find target for triple %s ", triple);
+   if (err_message) {
+   fprintf(stderr, "%s\n", err_message);
+   }
+   LLVMDisposeMessage(err_message);
+   return NULL;
+   }
+   return target;
+}
+
+static LLVMTargetMachineRef radv_create_target_machine(enum radeon_family family,
+  enum ac_target_machine_options tm_options,
+  const char **out_triple)
+{
+   assert(family >= CHIP_TAHITI);
+   char features[256];
+   const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? "amdgcn-mesa-mesa3d" : "amdgcn--";
+   LLVMTargetRef target = radv_get_llvm_target(triple);
+
+   snprintf(features, sizeof(features),
+"+DumpCode,+vgpr-spilling,-fp32-denormals,+fp64-denormals%s%s%s%s",
+tm_options & AC_TM_SISCHED ? ",+si-scheduler" : "",
+tm_options & AC_TM_FORCE_ENABLE_XNACK ? ",+xnack" : "",
+tm_options & AC_TM_FORCE_DISABLE_XNACK ? ",-xnack" : "",
+tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? ",-promote-alloca" : "");
+
+   LLVMTargetMachineRef tm = LLVMCreateTargetMachine(
+target,
+triple,
+ac_get_llvm_processor_name(family),
+features,
+LLVMCodeGenLevelDefault,
+LLVMRelocDefault,
+LLVMCodeModelDefault);
+
+   if (out_triple)
+   *out_triple = triple;
+   return tm;
+}
+
 static struct radv_shader_variant *
 shader_variant_create(struct radv_device *device,
  struct radv_shader_module *module,
@@ -501,7 +578,7 @@ shader_variant_create(struct radv_device *device,
tm_options |= AC_TM_SUPPORTS_SPILL;
if (device->instance->perftest_flags & RADV_PERFTEST_SISCHED)
tm_options |= AC_TM_SISCHED;
-   tm = ac_create_target_machine(chip_family, tm_options, NULL);
+   tm = radv_create_target_machine(chip_family, tm_options, NULL);
 
if (gs_copy_shader) {