Module: Mesa
Branch: staging/20.0
Commit: 1e598bf8e0a1e94fa87235b02c58b93996ea93f4
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=1e598bf8e0a1e94fa87235b02c58b93996ea93f4

Author: Timur Kristóf <[email protected]>
Date:   Wed Mar 11 13:39:46 2020 +0100

radv/llvm: fix subgroup shuffle for chips without bpermute

bpermute only exists on GFX8+ and only with Wave32 on GFX10. Instead
we have to use readlane with a waterfall loop to defeat the LLVM
backend.

This fixes DOOM Eternal which requires subgroup shuffle.

Cc: <[email protected]>
Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Bas Nieuwenhuizen <[email protected]>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4284>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4284>
(cherry picked from commit 7ac8bb33cd6025f805a390e7647506e932f4db0d)

Squashed with:

radv: Enable subgroup shuffle on GFX10 when ACO is used.

Signed-off-by: Timur Kristóf <[email protected]>
Reviewed-by: Samuel Pitoiset <[email protected]>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4159>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4159>
(cherry picked from commit cfa299eadb21893348c60906dfde8feb175c7f14)

---

 .pick_status.json             |  4 ++--
 src/amd/llvm/ac_nir_to_llvm.c | 29 +++++++++++++++++++++++++++--
 src/amd/vulkan/radv_device.c  |  5 +++--
 3 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 5e2cecd5210..fdcfddb2cbc 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -1480,7 +1480,7 @@
         "description": "radv/llvm: fix subgroup shuffle for chips without bpermute",
         "nominated": true,
         "nomination_type": 0,
-        "resolution": 0,
+        "resolution": 1,
         "master_sha": null,
         "because_sha": null
     },
@@ -4054,7 +4054,7 @@
         "description": "radv: Enable subgroup shuffle on GFX10 when ACO is used.",
         "nominated": false,
         "nomination_type": null,
-        "resolution": 4,
+        "resolution": 1,
         "master_sha": null,
         "because_sha": null
     },
diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index c609384948f..5bc8291a2ee 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -3868,8 +3868,33 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
                break;
        }
        case nir_intrinsic_shuffle:
-               result = ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]),
-                               get_src(ctx, instr->src[1]));
+               if (ctx->ac.chip_class == GFX8 ||
+                   ctx->ac.chip_class == GFX9 ||
+                   (ctx->ac.chip_class == GFX10 && ctx->ac.wave_size == 32)) {
+                       result = ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]),
+                                                 get_src(ctx, instr->src[1]));
+               } else {
+                       LLVMValueRef src = get_src(ctx, instr->src[0]);
+                       LLVMValueRef index = get_src(ctx, instr->src[1]);
+                       LLVMTypeRef type = LLVMTypeOf(src);
+                       struct waterfall_context wctx;
+                       LLVMValueRef index_val;
+
+                       index_val = enter_waterfall(ctx, &wctx, index, true);
+
+                       src = LLVMBuildZExt(ctx->ac.builder, src,
+                                           ctx->ac.i32, "");
+
+                       result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.readlane",
+                                                   ctx->ac.i32,
+                                                   (LLVMValueRef []) { src, index_val }, 2,
+                                                   AC_FUNC_ATTR_READNONE |
+                                                   AC_FUNC_ATTR_CONVERGENT);
+
+                       result = LLVMBuildTrunc(ctx->ac.builder, result, type, "");
+
+                       result = exit_waterfall(ctx, &wctx, result);
+               }
                break;
        case nir_intrinsic_reduce:
                result = ac_build_reduce(&ctx->ac,
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 0a2e3836eef..25ec15d297e 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -1449,8 +1449,9 @@ radv_get_physical_device_properties_1_1(struct radv_physical_device *pdevice,
                                         VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
                                         VK_SUBGROUP_FEATURE_QUAD_BIT;
 
-       if (pdevice->rad_info.chip_class == GFX8 ||
-           pdevice->rad_info.chip_class == GFX9) {
+       if (((pdevice->rad_info.chip_class == GFX6 ||
+             pdevice->rad_info.chip_class == GFX7) && !pdevice->use_aco) ||
+           pdevice->rad_info.chip_class >= GFX8) {
                p->subgroupSupportedOperations |= VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
                                                  VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;
        }

_______________________________________________
mesa-commit mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to