Module: Mesa
Branch: 19.2
Commit: 5c98b3657791d0786abc60ff55f5aa755d8f82ce
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=5c98b3657791d0786abc60ff55f5aa755d8f82ce

Author: Samuel Pitoiset <[email protected]>
Date:   Fri Aug 23 17:53:05 2019 +0200

radv/gfx10: fix implementation of exclusive scans

This implementation is loosely based on ROCm.
https://github.com/RadeonOpenCompute/ROCm-Device-Libs/blob/master/ockl/src/wfredscan.cl

This fixes dEQP-VK.subgroups.arithmetic.*.subgroupexclusive* on GFX10.

Fixes: 227c29a80de ("amd/common/gfx10: implement scan & reduce operations")
Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Bas Nieuwenhuizen <[email protected]>
(cherry picked from commit c9aa843961d2c3cb34e7cb2dc843b93d723e0692)
Conflicts resolved by Dylan Baker

---

 src/amd/common/ac_llvm_build.c | 83 +++++++++++++++++++++++++++++-------------
 1 file changed, 58 insertions(+), 25 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 5abae00d8f6..07f356a5ee2 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -4218,8 +4218,43 @@ ac_build_scan(struct ac_llvm_context *ctx, nir_op op, LLVMValueRef src, LLVMValu
 {
        LLVMValueRef result, tmp;
 
-       if (ctx->chip_class >= GFX10) {
-               result = inclusive ? src : identity;
+       if (inclusive) {
+               result = src;
+       } else if (ctx->chip_class >= GFX10) {
+               /* wavefront shift_right by 1 on GFX10 (emulate dpp_wf_sr1) */
+               LLVMValueRef active, tmp1, tmp2;
+               LLVMValueRef tid = ac_get_thread_id(ctx);
+
+               tmp1 = ac_build_dpp(ctx, identity, src, dpp_row_sr(1), 0xf, 
0xf, false);
+
+               tmp2 = ac_build_permlane16(ctx, src, (uint64_t)~0, true, false);
+
+               if (maxprefix > 32) {
+                       active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid,
+                                              LLVMConstInt(ctx->i32, 32, 
false), "");
+
+                       tmp2 = LLVMBuildSelect(ctx->builder, active,
+                                              ac_build_readlane(ctx, src,
+                                                                
LLVMConstInt(ctx->i32, 31, false)),
+                                              tmp2, "");
+
+                       active = LLVMBuildOr(ctx->builder, active,
+                                            LLVMBuildICmp(ctx->builder, 
LLVMIntEQ,
+                                                          
LLVMBuildAnd(ctx->builder, tid,
+                                                                       
LLVMConstInt(ctx->i32, 0x1f, false), ""),
+                                                          
LLVMConstInt(ctx->i32, 0x10, false), ""), "");
+                       src = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, 
"");
+               } else if (maxprefix > 16) {
+                       active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid,
+                                              LLVMConstInt(ctx->i32, 16, 
false), "");
+
+                       src = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, 
"");
+               }
+
+               result = src;
+       } else if (ctx->chip_class >= GFX8) {
+               src = ac_build_dpp(ctx, identity, src, dpp_wf_sr1, 0xf, 0xf, 
false);
+               result = src;
        } else {
                if (!inclusive)
                        src = ac_build_dpp(ctx, identity, src, dpp_wf_sr1, 0xf, 
0xf, false);
@@ -4249,33 +4284,31 @@ ac_build_scan(struct ac_llvm_context *ctx, nir_op op, LLVMValueRef src, LLVMValu
                return result;
 
        if (ctx->chip_class >= GFX10) {
-               /* dpp_row_bcast{15,31} are not supported on gfx10. */
-               LLVMBuilderRef builder = ctx->builder;
                LLVMValueRef tid = ac_get_thread_id(ctx);
-               LLVMValueRef cc;
-               /* TODO-GFX10: Can we get better code-gen by putting this into
-                * a branch so that LLVM generates EXEC mask manipulations? */
-               if (inclusive)
-                       tmp = result;
-               else
-                       tmp = ac_build_alu_op(ctx, result, src, op);
-               tmp = ac_build_permlane16(ctx, tmp, ~(uint64_t)0, true, false);
-               tmp = ac_build_alu_op(ctx, result, tmp, op);
-               cc = LLVMBuildAnd(builder, tid, LLVMConstInt(ctx->i32, 16, 
false), "");
-               cc = LLVMBuildICmp(builder, LLVMIntNE, cc, ctx->i32_0, "");
-               result = LLVMBuildSelect(builder, cc, tmp, result, "");
+               LLVMValueRef active;
+
+               tmp = ac_build_permlane16(ctx, result, ~(uint64_t)0, true, 
false);
+
+               active = LLVMBuildICmp(ctx->builder, LLVMIntNE,
+                                      LLVMBuildAnd(ctx->builder, tid,
+                                                   LLVMConstInt(ctx->i32, 16, 
false), ""),
+                                      ctx->i32_0, "");
+
+               tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
+
+               result = ac_build_alu_op(ctx, result, tmp, op);
+
                if (maxprefix <= 32)
                        return result;
 
-               if (inclusive)
-                       tmp = result;
-               else
-                       tmp = ac_build_alu_op(ctx, result, src, op);
-               tmp = ac_build_readlane(ctx, tmp, LLVMConstInt(ctx->i32, 31, 
false));
-               tmp = ac_build_alu_op(ctx, result, tmp, op);
-               cc = LLVMBuildICmp(builder, LLVMIntUGE, tid,
-                                  LLVMConstInt(ctx->i32, 32, false), "");
-               result = LLVMBuildSelect(builder, cc, tmp, result, "");
+               tmp = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 31, 
false));
+
+               active = LLVMBuildICmp(ctx->builder, LLVMIntUGE, tid,
+                                      LLVMConstInt(ctx->i32, 32, false), "");
+
+               tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
+
+               result = ac_build_alu_op(ctx, result, tmp, op);
                return result;
        }
 

_______________________________________________
mesa-commit mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to