On 18/01/18 00:34, Bas Nieuwenhuizen wrote:
Fixes: 91074bb11bda "radv/ac: Implement Float64 SSBO stores."
---
  src/amd/common/ac_nir_to_llvm.c | 46 +++++++++++++++++++++++++++--------------
  1 file changed, 31 insertions(+), 15 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 12f7772a5c..513289c838 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2418,6 +2418,36 @@ static uint32_t widen_mask(uint32_t mask, unsigned 
multiplier)
        return new_mask;
  }
+static LLVMValueRef extract_vector_range(struct ac_llvm_context *ctx, LLVMValueRef src,
+                                         unsigned start, unsigned count)
+{
+       LLVMTypeRef type = LLVMTypeOf(src);
+       /* Returns elements [start, start + count) of src; a non-vector src is returned as-is. */
+       if (LLVMGetTypeKind(type) != LLVMVectorTypeKind) {
+               assert(start == 0);
+               assert(count == 1);
+               return src;
+       }
+
+       unsigned src_elements = LLVMGetVectorSize(type);
+       assert(start < src_elements);
+       assert(start + count <= src_elements);
+
+       if (start == 0 && count == src_elements)
+               return src;
+
+       if (count == 1)
+               return LLVMBuildExtractElement(ctx->builder, src, LLVMConstInt(ctx->i32, start, false), "");
+       /* Multi-element range: the shuffle mask must select lanes start..start+count-1, not 0..count-1. */
+       assert(count <= 8);
+       LLVMValueRef indices[8];
+       for (unsigned i = 0; i < count; ++i)
+               indices[i] = LLVMConstInt(ctx->i32, start + i, false);
+
+       LLVMValueRef swizzle = LLVMConstVector(indices, count);
+       return LLVMBuildShuffleVector(ctx->builder, src, src, swizzle, "");
+}
+
  static void visit_store_ssbo(struct ac_nir_context *ctx,
                               nir_intrinsic_instr *instr)
  {
@@ -2467,28 +2497,14 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
if (count == 4) {
                        store_name = "llvm.amdgcn.buffer.store.v4f32";
-                       data = base_data;
                } else if (count == 2) {
-                       tmp = LLVMBuildExtractElement(ctx->ac.builder,

The declaration of tmp (above this hunk) also needs to be removed, otherwise it causes an unused variable warning.


-                                                     base_data, 
LLVMConstInt(ctx->ac.i32, start, false), "");
-                       data = LLVMBuildInsertElement(ctx->ac.builder, 
LLVMGetUndef(ctx->ac.v2f32), tmp,
-                                                     ctx->ac.i32_0, "");
-
-                       tmp = LLVMBuildExtractElement(ctx->ac.builder,
-                                                     base_data, 
LLVMConstInt(ctx->ac.i32, start + 1, false), "");
-                       data = LLVMBuildInsertElement(ctx->ac.builder, data, 
tmp,
-                                                     ctx->ac.i32_1, "");
                        store_name = "llvm.amdgcn.buffer.store.v2f32";
} else {
                        assert(count == 1);
-                       if (ac_get_llvm_num_components(base_data) > 1)
-                               data = LLVMBuildExtractElement(ctx->ac.builder, 
base_data,
-                                                              LLVMConstInt(ctx->ac.i32, 
start, false), "");
-                       else
-                               data = base_data;
                        store_name = "llvm.amdgcn.buffer.store.f32";
                }
+               data = extract_vector_range(base_data, start, count);

This needs to be:

        data = extract_vector_range(&ctx->ac, base_data, start, count);
offset = base_offset;
                if (start != 0) {

Did you have more changes locally that didn't get sent? The test still fails for me:

R600_DEBUG=nir /home/tarceri/git/piglit/bin/arb_gpu_shader_fp64-layout-std140-fp64-shader -auto -fbo

expected[6] = 10. Read value: 6.3333333333333
expected[7] = 3. Read value: 8
PIGLIT: {"result": "fail" }


Also the std430 layout test has further issues:

R600_DEBUG=nir /home/tarceri/git/piglit/bin/arb_gpu_shader_fp64-layout-std430-fp64-shader -auto -fbo

Invalid insertelement operands!
  %36 = insertelement <4 x i64> %35, i32 undef, i32 1
Invalid insertelement operands!
  %37 = insertelement <4 x i64> %36, i32 undef, i32 2
LLVM ERROR: Broken function found, compilation aborted!

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to