hliao added a comment.

The code could be simply converted to a kernel one following the same pattern:

  struct S {
          float *p;
          float a[64];
          int n;
  };
  
  __global__ void kernel(S s) {
          *s.p = s.a[s.n];
  }

Here's the LLVM IR after frontend

  define protected amdgpu_kernel void @_Z6kernel1S(%struct.S.coerce %0) #2 {
    %2 = alloca %struct.S, align 8, addrspace(5)
    %3 = addrspacecast %struct.S addrspace(5)* %2 to %struct.S*
    %4 = bitcast %struct.S* %3 to %struct.S.coerce*
    %5 = getelementptr inbounds %struct.S.coerce, %struct.S.coerce* %4, i32 0, 
i32 0
    %6 = extractvalue %struct.S.coerce %0, 0
    store float addrspace(1)* %6, float addrspace(1)** %5, align 8
    %7 = getelementptr inbounds %struct.S.coerce, %struct.S.coerce* %4, i32 0, 
i32 1
    %8 = extractvalue %struct.S.coerce %0, 1
    store [64 x float] %8, [64 x float]* %7, align 8
    %9 = getelementptr inbounds %struct.S.coerce, %struct.S.coerce* %4, i32 0, 
i32 2
    %10 = extractvalue %struct.S.coerce %0, 2
    store i32 %10, i32* %9, align 8
    %11 = getelementptr inbounds %struct.S, %struct.S* %3, i32 0, i32 1
    %12 = getelementptr inbounds %struct.S, %struct.S* %3, i32 0, i32 2
    %13 = load i32, i32* %12, align 8, !tbaa !12
    %14 = sext i32 %13 to i64
    %15 = getelementptr inbounds [64 x float], [64 x float]* %11, i64 0, i64 %14
    %16 = load float, float* %15, align 4, !tbaa !14
    %17 = getelementptr inbounds %struct.S, %struct.S* %3, i32 0, i32 0
    %18 = load float*, float** %17, align 8, !tbaa !16
    store float %16, float* %18, align 4, !tbaa !14
    ret void
  }

and here's the optimized IR before codegen

  target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7"
  target triple = "amdgcn-amd-amdhsa"
  
  %struct.S.coerce = type { float addrspace(1)*, [64 x float], i32 }
  %struct.S = type { float*, [64 x float], i32 }
  
  ; Function Attrs: nofree norecurse nounwind writeonly
  define protected amdgpu_kernel void @_Z6kernel1S(%struct.S.coerce %0) 
local_unnamed_addr #0 {
    %2 = alloca %struct.S, align 8, addrspace(5)
    %3 = bitcast %struct.S addrspace(5)* %2 to float addrspace(1)* addrspace(5)*
    %4 = extractvalue %struct.S.coerce %0, 0
    store float addrspace(1)* %4, float addrspace(1)* addrspace(5)* %3, align 8
    %5 = extractvalue %struct.S.coerce %0, 1
    %6 = extractvalue [64 x float] %5, 0
    %7 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 0
    store float %6, float addrspace(5)* %7, align 8
    %8 = extractvalue [64 x float] %5, 1
    %9 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 1
    store float %8, float addrspace(5)* %9, align 4
    %10 = extractvalue [64 x float] %5, 2
    %11 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 2
    store float %10, float addrspace(5)* %11, align 8
    %12 = extractvalue [64 x float] %5, 3
    %13 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 3
    store float %12, float addrspace(5)* %13, align 4
    %14 = extractvalue [64 x float] %5, 4
    %15 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 4
    store float %14, float addrspace(5)* %15, align 8
    %16 = extractvalue [64 x float] %5, 5
    %17 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 5
    store float %16, float addrspace(5)* %17, align 4
    %18 = extractvalue [64 x float] %5, 6
    %19 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 6
    store float %18, float addrspace(5)* %19, align 8
    %20 = extractvalue [64 x float] %5, 7
    %21 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 7
    store float %20, float addrspace(5)* %21, align 4
    %22 = extractvalue [64 x float] %5, 8
    %23 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 8
    store float %22, float addrspace(5)* %23, align 8
    %24 = extractvalue [64 x float] %5, 9
    %25 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 9
    store float %24, float addrspace(5)* %25, align 4
    %26 = extractvalue [64 x float] %5, 10
    %27 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 10
    store float %26, float addrspace(5)* %27, align 8
    %28 = extractvalue [64 x float] %5, 11
    %29 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 11
    store float %28, float addrspace(5)* %29, align 4
    %30 = extractvalue [64 x float] %5, 12
    %31 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 12
    store float %30, float addrspace(5)* %31, align 8
    %32 = extractvalue [64 x float] %5, 13
    %33 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 13
    store float %32, float addrspace(5)* %33, align 4
    %34 = extractvalue [64 x float] %5, 14
    %35 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 14
    store float %34, float addrspace(5)* %35, align 8
    %36 = extractvalue [64 x float] %5, 15
    %37 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 15
    store float %36, float addrspace(5)* %37, align 4
    %38 = extractvalue [64 x float] %5, 16
    %39 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 16
    store float %38, float addrspace(5)* %39, align 8
    %40 = extractvalue [64 x float] %5, 17
    %41 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 17
    store float %40, float addrspace(5)* %41, align 4
    %42 = extractvalue [64 x float] %5, 18
    %43 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 18
    store float %42, float addrspace(5)* %43, align 8
    %44 = extractvalue [64 x float] %5, 19
    %45 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 19
    store float %44, float addrspace(5)* %45, align 4
    %46 = extractvalue [64 x float] %5, 20
    %47 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 20
    store float %46, float addrspace(5)* %47, align 8
    %48 = extractvalue [64 x float] %5, 21
    %49 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 21
    store float %48, float addrspace(5)* %49, align 4
    %50 = extractvalue [64 x float] %5, 22
    %51 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 22
    store float %50, float addrspace(5)* %51, align 8
    %52 = extractvalue [64 x float] %5, 23
    %53 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 23
    store float %52, float addrspace(5)* %53, align 4
    %54 = extractvalue [64 x float] %5, 24
    %55 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 24
    store float %54, float addrspace(5)* %55, align 8
    %56 = extractvalue [64 x float] %5, 25
    %57 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 25
    store float %56, float addrspace(5)* %57, align 4
    %58 = extractvalue [64 x float] %5, 26
    %59 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 26
    store float %58, float addrspace(5)* %59, align 8
    %60 = extractvalue [64 x float] %5, 27
    %61 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 27
    store float %60, float addrspace(5)* %61, align 4
    %62 = extractvalue [64 x float] %5, 28
    %63 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 28
    store float %62, float addrspace(5)* %63, align 8
    %64 = extractvalue [64 x float] %5, 29
    %65 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 29
    store float %64, float addrspace(5)* %65, align 4
    %66 = extractvalue [64 x float] %5, 30
    %67 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 30
    store float %66, float addrspace(5)* %67, align 8
    %68 = extractvalue [64 x float] %5, 31
    %69 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 31
    store float %68, float addrspace(5)* %69, align 4
    %70 = extractvalue [64 x float] %5, 32
    %71 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 32
    store float %70, float addrspace(5)* %71, align 8
    %72 = extractvalue [64 x float] %5, 33
    %73 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 33
    store float %72, float addrspace(5)* %73, align 4
    %74 = extractvalue [64 x float] %5, 34
    %75 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 34
    store float %74, float addrspace(5)* %75, align 8
    %76 = extractvalue [64 x float] %5, 35
    %77 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 35
    store float %76, float addrspace(5)* %77, align 4
    %78 = extractvalue [64 x float] %5, 36
    %79 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 36
    store float %78, float addrspace(5)* %79, align 8
    %80 = extractvalue [64 x float] %5, 37
    %81 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 37
    store float %80, float addrspace(5)* %81, align 4
    %82 = extractvalue [64 x float] %5, 38
    %83 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 38
    store float %82, float addrspace(5)* %83, align 8
    %84 = extractvalue [64 x float] %5, 39
    %85 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 39
    store float %84, float addrspace(5)* %85, align 4
    %86 = extractvalue [64 x float] %5, 40
    %87 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 40
    store float %86, float addrspace(5)* %87, align 8
    %88 = extractvalue [64 x float] %5, 41
    %89 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 41
    store float %88, float addrspace(5)* %89, align 4
    %90 = extractvalue [64 x float] %5, 42
    %91 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 42
    store float %90, float addrspace(5)* %91, align 8
    %92 = extractvalue [64 x float] %5, 43
    %93 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 43
    store float %92, float addrspace(5)* %93, align 4
    %94 = extractvalue [64 x float] %5, 44
    %95 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 44
    store float %94, float addrspace(5)* %95, align 8
    %96 = extractvalue [64 x float] %5, 45
    %97 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 45
    store float %96, float addrspace(5)* %97, align 4
    %98 = extractvalue [64 x float] %5, 46
    %99 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 46
    store float %98, float addrspace(5)* %99, align 8
    %100 = extractvalue [64 x float] %5, 47
    %101 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 47
    store float %100, float addrspace(5)* %101, align 4
    %102 = extractvalue [64 x float] %5, 48
    %103 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 48
    store float %102, float addrspace(5)* %103, align 8
    %104 = extractvalue [64 x float] %5, 49
    %105 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 49
    store float %104, float addrspace(5)* %105, align 4
    %106 = extractvalue [64 x float] %5, 50
    %107 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 50
    store float %106, float addrspace(5)* %107, align 8
    %108 = extractvalue [64 x float] %5, 51
    %109 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 51
    store float %108, float addrspace(5)* %109, align 4
    %110 = extractvalue [64 x float] %5, 52
    %111 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 52
    store float %110, float addrspace(5)* %111, align 8
    %112 = extractvalue [64 x float] %5, 53
    %113 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 53
    store float %112, float addrspace(5)* %113, align 4
    %114 = extractvalue [64 x float] %5, 54
    %115 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 54
    store float %114, float addrspace(5)* %115, align 8
    %116 = extractvalue [64 x float] %5, 55
    %117 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 55
    store float %116, float addrspace(5)* %117, align 4
    %118 = extractvalue [64 x float] %5, 56
    %119 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 56
    store float %118, float addrspace(5)* %119, align 8
    %120 = extractvalue [64 x float] %5, 57
    %121 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 57
    store float %120, float addrspace(5)* %121, align 4
    %122 = extractvalue [64 x float] %5, 58
    %123 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 58
    store float %122, float addrspace(5)* %123, align 8
    %124 = extractvalue [64 x float] %5, 59
    %125 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 59
    store float %124, float addrspace(5)* %125, align 4
    %126 = extractvalue [64 x float] %5, 60
    %127 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 60
    store float %126, float addrspace(5)* %127, align 8
    %128 = extractvalue [64 x float] %5, 61
    %129 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 61
    store float %128, float addrspace(5)* %129, align 4
    %130 = extractvalue [64 x float] %5, 62
    %131 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 62
    store float %130, float addrspace(5)* %131, align 8
    %132 = extractvalue [64 x float] %5, 63
    %133 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 63
    store float %132, float addrspace(5)* %133, align 4
    %134 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 2
    %135 = extractvalue %struct.S.coerce %0, 2
    store i32 %135, i32 addrspace(5)* %134, align 8
    %136 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, 
i32 1, i32 %135
    %137 = bitcast float addrspace(5)* %136 to i32 addrspace(5)*
    %138 = load i32, i32 addrspace(5)* %137, align 4, !tbaa !4
    %139 = bitcast %struct.S addrspace(5)* %2 to i32* addrspace(5)*
    %140 = load i32*, i32* addrspace(5)* %139, align 8, !tbaa !8
    store i32 %138, i32* %140, align 4, !tbaa !4
    ret void
  }

and here's the optimized after this patch, the `alloca` is eliminated.

  target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7"
  target triple = "amdgcn-amd-amdhsa"
  
  %struct.S = type { float*, [64 x float], i32 }
  
  ; Function Attrs: nofree norecurse nounwind writeonly
  define protected amdgpu_kernel void @_Z6kernel1S(%struct.S addrspace(4)* 
nocapture readonly byref(%struct.S) align 8 %0) local_unnamed_addr #0 {
    %2 = getelementptr %struct.S, %struct.S addrspace(4)* %0, i64 0, i32 2
    %3 = load i32, i32 addrspace(4)* %2, align 8, !tbaa !5
    %4 = sext i32 %3 to i64
    %5 = getelementptr %struct.S, %struct.S addrspace(4)* %0, i64 0, i32 1, i64 
%4
    %6 = load float, float addrspace(4)* %5, align 4, !tbaa !11
    %7 = getelementptr %struct.S, %struct.S addrspace(4)* %0, i64 0, i32 0
    %8 = load float*, float* addrspace(4)* %7, align 8, !tbaa !13
    store float %6, float* %8, align 4, !tbaa !11
    ret void
  }



In D89980#2371290 <https://reviews.llvm.org/D89980#2371290>, @arsenm wrote:

> In D89980#2371270 <https://reviews.llvm.org/D89980#2371270>, @hliao wrote:
>
>> In D89980#2368506 <https://reviews.llvm.org/D89980#2368506>, @arsenm wrote:
>>
>>> I think this is a dead end approach. I don't see the connection to the 
>>> original problem you are trying to solve. Can you send me an IR testcase 
>>> that this is supposed to help?
>>
>> That's probably commonly known. If we pass an aggregate parameter directly 
>> by value and dynamically index it late, that `alloca` cannot be promoted as 
>> that aggregate value in LLVM IR cannot be dynamically indexed. For example,
>>
>>   struct S {
>>      int a[100];
>>      int n;
>>   };
>>   
>>   int foo(S s) {
>>     return s.a[s.n];
>>   }
>
> This example is not a kernel
>
>> If the underlying ABI chooses to pass `s` directly by value, we have the 
>> following pseudo IR.
>>
>>   %s = alloca S
>>   ; store `s` value into %s as the parameter is treated as a local variable 
>> by filling its initial value from LLVM IR parameter.
>>   ...
>>   ; regular parameter access through %s with dynamic indices
>>
>> that `store` from the parameter from LLVM IR is an aggregate value store. 
>> Later, when %s is to be promoted, as it's once dynamically indexed, we 
>> cannot promote it as dynamic index on aggregate values is not representable 
>> in LLVM IR.
>>
>> In contrast, if a parameter is passed by value indirectly, that `store` is 
>> replaced with a `memcpy`. It's straightforward to promote '%s' as they are 
>> all memory operands of the same layout.
>>
>> If you need detailed IR, I may post here for your reference.
>
> I need an actual source and IR example.  I think you are describing the 
> missing promotion of pointers inside byref arguments. We need better 
> promotion here, not eliminate it. It needs to cast the byref pointer, or cast 
> the pointers inside the struct when accessed




Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D89980/new/

https://reviews.llvm.org/D89980

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to