hliao added a comment. The code could be simply converted to a kernel one following the same pattern:
struct S { float *p; float a[64]; int n; }; __global__ void kernel(S s) { *s.p = s.a[s.n]; } Here's the LLVM IR after frontend define protected amdgpu_kernel void @_Z6kernel1S(%struct.S.coerce %0) #2 { %2 = alloca %struct.S, align 8, addrspace(5) %3 = addrspacecast %struct.S addrspace(5)* %2 to %struct.S* %4 = bitcast %struct.S* %3 to %struct.S.coerce* %5 = getelementptr inbounds %struct.S.coerce, %struct.S.coerce* %4, i32 0, i32 0 %6 = extractvalue %struct.S.coerce %0, 0 store float addrspace(1)* %6, float addrspace(1)** %5, align 8 %7 = getelementptr inbounds %struct.S.coerce, %struct.S.coerce* %4, i32 0, i32 1 %8 = extractvalue %struct.S.coerce %0, 1 store [64 x float] %8, [64 x float]* %7, align 8 %9 = getelementptr inbounds %struct.S.coerce, %struct.S.coerce* %4, i32 0, i32 2 %10 = extractvalue %struct.S.coerce %0, 2 store i32 %10, i32* %9, align 8 %11 = getelementptr inbounds %struct.S, %struct.S* %3, i32 0, i32 1 %12 = getelementptr inbounds %struct.S, %struct.S* %3, i32 0, i32 2 %13 = load i32, i32* %12, align 8, !tbaa !12 %14 = sext i32 %13 to i64 %15 = getelementptr inbounds [64 x float], [64 x float]* %11, i64 0, i64 %14 %16 = load float, float* %15, align 4, !tbaa !14 %17 = getelementptr inbounds %struct.S, %struct.S* %3, i32 0, i32 0 %18 = load float*, float** %17, align 8, !tbaa !16 store float %16, float* %18, align 4, !tbaa !14 ret void } and here's the optimized IR before codegen target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7" target triple = "amdgcn-amd-amdhsa" %struct.S.coerce = type { float addrspace(1)*, [64 x float], i32 } %struct.S = type { float*, [64 x float], i32 } ; Function Attrs: nofree norecurse nounwind writeonly define protected amdgpu_kernel void @_Z6kernel1S(%struct.S.coerce %0) local_unnamed_addr #0 { %2 = alloca %struct.S, align 8, addrspace(5) %3 = bitcast %struct.S addrspace(5)* %2 to float addrspace(1)* addrspace(5)* %4 = extractvalue %struct.S.coerce %0, 0 store float addrspace(1)* %4, float addrspace(1)* addrspace(5)* %3, align 8 %5 = extractvalue %struct.S.coerce %0, 1 %6 = extractvalue [64 x float] %5, 0 %7 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 0 store float %6, float addrspace(5)* %7, align 8 %8 = extractvalue [64 x float] %5, 1 %9 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 1 store float %8, float addrspace(5)* %9, align 4 %10 = extractvalue [64 x float] %5, 2 %11 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 2 store float %10, float addrspace(5)* %11, align 8 %12 = extractvalue [64 x float] %5, 3 %13 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 3 store float %12, float addrspace(5)* %13, align 4 %14 = extractvalue [64 x float] %5, 4 %15 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 4 store float %14, float addrspace(5)* %15, align 8 %16 = extractvalue [64 x float] %5, 5 %17 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 5 store float %16, float addrspace(5)* %17, align 4 %18 = extractvalue [64 x float] %5, 6 %19 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 6 store float %18, float addrspace(5)* %19, align 8 %20 = extractvalue [64 x float] %5, 7 %21 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 7 store float %20, float addrspace(5)* %21, align 4 %22 = extractvalue [64 x float] %5, 8 %23 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 8 store float %22, float addrspace(5)* %23, align 8 %24 = extractvalue [64 x float] %5, 9 %25 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 9 store float %24, float addrspace(5)* %25, align 4 %26 = extractvalue [64 x float] %5, 10 %27 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 10 store float %26, float addrspace(5)* %27, align 8 %28 = extractvalue [64 x float] %5, 11 %29 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 11 store float %28, float addrspace(5)* %29, align 4 %30 = extractvalue [64 x float] %5, 12 %31 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 12 store float %30, float addrspace(5)* %31, align 8 %32 = extractvalue [64 x float] %5, 13 %33 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 13 store float %32, float addrspace(5)* %33, align 4 %34 = extractvalue [64 x float] %5, 14 %35 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 14 store float %34, float addrspace(5)* %35, align 8 %36 = extractvalue [64 x float] %5, 15 %37 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 15 store float %36, float addrspace(5)* %37, align 4 %38 = extractvalue [64 x float] %5, 16 %39 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 16 store float %38, float addrspace(5)* %39, align 8 %40 = extractvalue [64 x float] %5, 17 %41 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 17 store float %40, float addrspace(5)* %41, align 4 %42 = extractvalue [64 x float] %5, 18 %43 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 18 store float %42, float addrspace(5)* %43, align 8 %44 = extractvalue [64 x float] %5, 19 %45 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 19 store float %44, float addrspace(5)* %45, align 4 %46 = extractvalue [64 x float] %5, 20 %47 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 20 store float %46, float addrspace(5)* %47, align 8 %48 = extractvalue [64 x float] %5, 21 %49 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 21 store float %48, float addrspace(5)* %49, align 4 %50 = extractvalue [64 x float] %5, 22 %51 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 22 store float %50, float addrspace(5)* %51, align 8 %52 = extractvalue [64 x float] %5, 23 %53 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 23 store float %52, float addrspace(5)* %53, align 4 %54 = extractvalue [64 x float] %5, 24 %55 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 24 store float %54, float addrspace(5)* %55, align 8 %56 = extractvalue [64 x float] %5, 25 %57 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 25 store float %56, float addrspace(5)* %57, align 4 %58 = extractvalue [64 x float] %5, 26 %59 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 26 store float %58, float addrspace(5)* %59, align 8 %60 = extractvalue [64 x float] %5, 27 %61 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 27 store float %60, float addrspace(5)* %61, align 4 %62 = extractvalue [64 x float] %5, 28 %63 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 28 store float %62, float addrspace(5)* %63, align 8 %64 = extractvalue [64 x float] %5, 29 %65 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 29 store float %64, float addrspace(5)* %65, align 4 %66 = extractvalue [64 x float] %5, 30 %67 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 30 store float %66, float addrspace(5)* %67, align 8 %68 = extractvalue [64 x float] %5, 31 %69 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 31 store float %68, float addrspace(5)* %69, align 4 %70 = extractvalue [64 x float] %5, 32 %71 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 32 store float %70, float addrspace(5)* %71, align 8 %72 = extractvalue [64 x float] %5, 33 %73 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 33 store float %72, float addrspace(5)* %73, align 4 %74 = extractvalue [64 x float] %5, 34 %75 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 34 store float %74, float addrspace(5)* %75, align 8 %76 = extractvalue [64 x float] %5, 35 %77 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 35 store float %76, float addrspace(5)* %77, align 4 %78 = extractvalue [64 x float] %5, 36 %79 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 36 store float %78, float addrspace(5)* %79, align 8 %80 = extractvalue [64 x float] %5, 37 %81 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 37 store float %80, float addrspace(5)* %81, align 4 %82 = extractvalue [64 x float] %5, 38 %83 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 38 store float %82, float addrspace(5)* %83, align 8 %84 = extractvalue [64 x float] %5, 39 %85 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 39 store float %84, float addrspace(5)* %85, align 4 %86 = extractvalue [64 x float] %5, 40 %87 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 40 store float %86, float addrspace(5)* %87, align 8 %88 = extractvalue [64 x float] %5, 41 %89 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 41 store float %88, float addrspace(5)* %89, align 4 %90 = extractvalue [64 x float] %5, 42 %91 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 42 store float %90, float addrspace(5)* %91, align 8 %92 = extractvalue [64 x float] %5, 43 %93 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 43 store float %92, float addrspace(5)* %93, align 4 %94 = extractvalue [64 x float] %5, 44 %95 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 44 store float %94, float addrspace(5)* %95, align 8 %96 = extractvalue [64 x float] %5, 45 %97 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 45 store float %96, float addrspace(5)* %97, align 4 %98 = extractvalue [64 x float] %5, 46 %99 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 46 store float %98, float addrspace(5)* %99, align 8 %100 = extractvalue [64 x float] %5, 47 %101 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 47 store float %100, float addrspace(5)* %101, align 4 %102 = extractvalue [64 x float] %5, 48 %103 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 48 store float %102, float addrspace(5)* %103, align 8 %104 = extractvalue [64 x float] %5, 49 %105 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 49 store float %104, float addrspace(5)* %105, align 4 %106 = extractvalue [64 x float] %5, 50 %107 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 50 store float %106, float addrspace(5)* %107, align 8 %108 = extractvalue [64 x float] %5, 51 %109 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 51 store float %108, float addrspace(5)* %109, align 4 %110 = extractvalue [64 x float] %5, 52 %111 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 52 store float %110, float addrspace(5)* %111, align 8 %112 = extractvalue [64 x float] %5, 53 %113 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 53 store float %112, float addrspace(5)* %113, align 4 %114 = extractvalue [64 x float] %5, 54 %115 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 54 store float %114, float addrspace(5)* %115, align 8 %116 = extractvalue [64 x float] %5, 55 %117 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 55 store float %116, float addrspace(5)* %117, align 4 %118 = extractvalue [64 x float] %5, 56 %119 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 56 store float %118, float addrspace(5)* %119, align 8 %120 = extractvalue [64 x float] %5, 57 %121 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 57 store float %120, float addrspace(5)* %121, align 4 %122 = extractvalue [64 x float] %5, 58 %123 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 58 store float %122, float addrspace(5)* %123, align 8 %124 = extractvalue [64 x float] %5, 59 %125 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 59 store float %124, float addrspace(5)* %125, align 4 %126 = extractvalue [64 x float] %5, 60 %127 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 60 store float %126, float addrspace(5)* %127, align 8 %128 = extractvalue [64 x float] %5, 61 %129 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 61 store float %128, float addrspace(5)* %129, align 4 %130 = extractvalue [64 x float] %5, 62 %131 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 62 store float %130, float addrspace(5)* %131, align 8 %132 = extractvalue [64 x float] %5, 63 %133 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 63 store float %132, float addrspace(5)* %133, align 4 %134 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 2 %135 = extractvalue %struct.S.coerce %0, 2 store i32 %135, i32 addrspace(5)* %134, align 8 %136 = getelementptr inbounds %struct.S, %struct.S addrspace(5)* %2, i32 0, i32 1, i32 %135 %137 = bitcast float addrspace(5)* %136 to i32 addrspace(5)* %138 = load i32, i32 addrspace(5)* %137, align 4, !tbaa !4 %139 = bitcast %struct.S addrspace(5)* %2 to i32* addrspace(5)* %140 = load i32*, i32* addrspace(5)* %139, align 8, !tbaa !8 store i32 %138, i32* %140, align 4, !tbaa !4 ret void } and here's the optimized after this patch, the `alloca` is eliminated. target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7" target triple = "amdgcn-amd-amdhsa" %struct.S = type { float*, [64 x float], i32 } ; Function Attrs: nofree norecurse nounwind writeonly define protected amdgpu_kernel void @_Z6kernel1S(%struct.S addrspace(4)* nocapture readonly byref(%struct.S) align 8 %0) local_unnamed_addr #0 { %2 = getelementptr %struct.S, %struct.S addrspace(4)* %0, i64 0, i32 2 %3 = load i32, i32 addrspace(4)* %2, align 8, !tbaa !5 %4 = sext i32 %3 to i64 %5 = getelementptr %struct.S, %struct.S addrspace(4)* %0, i64 0, i32 1, i64 %4 %6 = load float, float addrspace(4)* %5, align 4, !tbaa !11 %7 = getelementptr %struct.S, %struct.S addrspace(4)* %0, i64 0, i32 0 %8 = load float*, float* addrspace(4)* %7, align 8, !tbaa !13 store float %6, float* %8, align 4, !tbaa !11 ret void } In D89980#2371290 <https://reviews.llvm.org/D89980#2371290>, @arsenm wrote: > In D89980#2371270 <https://reviews.llvm.org/D89980#2371270>, @hliao wrote: > >> In D89980#2368506 <https://reviews.llvm.org/D89980#2368506>, @arsenm wrote: >> >>> I think this is a dead end approach. I don't see the connection to the >>> original problem you are trying to solve. Can you send me an IR testcase >>> that this is supposed to help? >> >> That's probably commonly known. If we pass an aggregate parameter directly >> by value and dynamically index it late, that `alloca` cannot be promoted as >> that aggregate value in LLVM IR cannot be dynamically indexed. For example, >> >> struct S { >> int a[100]; >> int n; >> }; >> >> int foo(S s) { >> return s.a[s.n]; >> } > > This example is not a kernel > >> If the underlying ABI chooses to pass `s` directly by value, we have the >> following pseudo IR. >> >> %s = alloca S >> ; store `s` value into %s as the parameter is treated as a local variable >> by filling its initial value from LLVM IR parameter. >> ... >> ; regular parameter access through %s with dynamic indices >> >> that `store` from the parameter from LLVM IR is an aggregate value store. >> Later, when %s is to be promoted, as it's once dynamically indexed, we >> cannot promote it as dynamic index on aggregate values is not representable >> in LLVM IR. >> >> In contrast, if a parameter is passed by value indirectly, that `store` is >> replaced with a `memcpy`. It's straightforward to promote '%s' as they are >> all memory operands of the same layout. >> >> If you need detailed IR, I may post here for your reference. > > I need an actual source and IR example. I think you are describing the > missing promotion of pointers inside byref arguments. We need better > promotion here, not eliminate it. It needs to cast the byref pointer, or cast > the pointers inside the struct when accessed Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D89980/new/ https://reviews.llvm.org/D89980 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits