https://github.com/ritter-x2a updated https://github.com/llvm/llvm-project/pull/130617
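In short: the pass splits a constant offset out of a GEP, producing a variadic-base GEP plus a constant-offset GEP, and until now it dropped all wrap flags on both results. With this change, the inbounds flag is kept when the original GEP was inbounds, the accumulated constant offset is non-negative, and ValueTracking shows the remaining variable indices are non-negative. A minimal before/after sketch in LLVM IR, modeled on the @must_be_inbounds test updated below (the @example and %base names are illustrative, and the "after" IR paraphrases the test's CHECK lines rather than quoting pass output verbatim):

  ; Before the pass: constant +1 folded into the index of an inbounds GEP.
  define ptr @example(ptr %p, i32 %i) {
  entry:
    %i.prom = zext i32 %i to i64
    %idx = add nsw i64 %i.prom, 1
    %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idx
    ret ptr %arrayidx
  }

  ; After the pass, with this patch: %i.prom comes from a zext and the +4 byte
  ; offset is non-negative, so both resulting GEPs keep the inbounds flag
  ; (previously both were emitted without it).
  define ptr @example(ptr %p, i32 %i) {
  entry:
    %i.prom = zext i32 %i to i64
    %base = getelementptr inbounds i32, ptr %p, i64 %i.prom
    %arrayidx = getelementptr inbounds i8, ptr %base, i64 4
    ret ptr %arrayidx
  }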
>From ee992060fc09c567ef8a2ea6c362c8d35fd5d672 Mon Sep 17 00:00:00 2001
From: Fabian Ritter <fabian.rit...@amd.com>
Date: Mon, 10 Mar 2025 06:55:10 -0400
Subject: [PATCH 1/2] [SeparateConstOffsetFromGEP] Preserve inbounds flag based
 on ValueTracking

If we know that the initial GEP was inbounds, and we change it to a
sequence of GEPs from the same base pointer where every offset is
non-negative, then the new GEPs are inbounds.

For SWDEV-516125.
---
 .../Scalar/SeparateConstOffsetFromGEP.cpp     | 18 +++++++++++----
 .../AMDGPU/preserve-inbounds.ll               | 23 +++++++++++++++++++
 .../NVPTX/split-gep-and-gvn.ll                | 16 ++++++-------
 .../NVPTX/split-gep.ll                        |  8 +++----
 4 files changed, 48 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index ab8e979e7b40a..7f93115499bc9 100644
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -1052,6 +1052,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
     }
   }
 
+  bool MayRecoverInbounds = AccumulativeByteOffset >= 0 && GEP->isInBounds();
+
   // Remove the constant offset in each sequential index. The resultant GEP
   // computes the variadic base.
   // Notice that we don't remove struct field indices here. If LowerGEP is
@@ -1079,6 +1081,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
         // and the old index if they are not used.
         RecursivelyDeleteTriviallyDeadInstructions(UserChainTail);
         RecursivelyDeleteTriviallyDeadInstructions(OldIdx);
+        MayRecoverInbounds =
+            MayRecoverInbounds && computeKnownBits(NewIdx, *DL).isNonNegative();
       }
     }
   }
@@ -1100,11 +1104,15 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
   // address with silently-wrapping two's complement arithmetic".
   // Therefore, the final code will be a semantically equivalent.
   //
-  // TODO(jingyue): do some range analysis to keep as many inbounds as
-  // possible. GEPs with inbounds are more friendly to alias analysis.
-  // TODO(gep_nowrap): Preserve nuw at least.
-  GEPNoWrapFlags NewGEPFlags = GEPNoWrapFlags::none();
-  GEP->setNoWrapFlags(GEPNoWrapFlags::none());
+  // If the initial GEP was inbounds and all variable indices and the
+  // accumulated offsets are non-negative, they can be added in any order and
+  // the intermediate results are in bounds. So, we can preserve the inbounds
+  // flag for both GEPs. GEPs with inbounds are more friendly to alias analysis.
+  //
+  // TODO(gep_nowrap): Preserve nuw?
+  GEPNoWrapFlags NewGEPFlags =
+      MayRecoverInbounds ? GEPNoWrapFlags::inBounds() : GEPNoWrapFlags::none();
+  GEP->setNoWrapFlags(NewGEPFlags);
 
   // Lowers a GEP to either GEPs with a single index or arithmetic operations.
   if (LowerGEP) {
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll
index 422e5d8215502..01619aa481ddd 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll
@@ -16,3 +16,26 @@ entry:
   %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idx
   ret ptr %arrayidx
 }
+
+; All offsets must be positive, so inbounds can be preserved.
+define void @must_be_inbounds(ptr %dst, ptr %src, i32 %i) {
+; CHECK-LABEL: @must_be_inbounds(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[I_PROM:%.*]] = zext i32 [[I:%.*]] to i64
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 [[I_PROM]]
+; CHECK-NEXT:    [[ARRAYIDX_SRC2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[ARRAYIDX_SRC2]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 [[I_PROM]]
+; CHECK-NEXT:    [[ARRAYIDX_DST4:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 4
+; CHECK-NEXT:    store float [[TMP1]], ptr [[ARRAYIDX_DST4]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %i.prom = zext i32 %i to i64
+  %idx = add nsw i64 %i.prom, 1
+  %arrayidx.src = getelementptr inbounds float, ptr %src, i64 %idx
+  %3 = load float, ptr %arrayidx.src, align 4
+  %arrayidx.dst = getelementptr inbounds float, ptr %dst, i64 %idx
+  store float %3, ptr %arrayidx.dst, align 4
+  ret void
+}
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
index 9a73feb2c4b5c..4474585bf9b06 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
@@ -157,19 +157,19 @@ define void @sum_of_array3(i32 %x, i32 %y, ptr nocapture %output) {
 ; IR-NEXT:  .preheader:
 ; IR-NEXT:    [[TMP0:%.*]] = zext i32 [[Y]] to i64
 ; IR-NEXT:    [[TMP1:%.*]] = zext i32 [[X]] to i64
-; IR-NEXT:    [[TMP2:%.*]] = getelementptr [32 x [32 x float]], ptr addrspace(3) @array, i64 0, i64 [[TMP1]], i64 [[TMP0]]
+; IR-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [32 x [32 x float]], ptr addrspace(3) @array, i64 0, i64 [[TMP1]], i64 [[TMP0]]
 ; IR-NEXT:    [[TMP3:%.*]] = addrspacecast ptr addrspace(3) [[TMP2]] to ptr
 ; IR-NEXT:    [[TMP4:%.*]] = load float, ptr [[TMP3]], align 4
 ; IR-NEXT:    [[TMP5:%.*]] = fadd float [[TMP4]], 0.000000e+00
-; IR-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 4
+; IR-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 4
 ; IR-NEXT:    [[TMP7:%.*]] = addrspacecast ptr addrspace(3) [[TMP6]] to ptr
 ; IR-NEXT:    [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4
 ; IR-NEXT:    [[TMP9:%.*]] = fadd float [[TMP5]], [[TMP8]]
-; IR-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 128
+; IR-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 128
 ; IR-NEXT:    [[TMP11:%.*]] = addrspacecast ptr addrspace(3) [[TMP10]] to ptr
 ; IR-NEXT:    [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4
 ; IR-NEXT:    [[TMP13:%.*]] = fadd float [[TMP9]], [[TMP12]]
-; IR-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 132
+; IR-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 132
 ; IR-NEXT:    [[TMP15:%.*]] = addrspacecast ptr addrspace(3) [[TMP14]] to ptr
 ; IR-NEXT:    [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4
 ; IR-NEXT:    [[TMP17:%.*]] = fadd float [[TMP13]], [[TMP16]]
@@ -224,19 +224,19 @@ define void @sum_of_array4(i32 %x, i32 %y, ptr nocapture %output) {
 ; IR-NEXT:  .preheader:
 ; IR-NEXT:    [[TMP0:%.*]] = zext i32 [[Y]] to i64
 ; IR-NEXT:    [[TMP1:%.*]] = zext i32 [[X]] to i64
-; IR-NEXT:    [[TMP2:%.*]] = getelementptr [32 x [32 x float]], ptr addrspace(3) @array, i64 0, i64 [[TMP1]], i64 [[TMP0]]
+; IR-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [32 x [32 x float]], ptr addrspace(3) @array, i64 0, i64 [[TMP1]], i64 [[TMP0]]
 ; IR-NEXT:    [[TMP3:%.*]] = addrspacecast ptr addrspace(3) [[TMP2]] to ptr
 ; IR-NEXT:    [[TMP4:%.*]] = load float, ptr [[TMP3]], align 4
 ; IR-NEXT:    [[TMP5:%.*]] = fadd float [[TMP4]], 0.000000e+00
-; IR-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 4
+; IR-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 4
 ; IR-NEXT:    [[TMP7:%.*]] = addrspacecast ptr addrspace(3) [[TMP6]] to ptr
 ; IR-NEXT:    [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4
 ; IR-NEXT:    [[TMP9:%.*]] = fadd float [[TMP5]], [[TMP8]]
-; IR-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 128
+; IR-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 128
 ; IR-NEXT:    [[TMP11:%.*]] = addrspacecast ptr addrspace(3) [[TMP10]] to ptr
 ; IR-NEXT:    [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4
 ; IR-NEXT:    [[TMP13:%.*]] = fadd float [[TMP9]], [[TMP12]]
-; IR-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 132
+; IR-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 132
 ; IR-NEXT:    [[TMP15:%.*]] = addrspacecast ptr addrspace(3) [[TMP14]] to ptr
 ; IR-NEXT:    [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4
 ; IR-NEXT:    [[TMP17:%.*]] = fadd float [[TMP13]], [[TMP16]]
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
index 77b3434f4f159..da04a6e979425 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
@@ -372,8 +372,8 @@ define ptr @trunk_explicit(ptr %ptr, i64 %idx) {
 ; CHECK-LABEL: define ptr @trunk_explicit(
 ; CHECK-SAME: ptr [[PTR:%.*]], i64 [[IDX:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT0:%.*]], ptr [[PTR]], i64 0, i32 3, i64 [[IDX]], i32 1
-; CHECK-NEXT:    [[PTR21:%.*]] = getelementptr i8, ptr [[TMP0]], i64 3216
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT0:%.*]], ptr [[PTR]], i64 0, i32 3, i64 [[IDX]], i32 1
+; CHECK-NEXT:    [[PTR21:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 3216
 ; CHECK-NEXT:    ret ptr [[PTR21]]
 ;
 entry:
@@ -389,8 +389,8 @@ define ptr @trunk_long_idx(ptr %ptr, i64 %idx) {
 ; CHECK-LABEL: define ptr @trunk_long_idx(
 ; CHECK-SAME: ptr [[PTR:%.*]], i64 [[IDX:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT0:%.*]], ptr [[PTR]], i64 0, i32 3, i64 [[IDX]], i32 1
-; CHECK-NEXT:    [[PTR21:%.*]] = getelementptr i8, ptr [[TMP0]], i64 3216
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT0:%.*]], ptr [[PTR]], i64 0, i32 3, i64 [[IDX]], i32 1
+; CHECK-NEXT:    [[PTR21:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 3216
 ; CHECK-NEXT:    ret ptr [[PTR21]]
 ;
 entry:

>From cd1072024db9556a14febc9153f7d76926715dee Mon Sep 17 00:00:00 2001
From: Fabian Ritter <fabian.rit...@amd.com>
Date: Tue, 11 Mar 2025 10:05:26 -0400
Subject: [PATCH 2/2] Simplify test case and add more

---
 .../AMDGPU/preserve-inbounds.ll               | 126 ++++++++++++++++--
 1 file changed, 112 insertions(+), 14 deletions(-)

diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll
index 01619aa481ddd..6865fcf663c58 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll
@@ -17,25 +17,123 @@ entry:
   ret ptr %arrayidx
 }
 
-; All offsets must be positive, so inbounds can be preserved.
-define void @must_be_inbounds(ptr %dst, ptr %src, i32 %i) {
+; All indices must be non-negative, so inbounds can be preserved.
+define ptr @must_be_inbounds(ptr %p, i32 %i) {
 ; CHECK-LABEL: @must_be_inbounds(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[I_PROM:%.*]] = zext i32 [[I:%.*]] to i64
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 [[I_PROM]]
-; CHECK-NEXT:    [[ARRAYIDX_SRC2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4
-; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[ARRAYIDX_SRC2]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 [[I_PROM]]
-; CHECK-NEXT:    [[ARRAYIDX_DST4:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 4
-; CHECK-NEXT:    store float [[TMP1]], ptr [[ARRAYIDX_DST4]], align 4
-; CHECK-NEXT:    ret void
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[I_PROM]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT:    ret ptr [[ARRAYIDX2]]
 ;
 entry:
   %i.prom = zext i32 %i to i64
   %idx = add nsw i64 %i.prom, 1
-  %arrayidx.src = getelementptr inbounds float, ptr %src, i64 %idx
-  %3 = load float, ptr %arrayidx.src, align 4
-  %arrayidx.dst = getelementptr inbounds float, ptr %dst, i64 %idx
-  store float %3, ptr %arrayidx.dst, align 4
-  ret void
+  %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idx
+  ret ptr %arrayidx
+}
+
+; idx must be non-negative -> preserve inbounds
+define ptr @sign_bit_clear(ptr %p, i64 %i) {
+; CHECK-LABEL: @sign_bit_clear(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[IDX:%.*]] = and i64 [[I:%.*]], 9223372036854775807
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[IDX]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT:    ret ptr [[ARRAYIDX]]
+;
+entry:
+  %idx = and i64 %i, u0x7fffffffffffffff
+  %idx.add = add i64 %idx, 1
+  %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idx.add
+  ret ptr %arrayidx
+}
+
+; idx may be negative -> don't preserve inbounds
+define ptr @sign_bit_not_clear(ptr %p, i64 %i) {
+; CHECK-LABEL: @sign_bit_not_clear(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[IDX:%.*]] = and i64 [[I:%.*]], -256
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[IDX]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT:    ret ptr [[ARRAYIDX2]]
+;
+entry:
+  %idx = and i64 %i, u0xffffffffffffff00
+  %idx.add = add i64 %idx, 1
+  %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idx.add
+  ret ptr %arrayidx
+}
+
+; idx may be 0 or very negative -> don't preserve inbounds
+define ptr @only_sign_bit_not_clear(ptr %p, i64 %i) {
+; CHECK-LABEL: @only_sign_bit_not_clear(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[IDX:%.*]] = and i64 [[I:%.*]], -9223372036854775808
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[IDX]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT:    ret ptr [[ARRAYIDX2]]
+;
+entry:
+  %idx = and i64 %i, u0x8000000000000000
+  %idx.add = add i64 %idx, 1
+  %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idx.add
+  ret ptr %arrayidx
+}
+
+; all indices non-negative -> preserve inbounds
+define ptr @multi_level_nonnegative(ptr %p, i64 %idx1, i64 %idx2) {
+; CHECK-LABEL: @multi_level_nonnegative(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[MASKED_IDX1:%.*]] = and i64 [[IDX1:%.*]], 255
+; CHECK-NEXT:    [[MASKED_IDX2:%.*]] = and i64 [[IDX2:%.*]], 65535
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [10 x [20 x i32]], ptr [[P:%.*]], i64 0, i64 [[MASKED_IDX1]], i64 [[MASKED_IDX2]]
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 180
+; CHECK-NEXT:    ret ptr [[ARRAYIDX3]]
+;
+entry:
+  %masked.idx1 = and i64 %idx1, u0xff
+  %masked.idx2 = and i64 %idx2, u0xffff
+  %idx1.add = add i64 %masked.idx1, 2
+  %idx2.add = add i64 %masked.idx2, 5
+  %arrayidx = getelementptr inbounds [10 x [20 x i32]], ptr %p, i64 0, i64 %idx1.add, i64 %idx2.add
+  ret ptr %arrayidx
+}
+
+; It doesn't matter that %idx2.add might be negative, the indices in the resulting GEPs are all non-negative -> preserve inbounds
+define ptr @multi_level_mixed_okay(ptr %p, i64 %idx1, i64 %idx2) {
+; CHECK-LABEL: @multi_level_mixed_okay(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[MASKED_IDX1:%.*]] = and i64 [[IDX1:%.*]], 255
+; CHECK-NEXT:    [[MASKED_IDX2:%.*]] = and i64 [[IDX2:%.*]], 65535
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [10 x [20 x i32]], ptr [[P:%.*]], i64 0, i64 [[MASKED_IDX1]], i64 [[MASKED_IDX2]]
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 156
+; CHECK-NEXT:    ret ptr [[ARRAYIDX3]]
+;
+entry:
+  %masked.idx1 = and i64 %idx1, u0xff
+  %masked.idx2 = and i64 %idx2, u0xffff
+  %idx1.add = add i64 %masked.idx1, 2
+  %idx2.add = add i64 %masked.idx2, -1
+  %arrayidx = getelementptr inbounds [10 x [20 x i32]], ptr %p, i64 0, i64 %idx1.add, i64 %idx2.add
+  ret ptr %arrayidx
+}
+
+; One index may be negative -> don't preserve inbounds
+define ptr @multi_level_mixed_not_okay(ptr %p, i64 %idx1, i64 %idx2) {
+; CHECK-LABEL: @multi_level_mixed_not_okay(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[MASKED_IDX1:%.*]] = and i64 [[IDX1:%.*]], -256
+; CHECK-NEXT:    [[MASKED_IDX2:%.*]] = and i64 [[IDX2:%.*]], 65535
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr [10 x [20 x i32]], ptr [[P:%.*]], i64 0, i64 [[MASKED_IDX1]], i64 [[MASKED_IDX2]]
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr i8, ptr [[TMP0]], i64 156
+; CHECK-NEXT:    ret ptr [[ARRAYIDX3]]
+;
+entry:
+  %masked.idx1 = and i64 %idx1, u0xffffffffffffff00
+  %masked.idx2 = and i64 %idx2, u0xffff
+  %idx1.add = add i64 %masked.idx1, 2
+  %idx2.add = add i64 %masked.idx2, -1
+  %arrayidx = getelementptr inbounds [10 x [20 x i32]], ptr %p, i64 0, i64 %idx1.add, i64 %idx2.add
+  ret ptr %arrayidx
 }

_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits