https://github.com/ritter-x2a created https://github.com/llvm/llvm-project/pull/130617
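In IR terms, the patch below lets SeparateConstOffsetFromGEP keep the inbounds
flag on splits like the following (a minimal before/after sketch mirroring the
new @must_be_inbounds test in the patch; function and value names are
illustrative, not taken from the patch):

  ; Before the pass: one inbounds GEP whose index is a variable plus a constant.
  define float @example(ptr %p, i32 %i) {
  entry:
    %i.ext = zext i32 %i to i64        ; zext => %i.ext is known non-negative
    %idx = add nsw i64 %i.ext, 1
    %arrayidx = getelementptr inbounds float, ptr %p, i64 %idx
    %v = load float, ptr %arrayidx, align 4
    ret float %v
  }

  ; After the pass, with this patch: both resulting GEPs stay inbounds because
  ; the original GEP was inbounds, the accumulated constant byte offset (4) is
  ; non-negative, and ValueTracking shows the remaining index is non-negative.
  define float @example(ptr %p, i32 %i) {
  entry:
    %i.ext = zext i32 %i to i64
    %base = getelementptr inbounds float, ptr %p, i64 %i.ext
    %arrayidx = getelementptr inbounds i8, ptr %base, i64 4
    %v = load float, ptr %arrayidx, align 4
    ret float %v
  }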
If we know that the initial GEP was inbounds, and we change it to a
sequence of GEPs from the same base pointer where every offset is
non-negative, then the new GEPs are inbounds.

For SWDEV-516125.

>From e880d4330b4a2fe9b281013b128763e6d24f81dd Mon Sep 17 00:00:00 2001
From: Fabian Ritter <fabian.rit...@amd.com>
Date: Mon, 10 Mar 2025 06:55:10 -0400
Subject: [PATCH] [SeparateConstOffsetFromGEP] Preserve inbounds flag based on
 ValueTracking

If we know that the initial GEP was inbounds, and we change it to a
sequence of GEPs from the same base pointer where every offset is
non-negative, then the new GEPs are inbounds.

For SWDEV-516125.
---
 .../Scalar/SeparateConstOffsetFromGEP.cpp | 18 +++++++++++----
 .../AMDGPU/preserve-inbounds.ll           | 23 +++++++++++++++++++
 .../NVPTX/split-gep-and-gvn.ll            | 16 ++++++-------
 .../NVPTX/split-gep.ll                    |  8 +++----
 4 files changed, 48 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 138a71ce79cef..070afdf0752f4 100644
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -1052,6 +1052,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
     }
   }
 
+  bool MayRecoverInbounds = AccumulativeByteOffset >= 0 && GEP->isInBounds();
+
   // Remove the constant offset in each sequential index. The resultant GEP
   // computes the variadic base.
   // Notice that we don't remove struct field indices here. If LowerGEP is
@@ -1079,6 +1081,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
         // and the old index if they are not used.
         RecursivelyDeleteTriviallyDeadInstructions(UserChainTail);
         RecursivelyDeleteTriviallyDeadInstructions(OldIdx);
+        MayRecoverInbounds =
+            MayRecoverInbounds && computeKnownBits(NewIdx, *DL).isNonNegative();
       }
     }
   }
@@ -1100,11 +1104,15 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
   //   address with silently-wrapping two's complement arithmetic".
   // Therefore, the final code will be a semantically equivalent.
   //
-  // TODO(jingyue): do some range analysis to keep as many inbounds as
-  // possible. GEPs with inbounds are more friendly to alias analysis.
-  // TODO(gep_nowrap): Preserve nuw at least.
-  auto NewGEPFlags = GEPNoWrapFlags::none();
-  GEP->setNoWrapFlags(GEPNoWrapFlags::none());
+  // If the initial GEP was inbounds and all variable indices and the
+  // accumulated offsets are non-negative, they can be added in any order and
+  // the intermediate results are in bounds. So, we can preserve the inbounds
+  // flag for both GEPs. GEPs with inbounds are more friendly to alias analysis.
+  //
+  // TODO(gep_nowrap): Preserve nuw?
+  auto NewGEPFlags =
+      MayRecoverInbounds ? GEPNoWrapFlags::inBounds() : GEPNoWrapFlags::none();
+  GEP->setNoWrapFlags(NewGEPFlags);
 
   // Lowers a GEP to either GEPs with a single index or arithmetic operations.
   if (LowerGEP) {
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll
index 877de38776839..91b5bc874c154 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll
@@ -24,3 +24,26 @@ entry:
   store float %3, ptr %arrayidx.dst, align 4
   ret void
 }
+
+; All offsets must be positive, so inbounds can be preserved.
+define void @must_be_inbounds(ptr %dst, ptr %src, i32 %i) {
+; CHECK-LABEL: @must_be_inbounds(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[I_PROM:%.*]] = zext i32 [[I:%.*]] to i64
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 [[I_PROM]]
+; CHECK-NEXT:    [[ARRAYIDX_SRC2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[ARRAYIDX_SRC2]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 [[I_PROM]]
+; CHECK-NEXT:    [[ARRAYIDX_DST4:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 4
+; CHECK-NEXT:    store float [[TMP1]], ptr [[ARRAYIDX_DST4]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %i.prom = zext i32 %i to i64
+  %idx = add nsw i64 %i.prom, 1
+  %arrayidx.src = getelementptr inbounds float, ptr %src, i64 %idx
+  %3 = load float, ptr %arrayidx.src, align 4
+  %arrayidx.dst = getelementptr inbounds float, ptr %dst, i64 %idx
+  store float %3, ptr %arrayidx.dst, align 4
+  ret void
+}
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
index 9a73feb2c4b5c..4474585bf9b06 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
@@ -157,19 +157,19 @@ define void @sum_of_array3(i32 %x, i32 %y, ptr nocapture %output) {
 ; IR-NEXT:  .preheader:
 ; IR-NEXT:    [[TMP0:%.*]] = zext i32 [[Y]] to i64
 ; IR-NEXT:    [[TMP1:%.*]] = zext i32 [[X]] to i64
-; IR-NEXT:    [[TMP2:%.*]] = getelementptr [32 x [32 x float]], ptr addrspace(3) @array, i64 0, i64 [[TMP1]], i64 [[TMP0]]
+; IR-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [32 x [32 x float]], ptr addrspace(3) @array, i64 0, i64 [[TMP1]], i64 [[TMP0]]
 ; IR-NEXT:    [[TMP3:%.*]] = addrspacecast ptr addrspace(3) [[TMP2]] to ptr
 ; IR-NEXT:    [[TMP4:%.*]] = load float, ptr [[TMP3]], align 4
 ; IR-NEXT:    [[TMP5:%.*]] = fadd float [[TMP4]], 0.000000e+00
-; IR-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 4
+; IR-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 4
 ; IR-NEXT:    [[TMP7:%.*]] = addrspacecast ptr addrspace(3) [[TMP6]] to ptr
 ; IR-NEXT:    [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4
 ; IR-NEXT:    [[TMP9:%.*]] = fadd float [[TMP5]], [[TMP8]]
-; IR-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 128
+; IR-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 128
 ; IR-NEXT:    [[TMP11:%.*]] = addrspacecast ptr addrspace(3) [[TMP10]] to ptr
 ; IR-NEXT:    [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4
 ; IR-NEXT:    [[TMP13:%.*]] = fadd float [[TMP9]], [[TMP12]]
-; IR-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 132
+; IR-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 132
 ; IR-NEXT:    [[TMP15:%.*]] = addrspacecast ptr addrspace(3) [[TMP14]] to ptr
 ; IR-NEXT:    [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4
 ; IR-NEXT:    [[TMP17:%.*]] = fadd float [[TMP13]], [[TMP16]]
@@ -224,19 +224,19 @@ define void @sum_of_array4(i32 %x, i32 %y, ptr nocapture %output) {
 ; IR-NEXT:  .preheader:
 ; IR-NEXT:    [[TMP0:%.*]] = zext i32 [[Y]] to i64
 ; IR-NEXT:    [[TMP1:%.*]] = zext i32 [[X]] to i64
-; IR-NEXT:    [[TMP2:%.*]] = getelementptr [32 x [32 x float]], ptr addrspace(3) @array, i64 0, i64 [[TMP1]], i64 [[TMP0]]
+; IR-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [32 x [32 x float]], ptr addrspace(3) @array, i64 0, i64 [[TMP1]], i64 [[TMP0]]
 ; IR-NEXT:    [[TMP3:%.*]] = addrspacecast ptr addrspace(3) [[TMP2]] to ptr
 ; IR-NEXT:    [[TMP4:%.*]] = load float, ptr [[TMP3]], align 4
 ; IR-NEXT:    [[TMP5:%.*]] = fadd float [[TMP4]], 0.000000e+00
-; IR-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 4
+; IR-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 4
 ; IR-NEXT:    [[TMP7:%.*]] = addrspacecast ptr addrspace(3) [[TMP6]] to ptr
 ; IR-NEXT:    [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4
 ; IR-NEXT:    [[TMP9:%.*]] = fadd float [[TMP5]], [[TMP8]]
-; IR-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 128
+; IR-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 128
 ; IR-NEXT:    [[TMP11:%.*]] = addrspacecast ptr addrspace(3) [[TMP10]] to ptr
 ; IR-NEXT:    [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4
 ; IR-NEXT:    [[TMP13:%.*]] = fadd float [[TMP9]], [[TMP12]]
-; IR-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 132
+; IR-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 132
 ; IR-NEXT:    [[TMP15:%.*]] = addrspacecast ptr addrspace(3) [[TMP14]] to ptr
 ; IR-NEXT:    [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4
 ; IR-NEXT:    [[TMP17:%.*]] = fadd float [[TMP13]], [[TMP16]]
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
index 77b3434f4f159..da04a6e979425 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
@@ -372,8 +372,8 @@ define ptr @trunk_explicit(ptr %ptr, i64 %idx) {
 ; CHECK-LABEL: define ptr @trunk_explicit(
 ; CHECK-SAME: ptr [[PTR:%.*]], i64 [[IDX:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT0:%.*]], ptr [[PTR]], i64 0, i32 3, i64 [[IDX]], i32 1
-; CHECK-NEXT:    [[PTR21:%.*]] = getelementptr i8, ptr [[TMP0]], i64 3216
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT0:%.*]], ptr [[PTR]], i64 0, i32 3, i64 [[IDX]], i32 1
+; CHECK-NEXT:    [[PTR21:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 3216
 ; CHECK-NEXT:    ret ptr [[PTR21]]
 ;
 entry:
@@ -389,8 +389,8 @@ define ptr @trunk_long_idx(ptr %ptr, i64 %idx) {
 ; CHECK-LABEL: define ptr @trunk_long_idx(
 ; CHECK-SAME: ptr [[PTR:%.*]], i64 [[IDX:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT0:%.*]], ptr [[PTR]], i64 0, i32 3, i64 [[IDX]], i32 1
-; CHECK-NEXT:    [[PTR21:%.*]] = getelementptr i8, ptr [[TMP0]], i64 3216
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT0:%.*]], ptr [[PTR]], i64 0, i32 3, i64 [[IDX]], i32 1
+; CHECK-NEXT:    [[PTR21:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 3216
 ; CHECK-NEXT:    ret ptr [[PTR21]]
 ;
 entry:

_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits