Author: Drew Kersnar
Date: 2025-08-08T12:05:29-05:00
New Revision: 90e8c8e7186616241549e7bc62d95b51467a674b
URL: https://github.com/llvm/llvm-project/commit/90e8c8e7186616241549e7bc62d95b51467a674b
DIFF: https://github.com/llvm/llvm-project/commit/90e8c8e7186616241549e7bc62d95b51467a674b.diff

LOG: [InferAlignment] Propagate alignment between loads/stores of the same base pointer (#145733)

We can derive and upgrade the alignment of loads/stores using other
well-aligned loads/stores. This optimization makes a single forward pass
through each basic block, using the alignment and constant offset of each
load/store to derive the best provable alignment for its base pointer and
caching the result. If it then encounters another load/store based on the
same pointer, it tries to upgrade that access's alignment.

The optimization must be a forward pass within a single basic block
because control flow (and exception throwing) can affect alignment
guarantees at runtime.

---------

Co-authored-by: Nikita Popov <git...@npopov.com>

Added: 
    llvm/test/Transforms/InferAlignment/propagate-from-other-load-stores.ll

Modified: 
    clang/test/CodeGen/attr-counted-by-for-pointers.c
    clang/test/OpenMP/bug57757.cpp
    llvm/lib/Transforms/Scalar/InferAlignment.cpp
    llvm/test/Transforms/PhaseOrdering/X86/masked-memory-ops-with-cf.ll

Removed: 
    

################################################################################
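Before the diff itself, a note on the arithmetic the patch relies on: if an
access to `base + offset` carries alignment `A`, then `base` must be aligned
to the largest power of two dividing both `A` and `offset`, and a known base
alignment transfers back onto other offsets the same way. A minimal
standalone sketch of that relation (illustrative only; the pass itself uses
`llvm::commonAlignment`, and `commonAlign` here is a hypothetical stand-in):

  #include <cassert>
  #include <cstdint>

  // Hypothetical stand-in for llvm::commonAlignment: the largest power of
  // two dividing both a power-of-two alignment and a byte offset.
  static uint64_t commonAlign(uint64_t Alignment, uint64_t Offset) {
    if (Offset == 0)
      return Alignment;
    uint64_t OffsetAlign = Offset & -Offset; // lowest set bit of the offset
    return Alignment < OffsetAlign ? Alignment : OffsetAlign;
  }

  int main() {
    // A load of base+16 with align 16 proves the base is 16-byte aligned...
    assert(commonAlign(16, 16) == 16);
    // ...and a 16-byte-aligned base proves base+8 is 8-byte aligned.
    assert(commonAlign(16, 8) == 8);
    return 0;
  }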
diff --git a/clang/test/CodeGen/attr-counted-by-for-pointers.c b/clang/test/CodeGen/attr-counted-by-for-pointers.c
index e939e49a61d4d..0d72b58c78fd1 100644
--- a/clang/test/CodeGen/attr-counted-by-for-pointers.c
+++ b/clang/test/CodeGen/attr-counted-by-for-pointers.c
@@ -32,7 +32,7 @@ struct annotated_ptr {
 // SANITIZE-WITH-ATTR-NEXT: entry:
 // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
-// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4
+// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 8
 // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2:![0-9]+]]
 // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT10:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3:![0-9]+]], !nosanitize [[META2]]
@@ -85,7 +85,7 @@ void test1(struct annotated_ptr *p, int index, struct foo *value) {
 // SANITIZE-WITH-ATTR-NEXT: entry:
 // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
-// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4
+// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 8
 // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT10:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
@@ -138,7 +138,7 @@ void test2(struct annotated_ptr *p, int index, struct foo *value) {
 // SANITIZE-WITH-ATTR-NEXT: entry:
 // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
-// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4
+// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 8
 // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT: [[DOTNOT:%.*]] = icmp ugt i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], label [[CONT10:%.*]], !prof [[PROF15:![0-9]+]], !nosanitize [[META2]]
@@ -311,7 +311,7 @@ size_t test6(struct annotated_ptr *p, int index) {
 // SANITIZE-WITH-ATTR-NEXT: entry:
 // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
-// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4
+// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 8
 // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT10:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
diff --git a/clang/test/OpenMP/bug57757.cpp b/clang/test/OpenMP/bug57757.cpp
index eabf233dde247..caf53a5b62c1c 100644
--- a/clang/test/OpenMP/bug57757.cpp
+++ b/clang/test/OpenMP/bug57757.cpp
@@ -46,7 +46,7 @@ void foo() {
 // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 52
 // CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 48
 // CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[TBAA19:![0-9]+]], !noalias [[META13]]
-// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA16]], !noalias [[META13]]
+// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 8, !tbaa [[TBAA16]], !noalias [[META13]]
 // CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP6]], align 4, !tbaa [[TBAA20:![0-9]+]], !noalias [[META13]]
 // CHECK-NEXT: tail call void [[TMP8]](i32 noundef [[TMP9]], float noundef [[TMP10]]) #[[ATTR2:[0-9]+]], !noalias [[META13]]
 // CHECK-NEXT: br label [[DOTOMP_OUTLINED__EXIT]]
diff --git a/llvm/lib/Transforms/Scalar/InferAlignment.cpp b/llvm/lib/Transforms/Scalar/InferAlignment.cpp
index 0ddc23152d84f..e9bf59c6850a3 100644
--- a/llvm/lib/Transforms/Scalar/InferAlignment.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAlignment.cpp
@@ -58,14 +58,55 @@ bool inferAlignment(Function &F, AssumptionCache &AC, DominatorTree &DT) {
   }

   // Compute alignment from known bits.
+  auto InferFromKnownBits = [&](Instruction &I, Value *PtrOp) {
+    KnownBits Known = computeKnownBits(PtrOp, DL, &AC, &I, &DT);
+    unsigned TrailZ =
+        std::min(Known.countMinTrailingZeros(), +Value::MaxAlignmentExponent);
+    return Align(1ull << std::min(Known.getBitWidth() - 1, TrailZ));
+  };
+
+  // Propagate alignment between loads and stores that originate from the
+  // same base pointer.
+  DenseMap<Value *, Align> BestBasePointerAligns;
+  auto InferFromBasePointer = [&](Value *PtrOp, Align LoadStoreAlign) {
+    APInt OffsetFromBase(DL.getIndexTypeSizeInBits(PtrOp->getType()), 0);
+    PtrOp = PtrOp->stripAndAccumulateConstantOffsets(DL, OffsetFromBase, true);
+    // Derive the base pointer alignment from the load/store alignment
+    // and the offset from the base pointer.
+    Align BasePointerAlign =
+        commonAlignment(LoadStoreAlign, OffsetFromBase.getLimitedValue());
+
+    auto [It, Inserted] =
+        BestBasePointerAligns.try_emplace(PtrOp, BasePointerAlign);
+    if (!Inserted) {
+      // If the stored base pointer alignment is better than the
+      // base pointer alignment we derived, we may be able to use it
+      // to improve the load/store alignment. If not, store the
+      // improved base pointer alignment for future iterations.
+      if (It->second > BasePointerAlign) {
+        Align BetterLoadStoreAlign =
+            commonAlignment(It->second, OffsetFromBase.getLimitedValue());
+        return BetterLoadStoreAlign;
+      }
+      It->second = BasePointerAlign;
+    }
+    return LoadStoreAlign;
+  };
+
   for (BasicBlock &BB : F) {
+    // We need to reset the map for each block because alignment information
+    // can only be propagated from instruction A to B if A dominates B.
+    // This is because control flow (and exception throwing) could be dependent
+    // on the address (and its alignment) at runtime. Some sort of dominator
+    // tree approach could be better, but doing a simple forward pass through a
+    // single basic block is correct too.
+    BestBasePointerAligns.clear();
+
     for (Instruction &I : BB) {
       Changed |= tryToImproveAlign(
           DL, &I, [&](Value *PtrOp, Align OldAlign, Align PrefAlign) {
-            KnownBits Known = computeKnownBits(PtrOp, DL, &AC, &I, &DT);
-            unsigned TrailZ = std::min(Known.countMinTrailingZeros(),
-                                       +Value::MaxAlignmentExponent);
-            return Align(1ull << std::min(Known.getBitWidth() - 1, TrailZ));
+            return std::max(InferFromKnownBits(I, PtrOp),
+                            InferFromBasePointer(PtrOp, OldAlign));
           });
     }
   }
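Read as a whole, `InferFromBasePointer` implements a small one-pass cache.
The following self-contained sketch (a simplification, not the pass itself:
integer ids stand in for stripped base pointers, plain `uint64_t` stands in
for `llvm::Align`, and `commonAlign` is the same hypothetical helper as
above) simulates how the first access seeds a base alignment and a later,
weaker access gets upgraded:

  #include <cstdint>
  #include <cstdio>
  #include <map>

  static uint64_t commonAlign(uint64_t Alignment, uint64_t Offset) {
    if (Offset == 0)
      return Alignment;
    uint64_t OffsetAlign = Offset & -Offset;
    return Alignment < OffsetAlign ? Alignment : OffsetAlign;
  }

  // Per-block cache: base pointer id -> best base alignment seen so far.
  static std::map<int, uint64_t> BestBaseAligns;

  // Mirrors InferFromBasePointer: returns the (possibly improved)
  // alignment for an access of base+Offset currently carrying AccessAlign.
  static uint64_t inferFromBase(int Base, uint64_t Offset,
                                uint64_t AccessAlign) {
    uint64_t BaseAlign = commonAlign(AccessAlign, Offset);
    auto [It, Inserted] = BestBaseAligns.try_emplace(Base, BaseAlign);
    if (!Inserted) {
      if (It->second > BaseAlign)
        return commonAlign(It->second, Offset); // upgrade this access
      It->second = BaseAlign; // cache the better base alignment
    }
    return AccessAlign;
  }

  int main() {
    BestBaseAligns.clear(); // reset at every basic block boundary
    printf("%u\n", (unsigned)inferFromBase(0, 0, 16)); // seeds base: 16
    printf("%u\n", (unsigned)inferFromBase(0, 8, 4));  // upgraded to 8
    return 0;
  }

The per-block `clear()` matches the comment in the patch: without dominance
information, an alignment fact learned in one block must not leak into
another.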
diff --git a/llvm/test/Transforms/InferAlignment/propagate-from-other-load-stores.ll b/llvm/test/Transforms/InferAlignment/propagate-from-other-load-stores.ll
new file mode 100644
index 0000000000000..3fc7c59a512a5
--- /dev/null
+++ b/llvm/test/Transforms/InferAlignment/propagate-from-other-load-stores.ll
@@ -0,0 +1,194 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=infer-alignment -S | FileCheck %s
+%struct.S1 = type { %struct.float3, %struct.float3, i32, i32 }
+%struct.float3 = type { float, float, float }
+
+
+; ------------------------------------------------------------------------------
+; Test that we can propagate the align 16 to the load and store that are set to align 4
+; ------------------------------------------------------------------------------
+
+define void @prop_align(ptr %v, ptr %vout) {
+; CHECK-LABEL: define void @prop_align(
+; CHECK-SAME: ptr [[V:%.*]], ptr [[VOUT:%.*]]) {
+; CHECK-NEXT: [[DOTUNPACK_UNPACK:%.*]] = load float, ptr [[V]], align 16
+; CHECK-NEXT: [[DOTUNPACK_ELT7:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 4
+; CHECK-NEXT: [[DOTUNPACK_UNPACK8:%.*]] = load float, ptr [[DOTUNPACK_ELT7]], align 4
+; CHECK-NEXT: [[DOTUNPACK_ELT9:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 8
+; CHECK-NEXT: [[DOTUNPACK_UNPACK10:%.*]] = load float, ptr [[DOTUNPACK_ELT9]], align 8
+; CHECK-NEXT: [[DOTELT1:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 12
+; CHECK-NEXT: [[DOTUNPACK2_UNPACK:%.*]] = load float, ptr [[DOTELT1]], align 4
+; CHECK-NEXT: [[DOTUNPACK2_ELT12:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 16
+; CHECK-NEXT: [[DOTUNPACK2_UNPACK13:%.*]] = load float, ptr [[DOTUNPACK2_ELT12]], align 16
+; CHECK-NEXT: [[DOTUNPACK2_ELT14:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 20
+; CHECK-NEXT: [[DOTUNPACK2_UNPACK15:%.*]] = load float, ptr [[DOTUNPACK2_ELT14]], align 4
+; CHECK-NEXT: [[DOTELT3:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 24
+; CHECK-NEXT: [[DOTUNPACK4:%.*]] = load i32, ptr [[DOTELT3]], align 8
+; CHECK-NEXT: [[DOTELT5:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 28
+; CHECK-NEXT: [[DOTUNPACK6:%.*]] = load i32, ptr [[DOTELT5]], align 4
+; CHECK-NEXT: store float [[DOTUNPACK_UNPACK]], ptr [[VOUT]], align 16
+; CHECK-NEXT: [[VOUT_REPACK23:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 4
+; CHECK-NEXT: store float [[DOTUNPACK_UNPACK8]], ptr [[VOUT_REPACK23]], align 4
+; CHECK-NEXT: [[VOUT_REPACK25:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 8
+; CHECK-NEXT: store float [[DOTUNPACK_UNPACK10]], ptr [[VOUT_REPACK25]], align 8
+; CHECK-NEXT: [[VOUT_REPACK17:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 12
+; CHECK-NEXT: store float [[DOTUNPACK2_UNPACK]], ptr [[VOUT_REPACK17]], align 4
+; CHECK-NEXT: [[VOUT_REPACK17_REPACK27:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 16
+; CHECK-NEXT: store float [[DOTUNPACK2_UNPACK13]], ptr [[VOUT_REPACK17_REPACK27]], align 16
+; CHECK-NEXT: [[VOUT_REPACK17_REPACK29:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 20
+; CHECK-NEXT: store float [[DOTUNPACK2_UNPACK15]], ptr [[VOUT_REPACK17_REPACK29]], align 4
+; CHECK-NEXT: [[VOUT_REPACK19:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 24
+; CHECK-NEXT: store i32 [[DOTUNPACK4]], ptr [[VOUT_REPACK19]], align 8
+; CHECK-NEXT: [[VOUT_REPACK21:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 28
+; CHECK-NEXT: store i32 [[DOTUNPACK6]], ptr [[VOUT_REPACK21]], align 4
+; CHECK-NEXT: ret void
+;
+  %.unpack.unpack = load float, ptr %v, align 16
+  %.unpack.elt7 = getelementptr inbounds nuw i8, ptr %v, i64 4
+  %.unpack.unpack8 = load float, ptr %.unpack.elt7, align 4
+  %.unpack.elt9 = getelementptr inbounds nuw i8, ptr %v, i64 8
+  %.unpack.unpack10 = load float, ptr %.unpack.elt9, align 8
+  %.elt1 = getelementptr inbounds nuw i8, ptr %v, i64 12
+  %.unpack2.unpack = load float, ptr %.elt1, align 4
+  %.unpack2.elt12 = getelementptr inbounds nuw i8, ptr %v, i64 16
+  %.unpack2.unpack13 = load float, ptr %.unpack2.elt12, align 4
+  %.unpack2.elt14 = getelementptr inbounds nuw i8, ptr %v, i64 20
+  %.unpack2.unpack15 = load float, ptr %.unpack2.elt14, align 4
+  %.elt3 = getelementptr inbounds nuw i8, ptr %v, i64 24
+  %.unpack4 = load i32, ptr %.elt3, align 8
+  %.elt5 = getelementptr inbounds nuw i8, ptr %v, i64 28
+  %.unpack6 = load i32, ptr %.elt5, align 4
+  store float %.unpack.unpack, ptr %vout, align 16
+  %vout.repack23 = getelementptr inbounds nuw i8, ptr %vout, i64 4
+  store float %.unpack.unpack8, ptr %vout.repack23, align 4
+  %vout.repack25 = getelementptr inbounds nuw i8, ptr %vout, i64 8
+  store float %.unpack.unpack10, ptr %vout.repack25, align 8
+  %vout.repack17 = getelementptr inbounds nuw i8, ptr %vout, i64 12
+  store float %.unpack2.unpack, ptr %vout.repack17, align 4
+  %vout.repack17.repack27 = getelementptr inbounds nuw i8, ptr %vout, i64 16
+  store float %.unpack2.unpack13, ptr %vout.repack17.repack27, align 4
+  %vout.repack17.repack29 = getelementptr inbounds nuw i8, ptr %vout, i64 20
+  store float %.unpack2.unpack15, ptr %vout.repack17.repack29, align 4
+  %vout.repack19 = getelementptr inbounds nuw i8, ptr %vout, i64 24
+  store i32 %.unpack4, ptr %vout.repack19, align 8
+  %vout.repack21 = getelementptr inbounds nuw i8, ptr %vout, i64 28
+  store i32 %.unpack6, ptr %vout.repack21, align 4
+  ret void
+}
+
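The expected `align` values in the CHECK lines above follow mechanically
from a 16-byte-aligned base and the 4-byte steps through `%struct.S1`; a
quick sketch (same hypothetical `commonAlign` helper as above) reproduces
the 16/4/8/4/16/4/8/4 pattern:

  #include <cstdint>
  #include <cstdio>

  static uint64_t commonAlign(uint64_t Alignment, uint64_t Offset) {
    if (Offset == 0)
      return Alignment;
    uint64_t OffsetAlign = Offset & -Offset;
    return Alignment < OffsetAlign ? Alignment : OffsetAlign;
  }

  int main() {
    // %struct.S1 is 32 bytes: float3 at 0, float3 at 12, i32 at 24 and 28.
    // The test accesses it in 4-byte steps from a 16-byte-aligned base.
    for (uint64_t Off = 0; Off < 32; Off += 4)
      printf("offset %2u -> align %u\n", (unsigned)Off,
             (unsigned)commonAlign(16, Off));
    return 0;
  }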
+; ------------------------------------------------------------------------------
+; Test that alignment is not propagated from a source that does not dominate the destination
+; ------------------------------------------------------------------------------
+
+define void @no_prop_align(ptr %v, ptr %vout, i1 %cond) {
+; CHECK-LABEL: define void @no_prop_align(
+; CHECK-SAME: ptr [[V:%.*]], ptr [[VOUT:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT: br i1 [[COND]], label %[[BRANCH1:.*]], label %[[BRANCH2:.*]]
+; CHECK: [[BRANCH1]]:
+; CHECK-NEXT: [[DOTUNPACK_UNPACK:%.*]] = load float, ptr [[V]], align 16
+; CHECK-NEXT: [[DOTUNPACK_ELT7:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 4
+; CHECK-NEXT: [[DOTUNPACK_UNPACK8:%.*]] = load float, ptr [[DOTUNPACK_ELT7]], align 4
+; CHECK-NEXT: [[DOTUNPACK_ELT9:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 8
+; CHECK-NEXT: [[DOTUNPACK_UNPACK10:%.*]] = load float, ptr [[DOTUNPACK_ELT9]], align 8
+; CHECK-NEXT: [[DOTELT1:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 12
+; CHECK-NEXT: [[DOTUNPACK2_UNPACK:%.*]] = load float, ptr [[DOTELT1]], align 4
+; CHECK-NEXT: br label %[[END:.*]]
+; CHECK: [[BRANCH2]]:
+; CHECK-NEXT: [[DOTUNPACK2_ELT12:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 16
+; CHECK-NEXT: [[DOTUNPACK2_UNPACK13:%.*]] = load float, ptr [[DOTUNPACK2_ELT12]], align 4
+; CHECK-NEXT: [[DOTUNPACK2_ELT14:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 20
+; CHECK-NEXT: [[DOTUNPACK2_UNPACK15:%.*]] = load float, ptr [[DOTUNPACK2_ELT14]], align 4
+; CHECK-NEXT: [[DOTELT3:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 24
+; CHECK-NEXT: [[DOTUNPACK4:%.*]] = load i32, ptr [[DOTELT3]], align 8
+; CHECK-NEXT: [[DOTELT5:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 28
+; CHECK-NEXT: [[DOTUNPACK6:%.*]] = load i32, ptr [[DOTELT5]], align 4
+; CHECK-NEXT: br label %[[END]]
+; CHECK: [[END]]:
+; CHECK-NEXT: ret void
+;
+  br i1 %cond, label %branch1, label %branch2
+
+branch1:
+  %.unpack.unpack = load float, ptr %v, align 16
+  %.unpack.elt7 = getelementptr inbounds nuw i8, ptr %v, i64 4
+  %.unpack.unpack8 = load float, ptr %.unpack.elt7, align 4
+  %.unpack.elt9 = getelementptr inbounds nuw i8, ptr %v, i64 8
+  %.unpack.unpack10 = load float, ptr %.unpack.elt9, align 8
+  %.elt1 = getelementptr inbounds nuw i8, ptr %v, i64 12
+  %.unpack2.unpack = load float, ptr %.elt1, align 4
+  br label %end
+
+branch2:
+  %.unpack2.elt12 = getelementptr inbounds nuw i8, ptr %v, i64 16
+  %.unpack2.unpack13 = load float, ptr %.unpack2.elt12, align 4
+  %.unpack2.elt14 = getelementptr inbounds nuw i8, ptr %v, i64 20
+  %.unpack2.unpack15 = load float, ptr %.unpack2.elt14, align 4
+  %.elt3 = getelementptr inbounds nuw i8, ptr %v, i64 24
+  %.unpack4 = load i32, ptr %.elt3, align 8
+  %.elt5 = getelementptr inbounds nuw i8, ptr %v, i64 28
+  %.unpack6 = load i32, ptr %.elt5, align 4
+  br label %end
+
+end:
+  ret void
+}
+
+; ------------------------------------------------------------------------------
+; Test that we can propagate to/from negative offset GEPs
+; ------------------------------------------------------------------------------
+
+define void @prop_align_negative_offset(ptr %v) {
+; CHECK-LABEL: define void @prop_align_negative_offset(
+; CHECK-SAME: ptr [[V:%.*]]) {
+; CHECK-NEXT: [[LOADALIGNED:%.*]] = load float, ptr [[V]], align 16
+; CHECK-NEXT: [[GEPNEGATIVE:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 -16
+; CHECK-NEXT: [[LOADUNALIGNED:%.*]] = load float, ptr [[GEPNEGATIVE]], align 16
+; CHECK-NEXT: ret void
+;
+  %loadAligned = load float, ptr %v, align 16
+  %gepNegative = getelementptr inbounds nuw i8, ptr %v, i64 -16
+  %loadUnaligned = load float, ptr %gepNegative, align 4
+  ret void
+}
+
+define void @prop_align_negative_offset_2(ptr %v) {
+; CHECK-LABEL: define void @prop_align_negative_offset_2(
+; CHECK-SAME: ptr [[V:%.*]]) {
+; CHECK-NEXT: [[GEPNEGATIVE:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 -16
+; CHECK-NEXT: [[LOADALIGNED:%.*]] = load float, ptr [[GEPNEGATIVE]], align 16
+; CHECK-NEXT: [[LOADUNALIGNED:%.*]] = load float, ptr [[V]], align 16
+; CHECK-NEXT: ret void
+;
+  %gepNegative = getelementptr inbounds nuw i8, ptr %v, i64 -16
+  %loadAligned = load float, ptr %gepNegative, align 16
+  %loadUnaligned = load float, ptr %v, align 4
+  ret void
+}
+
+define void @prop_align_negative_offset_3(ptr %v) {
+; CHECK-LABEL: define void @prop_align_negative_offset_3(
+; CHECK-SAME: ptr [[V:%.*]]) {
+; CHECK-NEXT: [[LOADALIGNED:%.*]] = load float, ptr [[V]], align 16
+; CHECK-NEXT: [[GEPNEGATIVE:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 -8
+; CHECK-NEXT: [[LOADUNALIGNED:%.*]] = load float, ptr [[GEPNEGATIVE]], align 8
+; CHECK-NEXT: ret void
+;
+  %loadAligned = load float, ptr %v, align 16
+  %gepNegative = getelementptr inbounds nuw i8, ptr %v, i64 -8
+  %loadUnaligned = load float, ptr %gepNegative, align 4
+  ret void
+}
+
+define void @prop_align_negative_offset_4(ptr %v) {
+; CHECK-LABEL: define void @prop_align_negative_offset_4(
+; CHECK-SAME: ptr [[V:%.*]]) {
+; CHECK-NEXT: [[LOADALIGNED:%.*]] = load float, ptr [[V]], align 16
+; CHECK-NEXT: [[GEPNEGATIVE:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 -20
+; CHECK-NEXT: [[LOADUNALIGNED:%.*]] = load float, ptr [[GEPNEGATIVE]], align 4
+; CHECK-NEXT: ret void
+;
+  %loadAligned = load float, ptr %v, align 16
+  %gepNegative = getelementptr inbounds nuw i8, ptr %v, i64 -20
+  %loadUnaligned = load float, ptr %gepNegative, align 4
+  ret void
+}
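The negative-offset cases come down to trailing zero bits in two's
complement, which is the form in which `stripAndAccumulateConstantOffsets`
hands the offset to `commonAlignment`. A few spot checks with the same
hypothetical helper as above:

  #include <cassert>
  #include <cstdint>

  static uint64_t commonAlign(uint64_t Alignment, uint64_t Offset) {
    if (Offset == 0)
      return Alignment;
    uint64_t OffsetAlign = Offset & -Offset;
    return Alignment < OffsetAlign ? Alignment : OffsetAlign;
  }

  int main() {
    // -16 ends in four zero bits, so base-16 inherits the full align 16.
    assert(commonAlign(16, (uint64_t)-16) == 16);
    // -8 ends in three zero bits: base-8 gets align 8 (offset_3 above).
    assert(commonAlign(16, (uint64_t)-8) == 8);
    // -20 ends in only two zero bits: base-20 stays align 4 (offset_4).
    assert(commonAlign(16, (uint64_t)-20) == 4);
    return 0;
  }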
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/masked-memory-ops-with-cf.ll b/llvm/test/Transforms/PhaseOrdering/X86/masked-memory-ops-with-cf.ll
index 405a26de3d6af..c649f29effeda 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/masked-memory-ops-with-cf.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/masked-memory-ops-with-cf.ll
@@ -13,7 +13,7 @@ define void @basic(i1 %cond, ptr %b, ptr %p, ptr %q) {
 ; CHECK-NEXT: [[TMP5:%.*]] = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr [[B:%.*]], i32 8, <1 x i1> [[TMP0]], <1 x i64> poison)
 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to i64
 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <1 x i16>
-; CHECK-NEXT: call void @llvm.masked.store.v1i16.p0(<1 x i16> [[TMP7]], ptr [[B]], i32 2, <1 x i1> [[TMP0]])
+; CHECK-NEXT: call void @llvm.masked.store.v1i16.p0(<1 x i16> [[TMP7]], ptr [[B]], i32 8, <1 x i1> [[TMP0]])
 ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
 ; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP8]], ptr [[P]], i32 4, <1 x i1> [[TMP0]])
 ; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP6]] to <1 x i64>