llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-mlir Author: jeanPerier <details> <summary>Changes</summary> Implement the new PromotableAliaserInterface for the LLVM IR dialect. Note that since LLVM IR pointers are opaque, the type conversion logic is still done by the load/store as before, but this patch allows load/store after bitcast, launder.invariant.group, strip.invariant.group, zero-offset getelementptr, and addrspacecast to be candidates for mem2reg promotion. Without this patch, mem2reg does not succeed on any of the newly added tests. --- Full diff: https://github.com/llvm/llvm-project/pull/199226.diff 4 Files Affected: - (modified) mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td (+4) - (modified) mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td (+8-1) - (modified) mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp (+44) - (modified) mlir/test/Dialect/LLVMIR/mem2reg.mlir (+311) ``````````diff diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td index 688bc19cbf18a..52aa16e3b7a14 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td @@ -403,6 +403,8 @@ def LLVM_InvariantEndOp : LLVM_ZeroResultIntrOp<"invariant.end", [2], def LLVM_LaunderInvariantGroupOp : LLVM_OneResultIntrOp<"launder.invariant.group", [], [0], [DeclareOpInterfaceMethods<PromotableOpInterface>, + DeclareOpInterfaceMethods<PromotableAliaserInterface, + ["getPromotableSlotAliases"]>, SameOperandsAndResultType]> { let arguments = (ins LLVM_AnyPointer:$ptr); let results = (outs LLVM_AnyPointer:$res); @@ -412,6 +414,8 @@ def LLVM_LaunderInvariantGroupOp def LLVM_StripInvariantGroupOp : LLVM_OneResultIntrOp<"strip.invariant.group", [], [0], [DeclareOpInterfaceMethods<PromotableOpInterface>, + DeclareOpInterfaceMethods<PromotableAliaserInterface, + ["getPromotableSlotAliases"]>, SameOperandsAndResultType]> { let arguments = (ins LLVM_AnyPointer:$ptr); let results 
= (outs LLVM_AnyPointer:$res); diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td index d7c8cf236f0da..b51c689fbdbcd 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -286,6 +286,8 @@ def LLVM_AllocaOp : LLVM_Op<"alloca", def LLVM_GEPOp : LLVM_Op<"getelementptr", [Pure, DeclareOpInterfaceMethods<PromotableOpInterface>, + DeclareOpInterfaceMethods<PromotableAliaserInterface, + ["getPromotableSlotAliases"]>, DeclareOpInterfaceMethods<SafeMemorySlotAccessOpInterface>, DeclareOpInterfaceMethods<DestructurableAccessorOpInterface>, DeclareOpInterfaceMethods<ViewLikeOpInterface> @@ -632,7 +634,10 @@ class LLVM_DereferenceableCastOp<string mnemonic, string instName, Type type, } def LLVM_BitcastOp : LLVM_CastOp<"bitcast", "BitCast", LLVM_AnyNonAggregate, - LLVM_AnyNonAggregate, [DeclareOpInterfaceMethods<PromotableOpInterface>]> { + LLVM_AnyNonAggregate, + [DeclareOpInterfaceMethods<PromotableOpInterface>, + DeclareOpInterfaceMethods<PromotableAliaserInterface, + ["getPromotableSlotAliases"]>]> { let hasFolder = 1; let hasVerifier = 1; } @@ -640,6 +645,8 @@ def LLVM_AddrSpaceCastOp : LLVM_CastOp<"addrspacecast", "AddrSpaceCast", LLVM_ScalarOrVectorOf<LLVM_AnyPointer>, LLVM_ScalarOrVectorOf<LLVM_AnyPointer>, [DeclareOpInterfaceMethods<PromotableOpInterface>, + DeclareOpInterfaceMethods<PromotableAliaserInterface, + ["getPromotableSlotAliases"]>, DeclareOpInterfaceMethods<ViewLikeOpInterface>]> { let hasFolder = 1; } diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp index 66d6a592a93df..c79c1199df770 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp @@ -536,6 +536,18 @@ DeletionKind LLVM::BitcastOp::removeBlockingUses( return DeletionKind::Delete; } +void LLVM::BitcastOp::getPromotableSlotAliases( + OpOperand &aliasedSlotPointerOperand, const 
MemorySlot &parentSlot, + SmallVectorImpl<MemorySlot> &newMemorySlots) { + // Only pointer-to-pointer bitcasts alias a memory slot. With opaque + // pointers, the alias slot keeps the parent's element type since the + // pointer type itself does not carry it. + if (!isa<LLVM::LLVMPointerType>(getResult().getType()) || + !isa<LLVM::LLVMPointerType>(getArg().getType())) + return; + newMemorySlots.push_back(MemorySlot{getResult(), parentSlot.elemType}); +} + bool LLVM::AddrSpaceCastOp::canUsesBeRemoved( const SmallPtrSetImpl<OpOperand *> &blockingUses, SmallVectorImpl<OpOperand *> &newBlockingUses, @@ -548,6 +560,15 @@ DeletionKind LLVM::AddrSpaceCastOp::removeBlockingUses( return DeletionKind::Delete; } +void LLVM::AddrSpaceCastOp::getPromotableSlotAliases( + OpOperand &aliasedSlotPointerOperand, const MemorySlot &parentSlot, + SmallVectorImpl<MemorySlot> &newMemorySlots) { + // Only the scalar pointer form aliases a memory slot. + if (!isa<LLVM::LLVMPointerType>(getResult().getType())) + return; + newMemorySlots.push_back(MemorySlot{getResult(), parentSlot.elemType}); +} + bool LLVM::LifetimeStartOp::canUsesBeRemoved( const SmallPtrSetImpl<OpOperand *> &blockingUses, SmallVectorImpl<OpOperand *> &newBlockingUses, @@ -608,6 +629,12 @@ DeletionKind LLVM::LaunderInvariantGroupOp::removeBlockingUses( return DeletionKind::Delete; } +void LLVM::LaunderInvariantGroupOp::getPromotableSlotAliases( + OpOperand &aliasedSlotPointerOperand, const MemorySlot &parentSlot, + SmallVectorImpl<MemorySlot> &newMemorySlots) { + newMemorySlots.push_back(MemorySlot{getResult(), parentSlot.elemType}); +} + bool LLVM::StripInvariantGroupOp::canUsesBeRemoved( const SmallPtrSetImpl<OpOperand *> &blockingUses, SmallVectorImpl<OpOperand *> &newBlockingUses, @@ -620,6 +647,12 @@ DeletionKind LLVM::StripInvariantGroupOp::removeBlockingUses( return DeletionKind::Delete; } +void LLVM::StripInvariantGroupOp::getPromotableSlotAliases( + OpOperand &aliasedSlotPointerOperand, const MemorySlot 
&parentSlot, + SmallVectorImpl<MemorySlot> &newMemorySlots) { + newMemorySlots.push_back(MemorySlot{getResult(), parentSlot.elemType}); +} + bool LLVM::DbgDeclareOp::canUsesBeRemoved( const SmallPtrSetImpl<OpOperand *> &blockingUses, SmallVectorImpl<OpOperand *> &newBlockingUses, @@ -694,6 +727,17 @@ DeletionKind LLVM::GEPOp::removeBlockingUses( return DeletionKind::Delete; } +void LLVM::GEPOp::getPromotableSlotAliases( + OpOperand &aliasedSlotPointerOperand, const MemorySlot &parentSlot, + SmallVectorImpl<MemorySlot> &newMemorySlots) { + // Only zero-index GEPs are no-op aliases of the slot pointer; non-zero + // indices step into the slot and cannot be projected back generically. + if (!hasAllZeroIndices(*this) || + !isa<LLVM::LLVMPointerType>(getResult().getType())) + return; + newMemorySlots.push_back(MemorySlot{getResult(), parentSlot.elemType}); +} + /// Returns the amount of bytes the provided GEP elements will offset the /// pointer by. Returns nullopt if no constant offset could be computed. 
static std::optional<uint64_t> gepToByteOffset(const DataLayout &dataLayout, diff --git a/mlir/test/Dialect/LLVMIR/mem2reg.mlir b/mlir/test/Dialect/LLVMIR/mem2reg.mlir index 3316b4bb955c3..7862789fa0188 100644 --- a/mlir/test/Dialect/LLVMIR/mem2reg.mlir +++ b/mlir/test/Dialect/LLVMIR/mem2reg.mlir @@ -1180,3 +1180,314 @@ llvm.func @dead_direct_use(%arg0 : i1) { } llvm.return } + +// ----- + +// CHECK-LABEL: llvm.func @promote_load_through_bitcast +// CHECK-SAME: (%[[ARG0:.*]]: i32) -> i32 +llvm.func @promote_load_through_bitcast(%arg0: i32) -> i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: llvm.alloca + // CHECK-NOT: llvm.bitcast + %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr + %2 = llvm.bitcast %1 : !llvm.ptr to !llvm.ptr + llvm.store %arg0, %1 : i32, !llvm.ptr + %3 = llvm.load %2 : !llvm.ptr -> i32 + // CHECK: llvm.return %[[ARG0]] : i32 + llvm.return %3 : i32 +} + +// ----- + +// CHECK-LABEL: llvm.func @promote_store_through_bitcast +// CHECK-SAME: (%[[ARG0:.*]]: i32) -> i32 +llvm.func @promote_store_through_bitcast(%arg0: i32) -> i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: llvm.alloca + // CHECK-NOT: llvm.bitcast + %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr + %2 = llvm.bitcast %1 : !llvm.ptr to !llvm.ptr + llvm.store %arg0, %2 : i32, !llvm.ptr + %3 = llvm.load %1 : !llvm.ptr -> i32 + // CHECK: llvm.return %[[ARG0]] : i32 + llvm.return %3 : i32 +} + +// ----- + +// CHECK-LABEL: llvm.func @promote_store_and_load_through_bitcast +// CHECK-SAME: (%[[ARG0:.*]]: i32) -> i32 +llvm.func @promote_store_and_load_through_bitcast(%arg0: i32) -> i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: llvm.alloca + // CHECK-NOT: llvm.bitcast + %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr + %2 = llvm.bitcast %1 : !llvm.ptr to !llvm.ptr + llvm.store %arg0, %2 : i32, !llvm.ptr + %3 = llvm.load %2 : !llvm.ptr -> i32 + // CHECK: llvm.return %[[ARG0]] : i32 + llvm.return %3 : i32 +} + +// ----- + +// CHECK-LABEL: llvm.func 
@promote_through_chained_bitcasts +// CHECK-SAME: (%[[ARG0:.*]]: i32) -> i32 +llvm.func @promote_through_chained_bitcasts(%arg0: i32) -> i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: llvm.alloca + // CHECK-NOT: llvm.bitcast + %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr + %2 = llvm.bitcast %1 : !llvm.ptr to !llvm.ptr + %3 = llvm.bitcast %2 : !llvm.ptr to !llvm.ptr + llvm.store %arg0, %3 : i32, !llvm.ptr + %4 = llvm.load %3 : !llvm.ptr -> i32 + // CHECK: llvm.return %[[ARG0]] : i32 + llvm.return %4 : i32 +} + +// ----- + +// CHECK-LABEL: llvm.func amdgpu_kernelcc @promote_through_addrspacecast +// CHECK-SAME: (%[[ARG0:.*]]: i32) -> i32 +llvm.func amdgpu_kernelcc @promote_through_addrspacecast(%arg0: i32) -> i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: llvm.alloca + // CHECK-NOT: llvm.addrspacecast + %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr<5> + %2 = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr + llvm.store %arg0, %2 : i32, !llvm.ptr + %3 = llvm.load %2 : !llvm.ptr -> i32 + // CHECK: llvm.return %[[ARG0]] : i32 + llvm.return %3 : i32 +} + +// ----- + +// CHECK-LABEL: llvm.func @promote_through_zero_gep +// CHECK-SAME: (%[[ARG0:.*]]: i32) -> i32 +llvm.func @promote_through_zero_gep(%arg0: i32) -> i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: llvm.alloca + // CHECK-NOT: llvm.getelementptr + %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr + %2 = llvm.getelementptr %1[0] : (!llvm.ptr) -> !llvm.ptr, i32 + llvm.store %arg0, %2 : i32, !llvm.ptr + %3 = llvm.load %2 : !llvm.ptr -> i32 + // CHECK: llvm.return %[[ARG0]] : i32 + llvm.return %3 : i32 +} + +// ----- + +// Non-zero GEPs are not aliases of the whole slot, so promotion must fail. 
+ +// CHECK-LABEL: llvm.func @no_promote_through_nonzero_gep +llvm.func @no_promote_through_nonzero_gep(%arg0: i32) -> i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: llvm.alloca + %1 = llvm.alloca %0 x !llvm.array<2 x i32> : (i32) -> !llvm.ptr + %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<2 x i32> + llvm.store %arg0, %2 : i32, !llvm.ptr + %3 = llvm.load %2 : !llvm.ptr -> i32 + llvm.return %3 : i32 +} + +// ----- + +// CHECK-LABEL: llvm.func @promote_through_launder_invariant_group +// CHECK-SAME: (%[[ARG0:.*]]: i32) -> i32 +llvm.func @promote_through_launder_invariant_group(%arg0: i32) -> i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: llvm.alloca + // CHECK-NOT: llvm.intr.launder.invariant.group + %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr + %2 = llvm.intr.launder.invariant.group %1 : !llvm.ptr + llvm.store %arg0, %2 : i32, !llvm.ptr + %3 = llvm.load %2 : !llvm.ptr -> i32 + // CHECK: llvm.return %[[ARG0]] : i32 + llvm.return %3 : i32 +} + +// ----- + +// CHECK-LABEL: llvm.func @promote_through_strip_invariant_group +// CHECK-SAME: (%[[ARG0:.*]]: i32) -> i32 +llvm.func @promote_through_strip_invariant_group(%arg0: i32) -> i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: llvm.alloca + // CHECK-NOT: llvm.intr.strip.invariant.group + %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr + %2 = llvm.intr.strip.invariant.group %1 : !llvm.ptr + llvm.store %arg0, %2 : i32, !llvm.ptr + %3 = llvm.load %2 : !llvm.ptr -> i32 + // CHECK: llvm.return %[[ARG0]] : i32 + llvm.return %3 : i32 +} + +// ----- + +// CHECK-LABEL: llvm.func @promote_through_alias_across_blocks +// CHECK-SAME: (%[[COND:.*]]: i1, %[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32) -> i32 +llvm.func @promote_through_alias_across_blocks(%cond: i1, %arg1: i32, %arg2: i32) -> i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: llvm.alloca + // CHECK-NOT: llvm.bitcast + %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr + %2 = llvm.bitcast %1 : 
!llvm.ptr to !llvm.ptr + llvm.cond_br %cond, ^bb1, ^bb2 +^bb1: + llvm.store %arg1, %2 : i32, !llvm.ptr + // CHECK: llvm.br ^[[BB3:.*]](%[[ARG1]] : i32) + llvm.br ^bb3 +^bb2: + llvm.store %arg2, %1 : i32, !llvm.ptr + // CHECK: llvm.br ^[[BB3]](%[[ARG2]] : i32) + llvm.br ^bb3 +// CHECK: ^[[BB3]](%[[PHI:.*]]: i32): +^bb3: + %3 = llvm.load %2 : !llvm.ptr -> i32 + // CHECK: llvm.return %[[PHI]] : i32 + llvm.return %3 : i32 +} + +// ----- + +// Load through a bitcast alias, with a load type that differs from the slot's +// element type: `createExtractAndCast` must emit a same-size bitcast of the +// reaching definition. + +// CHECK-LABEL: @load_int_from_float_through_bitcast +llvm.func @load_int_from_float_through_bitcast() -> i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: llvm.alloca + // CHECK-NOT: llvm.bitcast %{{.*}} : !llvm.ptr + %1 = llvm.alloca %0 x f32 : (i32) -> !llvm.ptr + %2 = llvm.bitcast %1 : !llvm.ptr to !llvm.ptr + %3 = llvm.load %2 : !llvm.ptr -> i32 + // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef : f32 + // CHECK: %[[CAST:.*]] = llvm.bitcast %[[UNDEF]] : f32 to i32 + // CHECK: llvm.return %[[CAST]] : i32 + llvm.return %3 : i32 +} + +// ----- + +// Store through a bitcast alias, with a store value type that differs from +// the slot's element type: `createInsertAndCast` must emit a same-size bitcast +// of the stored value. 
+ +// CHECK-LABEL: @store_float_to_int_through_bitcast +// CHECK-SAME: %[[ARG:.*]]: f32 +llvm.func @store_float_to_int_through_bitcast(%arg: f32) -> i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: llvm.alloca + // CHECK-NOT: llvm.bitcast %{{.*}} : !llvm.ptr + %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr + %2 = llvm.bitcast %1 : !llvm.ptr to !llvm.ptr + llvm.store %arg, %2 : f32, !llvm.ptr + %3 = llvm.load %1 : !llvm.ptr -> i32 + // CHECK: %[[CAST:.*]] = llvm.bitcast %[[ARG]] : f32 to i32 + // CHECK: llvm.return %[[CAST]] : i32 + llvm.return %3 : i32 +} + +// ----- + +// Same as above, but the load also goes through a bitcast alias. + +// CHECK-LABEL: @store_int_to_vector_through_bitcasts +// CHECK-SAME: %[[ARG:.*]]: i32 +llvm.func @store_int_to_vector_through_bitcasts(%arg: i32) -> vector<4xi8> { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: llvm.alloca + // CHECK-NOT: llvm.bitcast %{{.*}} : !llvm.ptr + %1 = llvm.alloca %0 x vector<2xi16> : (i32) -> !llvm.ptr + %2 = llvm.bitcast %1 : !llvm.ptr to !llvm.ptr + llvm.store %arg, %2 : i32, !llvm.ptr + %3 = llvm.load %2 : !llvm.ptr -> vector<4xi8> + // CHECK: %[[CAST:.*]] = llvm.bitcast %[[ARG]] : i32 to vector<4xi8> + // CHECK: llvm.return %[[CAST]] + llvm.return %3 : vector<4xi8> +} + +// ----- + +// Narrowing load through an addrspacecast alias: `createExtractAndCast` emits +// a truncating sequence. 
+ +// CHECK-LABEL: llvm.func amdgpu_kernelcc @load_smaller_through_addrspacecast +llvm.func amdgpu_kernelcc @load_smaller_through_addrspacecast() -> f32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: llvm.alloca + // CHECK-NOT: llvm.addrspacecast + %1 = llvm.alloca %0 x f64 : (i32) -> !llvm.ptr<5> + %2 = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr + %3 = llvm.load %2 : !llvm.ptr -> f32 + // CHECK: %[[CAST:.*]] = llvm.bitcast %{{.*}} : f64 to i64 + // CHECK: %[[TRUNC:.*]] = llvm.trunc %[[CAST]] : i64 to i32 + // CHECK: %[[RES:.*]] = llvm.bitcast %[[TRUNC]] : i32 to f32 + // CHECK: llvm.return %[[RES]] : f32 + llvm.return %3 : f32 +} + +// ----- + +// Partial store through a zero-index GEP alias: `createInsertAndCast` emits +// the mask-and-combine sequence into the reaching definition projected back +// to the slot's element type. + +// CHECK-LABEL: @partial_store_through_zero_gep +// CHECK-SAME: %[[ARG:.+]]: vector<1xi8> +llvm.func @partial_store_through_zero_gep(%arg: vector<1xi8>) { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: llvm.alloca + // CHECK-NOT: llvm.getelementptr + // CHECK: %[[UNDEF:.+]] = llvm.mlir.undef : f32 + %1 = llvm.alloca %0 x f32 : (i32) -> !llvm.ptr + %2 = llvm.getelementptr %1[0] : (!llvm.ptr) -> !llvm.ptr, f32 + // CHECK: %[[CASTED_DEF:.+]] = llvm.bitcast %[[UNDEF]] : f32 to i32 + // CHECK: %[[CASTED_ARG:.+]] = llvm.bitcast %[[ARG]] : vector<1xi8> to i8 + // CHECK: %[[ZEXT:.+]] = llvm.zext %[[CASTED_ARG]] : i8 to i32 + // CHECK: %[[MASK:.+]] = llvm.mlir.constant(-256 : i32) : i32 + // CHECK: %[[MASKED:.+]] = llvm.and %[[CASTED_DEF]], %[[MASK]] + // CHECK: %[[NEW_DEF:.+]] = llvm.or %[[MASKED]], %[[ZEXT]] + // CHECK: %[[CASTED_NEW_DEF:.+]] = llvm.bitcast %[[NEW_DEF]] : i32 to f32 + llvm.store %arg, %2 : vector<1xi8>, !llvm.ptr + llvm.return +} + +// ----- + +// Cross-block partial store through a launder.invariant.group alias: the +// mask-and-combine value flows through the block-argument added at the join +// 
point, and the load on the other side reads it back through `createExtract +// AndCast`. + +// CHECK-LABEL: @cross_block_partial_store_through_alias +// CHECK-SAME: (%[[COND:.*]]: i1, %[[ARG:.+]]: i16) -> i32 +llvm.func @cross_block_partial_store_through_alias(%cond: i1, %arg: i16) -> i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: llvm.alloca + // CHECK-NOT: llvm.intr.launder.invariant.group + // CHECK: %[[UNDEF:.+]] = llvm.mlir.undef : i32 + %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr + %2 = llvm.intr.launder.invariant.group %1 : !llvm.ptr + llvm.cond_br %cond, ^bb1, ^bb2 +^bb1: + // CHECK: %[[ZEXT:.+]] = llvm.zext %[[ARG]] : i16 to i32 + // CHECK: %[[MASK:.+]] = llvm.mlir.constant(-65536 : i32) : i32 + // CHECK: %[[MASKED:.+]] = llvm.and %[[UNDEF]], %[[MASK]] + // CHECK: %[[NEW_DEF:.+]] = llvm.or %[[MASKED]], %[[ZEXT]] + // CHECK: llvm.br ^[[BB3:.*]](%[[NEW_DEF]] : i32) + llvm.store %arg, %2 : i16, !llvm.ptr + llvm.br ^bb3 +^bb2: + // CHECK: llvm.br ^[[BB3]](%[[UNDEF]] : i32) + llvm.br ^bb3 +// CHECK: ^[[BB3]](%[[PHI:.*]]: i32): +^bb3: + %3 = llvm.load %2 : !llvm.ptr -> i32 + // CHECK: llvm.return %[[PHI]] : i32 + llvm.return %3 : i32 +} `````````` </details> https://github.com/llvm/llvm-project/pull/199226 _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
