Author: Drew Kersnar Date: 2025-08-19T11:13:57-05:00 New Revision: 069ad2353c4bd32b883e2d5ce4a7f191bf6098e1
URL: https://github.com/llvm/llvm-project/commit/069ad2353c4bd32b883e2d5ce4a7f191bf6098e1 DIFF: https://github.com/llvm/llvm-project/commit/069ad2353c4bd32b883e2d5ce4a7f191bf6098e1.diff LOG: [NVPTXLowerArgs] Add align attribute to return value of addrspace.wrap intrinsic (#153889) If alignment inference happens after NVPTXLowerArgs these addrspace wrap intrinsics can prevent computeKnownBits from deriving alignment of loads/stores from parameters. To solve this, we can insert an alignment annotation on the generated intrinsic so that computeKnownBits does not need to traverse through it to find the alignment. Added: llvm/test/CodeGen/NVPTX/lower-args-alignment.ll Modified: llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll llvm/test/CodeGen/NVPTX/lower-args.ll llvm/test/CodeGen/NVPTX/lower-byval-args.ll Removed: ################################################################################ diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 2445005bf98ce..520ce4deb9a57 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -1027,9 +1027,16 @@ static inline bool isAddLike(const SDValue V) { (V->getOpcode() == ISD::OR && V->getFlags().hasDisjoint()); } +static SDValue stripAssertAlign(SDValue N) { + if (N.getOpcode() == ISD::AssertAlign) + N = N.getOperand(0); + return N; +} + // selectBaseADDR - Match a dag node which will serve as the base address for an // ADDR operand pair. static SDValue selectBaseADDR(SDValue N, SelectionDAG *DAG) { + N = stripAssertAlign(N); if (const auto *GA = dyn_cast<GlobalAddressSDNode>(N)) return DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N), GA->getValueType(0), GA->getOffset(), @@ -1044,6 +1051,7 @@ static SDValue selectBaseADDR(SDValue N, SelectionDAG *DAG) { } static SDValue accumulateOffset(SDValue &Addr, SDLoc DL, SelectionDAG *DAG) { + Addr = stripAssertAlign(Addr); APInt AccumulatedOffset(64u, 0); while (isAddLike(Addr)) { const auto *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); @@ -1055,7 +1063,7 @@ static SDValue accumulateOffset(SDValue &Addr, SDLoc DL, SelectionDAG *DAG) { break; AccumulatedOffset += CI; - Addr = Addr->getOperand(0); + Addr = stripAssertAlign(Addr->getOperand(0)); } return DAG->getSignedTargetConstant(AccumulatedOffset.getSExtValue(), DL, MVT::i32); diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp index f4362fe8d9056..e2bbe57c0085c 100644 --- a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp @@ -412,6 +412,22 @@ static void adjustByValArgAlignment(Argument *Arg, Value *ArgInParamAS, } } +// Create a call to the nvvm_internal_addrspace_wrap intrinsic and set the +// alignment of the return value based on the alignment of the argument. +static CallInst *createNVVMInternalAddrspaceWrap(IRBuilder<> &IRB, + Argument &Arg) { + CallInst *ArgInParam = + IRB.CreateIntrinsic(Intrinsic::nvvm_internal_addrspace_wrap, + {IRB.getPtrTy(ADDRESS_SPACE_PARAM), Arg.getType()}, + &Arg, {}, Arg.getName() + ".param"); + + if (MaybeAlign ParamAlign = Arg.getParamAlign()) + ArgInParam->addRetAttr( + Attribute::getWithAlignment(ArgInParam->getContext(), *ParamAlign)); + + return ArgInParam; +} + namespace { struct ArgUseChecker : PtrUseVisitor<ArgUseChecker> { using Base = PtrUseVisitor<ArgUseChecker>; @@ -515,10 +531,7 @@ void copyByValParam(Function &F, Argument &Arg) { Arg.getParamAlign().value_or(DL.getPrefTypeAlign(StructType))); Arg.replaceAllUsesWith(AllocA); - Value *ArgInParam = - IRB.CreateIntrinsic(Intrinsic::nvvm_internal_addrspace_wrap, - {IRB.getPtrTy(ADDRESS_SPACE_PARAM), Arg.getType()}, - &Arg, {}, Arg.getName()); + CallInst *ArgInParam = createNVVMInternalAddrspaceWrap(IRB, Arg); // Be sure to propagate alignment to this load; LLVM doesn't know that NVPTX // addrspacecast preserves alignment. Since params are constant, this load @@ -549,9 +562,7 @@ static void handleByValParam(const NVPTXTargetMachine &TM, Argument *Arg) { SmallVector<Use *, 16> UsesToUpdate(llvm::make_pointer_range(Arg->uses())); IRBuilder<> IRB(&*FirstInst); - Value *ArgInParamAS = IRB.CreateIntrinsic( - Intrinsic::nvvm_internal_addrspace_wrap, - {IRB.getPtrTy(ADDRESS_SPACE_PARAM), Arg->getType()}, {Arg}); + CallInst *ArgInParamAS = createNVVMInternalAddrspaceWrap(IRB, *Arg); for (Use *U : UsesToUpdate) convertToParamAS(U, ArgInParamAS, HasCvtaParam, IsGridConstant); @@ -581,10 +592,7 @@ static void handleByValParam(const NVPTXTargetMachine &TM, Argument *Arg) { // argument already in the param address space, we need to use the noop // intrinsic, this had the added benefit of preventing other optimizations // from folding away this pair of addrspacecasts. - auto *ParamSpaceArg = - IRB.CreateIntrinsic(Intrinsic::nvvm_internal_addrspace_wrap, - {IRB.getPtrTy(ADDRESS_SPACE_PARAM), Arg->getType()}, - Arg, {}, Arg->getName() + ".param"); + auto *ParamSpaceArg = createNVVMInternalAddrspaceWrap(IRB, *Arg); // Cast param address to generic address space. Value *GenericArg = IRB.CreateAddrSpaceCast( diff --git a/llvm/test/CodeGen/NVPTX/lower-args-alignment.ll b/llvm/test/CodeGen/NVPTX/lower-args-alignment.ll new file mode 100644 index 0000000000000..2051f6305cc03 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/lower-args-alignment.ll @@ -0,0 +1,36 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -passes=nvptx-lower-args,infer-alignment -S | FileCheck %s + +target triple = "nvptx64-nvidia-cuda" + +; ------------------------------------------------------------------------------ +; Test that alignment can be inferred through llvm.nvvm.internal.addrspace.wrap.p101.p0 intrinsics +; thanks to the alignment attribute on the intrinsic +; ------------------------------------------------------------------------------ + +%struct.S1 = type { i32, i32, i32, i32 } +define ptx_kernel i32 @test_align8(ptr noundef readonly byval(%struct.S1) align 8 captures(none) %params) { +; CHECK-LABEL: define ptx_kernel i32 @test_align8( +; CHECK-SAME: ptr noundef readonly byval([[STRUCT_S1:%.*]]) align 8 captures(none) [[PARAMS:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call align 8 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[PARAMS]]) +; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(101) [[TMP0]], align 8 +; CHECK-NEXT: ret i32 [[LOAD]] +; +entry: + %load = load i32, ptr %params, align 4 + ret i32 %load +} + +define ptx_kernel i32 @test_align1(ptr noundef readonly byval(%struct.S1) align 1 captures(none) %params) { +; CHECK-LABEL: define ptx_kernel i32 @test_align1( +; CHECK-SAME: ptr noundef readonly byval([[STRUCT_S1:%.*]]) align 4 captures(none) [[PARAMS:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call align 1 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[PARAMS]]) +; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(101) [[TMP0]], align 4 +; CHECK-NEXT: ret i32 [[LOAD]] +; +entry: + %load = load i32, ptr %params, align 4 + ret i32 %load +} diff --git a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll index 045704bdcd3fc..f5df0fcde1883 100644 --- a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll +++ b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll @@ -72,7 +72,7 @@ define ptx_kernel void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %inpu ; PTX-NEXT: ret; ; OPT-LABEL: define ptx_kernel void @grid_const_int( ; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], i32 [[INPUT2:%.*]], ptr [[OUT:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { -; OPT-NEXT: [[INPUT11:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) +; OPT-NEXT: [[INPUT11:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) ; OPT-NEXT: [[TMP:%.*]] = load i32, ptr addrspace(101) [[INPUT11]], align 4 ; OPT-NEXT: [[ADD:%.*]] = add i32 [[TMP]], [[INPUT2]] ; OPT-NEXT: store i32 [[ADD]], ptr [[OUT]], align 4 @@ -101,7 +101,7 @@ define ptx_kernel void @grid_const_struct(ptr byval(%struct.s) align 4 %input, p ; PTX-NEXT: ret; ; OPT-LABEL: define ptx_kernel void @grid_const_struct( ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[OUT:%.*]]) #[[ATTR0]] { -; OPT-NEXT: [[INPUT1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]]) +; OPT-NEXT: [[INPUT1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]]) ; OPT-NEXT: [[GEP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 0 ; OPT-NEXT: [[GEP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 1 ; OPT-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(101) [[GEP13]], align 4 @@ -137,7 +137,7 @@ define ptx_kernel void @grid_const_escape(ptr byval(%struct.s) align 4 %input) { ; PTX-NEXT: ret; ; OPT-LABEL: define ptx_kernel void @grid_const_escape( ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]]) #[[ATTR0]] { -; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]]) +; OPT-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]]) ; OPT-NEXT: [[INPUT_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr ; OPT-NEXT: [[CALL:%.*]] = call i32 @escape(ptr [[INPUT_PARAM_GEN]]) ; OPT-NEXT: ret void @@ -180,9 +180,9 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4 ; PTX-NEXT: ret; ; OPT-LABEL: define ptx_kernel void @multiple_grid_const_escape( ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], i32 [[A:%.*]], ptr byval(i32) align 4 [[B:%.*]]) #[[ATTR0]] { -; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[B]]) +; OPT-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[B]]) ; OPT-NEXT: [[B_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr -; OPT-NEXT: [[TMP2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]]) +; OPT-NEXT: [[TMP2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]]) ; OPT-NEXT: [[INPUT_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP2]] to ptr ; OPT-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; OPT-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 @@ -208,7 +208,7 @@ define ptx_kernel void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %i ; PTX-NEXT: ret; ; OPT-LABEL: define ptx_kernel void @grid_const_memory_escape( ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[ADDR:%.*]]) #[[ATTR0]] { -; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]]) +; OPT-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]]) ; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr ; OPT-NEXT: store ptr [[INPUT1]], ptr [[ADDR]], align 8 ; OPT-NEXT: ret void @@ -235,7 +235,7 @@ define ptx_kernel void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4 ; PTX-NOT .local ; OPT-LABEL: define ptx_kernel void @grid_const_inlineasm_escape( ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[RESULT:%.*]]) #[[ATTR0]] { -; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]]) +; OPT-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]]) ; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr ; OPT-NEXT: [[TMPPTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 0 ; OPT-NEXT: [[TMPPTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 1 @@ -357,7 +357,7 @@ define ptx_kernel void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr ; PTX-NEXT: ret; ; OPT-LABEL: define ptx_kernel void @grid_const_phi( ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] { -; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) +; OPT-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) ; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr ; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4 ; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0 @@ -416,7 +416,7 @@ define ptx_kernel void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1, ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] { ; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]]) ; OPT-NEXT: [[INPUT2_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr -; OPT-NEXT: [[TMP2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) +; OPT-NEXT: [[TMP2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) ; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP2]] to ptr ; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4 ; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0 @@ -471,7 +471,7 @@ define ptx_kernel void @grid_const_select(ptr byval(i32) align 4 %input1, ptr by ; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] { ; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]]) ; OPT-NEXT: [[INPUT2_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr -; OPT-NEXT: [[TMP2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) +; OPT-NEXT: [[TMP2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) ; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP2]] to ptr ; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4 ; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0 @@ -520,7 +520,7 @@ declare void @device_func(ptr byval(i32) align 4) define ptx_kernel void @test_forward_byval_arg(ptr byval(i32) align 4 %input) { ; OPT-LABEL: define ptx_kernel void @test_forward_byval_arg( ; OPT-SAME: ptr byval(i32) align 4 [[INPUT:%.*]]) #[[ATTR0]] { -; OPT-NEXT: [[INPUT_PARAM:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]]) +; OPT-NEXT: [[INPUT_PARAM:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]]) ; OPT-NEXT: [[INPUT_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[INPUT_PARAM]] to ptr ; OPT-NEXT: call void @device_func(ptr byval(i32) align 4 [[INPUT_PARAM_GEN]]) ; OPT-NEXT: ret void diff --git a/llvm/test/CodeGen/NVPTX/lower-args.ll b/llvm/test/CodeGen/NVPTX/lower-args.ll index 7c029ab516d6e..b4a51035c6610 100644 --- a/llvm/test/CodeGen/NVPTX/lower-args.ll +++ b/llvm/test/CodeGen/NVPTX/lower-args.ll @@ -200,7 +200,7 @@ define ptx_kernel void @ptr_as_int(i64 noundef %i, i32 noundef %v) { define ptx_kernel void @ptr_as_int_aggr(ptr nocapture noundef readonly byval(%struct.S) align 8 %s, i32 noundef %v) { ; IRC-LABEL: define ptx_kernel void @ptr_as_int_aggr( ; IRC-SAME: ptr noundef readonly byval([[STRUCT_S:%.*]]) align 8 captures(none) [[S:%.*]], i32 noundef [[V:%.*]]) { -; IRC-NEXT: [[S3:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]]) +; IRC-NEXT: [[S3:%.*]] = call align 8 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]]) ; IRC-NEXT: [[I:%.*]] = load i64, ptr addrspace(101) [[S3]], align 8 ; IRC-NEXT: [[P:%.*]] = inttoptr i64 [[I]] to ptr ; IRC-NEXT: [[P1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1) @@ -210,7 +210,7 @@ define ptx_kernel void @ptr_as_int_aggr(ptr nocapture noundef readonly byval(%st ; ; IRO-LABEL: define ptx_kernel void @ptr_as_int_aggr( ; IRO-SAME: ptr noundef readonly byval([[STRUCT_S:%.*]]) align 8 captures(none) [[S:%.*]], i32 noundef [[V:%.*]]) { -; IRO-NEXT: [[S1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]]) +; IRO-NEXT: [[S1:%.*]] = call align 8 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]]) ; IRO-NEXT: [[I:%.*]] = load i64, ptr addrspace(101) [[S1]], align 8 ; IRO-NEXT: [[P:%.*]] = inttoptr i64 [[I]] to ptr ; IRO-NEXT: store i32 [[V]], ptr [[P]], align 4 diff --git a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll index 20a35198c3c16..4d36ff9496ede 100644 --- a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll +++ b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll @@ -32,7 +32,7 @@ define dso_local ptx_kernel void @read_only(ptr nocapture noundef writeonly %out ; LOWER-ARGS-LABEL: define dso_local ptx_kernel void @read_only( ; LOWER-ARGS-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; LOWER-ARGS-NEXT: [[ENTRY:.*:]] -; LOWER-ARGS-NEXT: [[S3:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]]) +; LOWER-ARGS-NEXT: [[S3:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]]) ; LOWER-ARGS-NEXT: [[I:%.*]] = load i32, ptr addrspace(101) [[S3]], align 4 ; LOWER-ARGS-NEXT: store i32 [[I]], ptr [[OUT]], align 4 ; LOWER-ARGS-NEXT: ret void @@ -66,7 +66,7 @@ define dso_local ptx_kernel void @read_only_gep(ptr nocapture noundef writeonly ; LOWER-ARGS-LABEL: define dso_local ptx_kernel void @read_only_gep( ; LOWER-ARGS-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; LOWER-ARGS-NEXT: [[ENTRY:.*:]] -; LOWER-ARGS-NEXT: [[S3:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]]) +; LOWER-ARGS-NEXT: [[S3:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]]) ; LOWER-ARGS-NEXT: [[B4:%.*]] = getelementptr inbounds i8, ptr addrspace(101) [[S3]], i64 4 ; LOWER-ARGS-NEXT: [[I:%.*]] = load i32, ptr addrspace(101) [[B4]], align 4 ; LOWER-ARGS-NEXT: store i32 [[I]], ptr [[OUT]], align 4 @@ -128,7 +128,7 @@ define dso_local ptx_kernel void @escape_ptr(ptr nocapture noundef readnone %out ; COMMON-SAME: ptr noundef readnone captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; COMMON-NEXT: [[ENTRY:.*:]] ; COMMON-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4 -; COMMON-NEXT: [[S2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]]) +; COMMON-NEXT: [[S2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]]) ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false) ; COMMON-NEXT: call void @_Z6escapePv(ptr noundef nonnull [[S1]]) #[[ATTR6:[0-9]+]] ; COMMON-NEXT: ret void @@ -167,7 +167,7 @@ define dso_local ptx_kernel void @escape_ptr_gep(ptr nocapture noundef readnone ; COMMON-SAME: ptr noundef readnone captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; COMMON-NEXT: [[ENTRY:.*:]] ; COMMON-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4 -; COMMON-NEXT: [[S2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]]) +; COMMON-NEXT: [[S2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]]) ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false) ; COMMON-NEXT: [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S1]], i64 4 ; COMMON-NEXT: call void @_Z6escapePv(ptr noundef nonnull [[B]]) #[[ATTR6]] @@ -209,7 +209,7 @@ define dso_local ptx_kernel void @escape_ptr_store(ptr nocapture noundef writeon ; COMMON-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; COMMON-NEXT: [[ENTRY:.*:]] ; COMMON-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4 -; COMMON-NEXT: [[S2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]]) +; COMMON-NEXT: [[S2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]]) ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false) ; COMMON-NEXT: store ptr [[S1]], ptr [[OUT]], align 8 ; COMMON-NEXT: ret void @@ -246,7 +246,7 @@ define dso_local ptx_kernel void @escape_ptr_gep_store(ptr nocapture noundef wri ; COMMON-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; COMMON-NEXT: [[ENTRY:.*:]] ; COMMON-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4 -; COMMON-NEXT: [[S2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]]) +; COMMON-NEXT: [[S2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]]) ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false) ; COMMON-NEXT: [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S1]], i64 4 ; COMMON-NEXT: store ptr [[B]], ptr [[OUT]], align 8 @@ -286,7 +286,7 @@ define dso_local ptx_kernel void @escape_ptrtoint(ptr nocapture noundef writeonl ; COMMON-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; COMMON-NEXT: [[ENTRY:.*:]] ; COMMON-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4 -; COMMON-NEXT: [[S2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]]) +; COMMON-NEXT: [[S2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]]) ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false) ; COMMON-NEXT: [[I:%.*]] = ptrtoint ptr [[S1]] to i64 ; COMMON-NEXT: store i64 [[I]], ptr [[OUT]], align 8 @@ -324,7 +324,7 @@ define dso_local ptx_kernel void @memcpy_from_param(ptr nocapture noundef writeo ; LOWER-ARGS-LABEL: define dso_local ptx_kernel void @memcpy_from_param( ; LOWER-ARGS-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; LOWER-ARGS-NEXT: [[ENTRY:.*:]] -; LOWER-ARGS-NEXT: [[S3:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]]) +; LOWER-ARGS-NEXT: [[S3:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]]) ; LOWER-ARGS-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr [[OUT]], ptr addrspace(101) [[S3]], i64 16, i1 true) ; LOWER-ARGS-NEXT: ret void ; @@ -445,7 +445,7 @@ define dso_local ptx_kernel void @memcpy_to_param(ptr nocapture noundef readonly ; COMMON-SAME: ptr noundef readonly captures(none) [[IN:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; COMMON-NEXT: [[ENTRY:.*:]] ; COMMON-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4 -; COMMON-NEXT: [[S2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]]) +; COMMON-NEXT: [[S2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]]) ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false) ; COMMON-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[S1]], ptr [[IN]], i64 16, i1 true) ; COMMON-NEXT: ret void @@ -525,7 +525,7 @@ define dso_local ptx_kernel void @copy_on_store(ptr nocapture noundef readonly % ; COMMON-SAME: ptr noundef readonly captures(none) [[IN:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]], i1 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { ; COMMON-NEXT: [[BB:.*:]] ; COMMON-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4 -; COMMON-NEXT: [[S2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]]) +; COMMON-NEXT: [[S2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]]) ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false) ; COMMON-NEXT: [[I:%.*]] = load i32, ptr [[IN]], align 4 ; COMMON-NEXT: store i32 [[I]], ptr [[S1]], align 4 @@ -551,7 +551,7 @@ define ptx_kernel void @test_select(ptr byval(i32) align 4 %input1, ptr byval(i3 ; SM_60-NEXT: [[INPUT25:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]]) ; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT24]], ptr addrspace(101) align 4 [[INPUT25]], i64 4, i1 false) ; SM_60-NEXT: [[INPUT11:%.*]] = alloca i32, align 4 -; SM_60-NEXT: [[INPUT12:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) +; SM_60-NEXT: [[INPUT12:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) ; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 4, i1 false) ; SM_60-NEXT: [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT11]], ptr [[INPUT24]] ; SM_60-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4 @@ -563,7 +563,7 @@ define ptx_kernel void @test_select(ptr byval(i32) align 4 %input1, ptr byval(i3 ; SM_70-NEXT: [[BB:.*:]] ; SM_70-NEXT: [[TMP0:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]]) ; SM_70-NEXT: [[INPUT2_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP0]] to ptr -; SM_70-NEXT: [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) +; SM_70-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) ; SM_70-NEXT: [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr ; SM_70-NEXT: [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT1_PARAM_GEN]], ptr [[INPUT2_PARAM_GEN]] ; SM_70-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4 @@ -577,7 +577,7 @@ define ptx_kernel void @test_select(ptr byval(i32) align 4 %input1, ptr byval(i3 ; COPY-NEXT: [[INPUT24:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]]) ; COPY-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT23]], ptr addrspace(101) align 4 [[INPUT24]], i64 4, i1 false) ; COPY-NEXT: [[INPUT11:%.*]] = alloca i32, align 4 -; COPY-NEXT: [[INPUT12:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) +; COPY-NEXT: [[INPUT12:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) ; COPY-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 4, i1 false) ; COPY-NEXT: [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT11]], ptr [[INPUT23]] ; COPY-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4 @@ -637,7 +637,7 @@ define ptx_kernel void @test_select_write(ptr byval(i32) align 4 %input1, ptr by ; COMMON-NEXT: [[INPUT24:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]]) ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT23]], ptr addrspace(101) align 4 [[INPUT24]], i64 4, i1 false) ; COMMON-NEXT: [[INPUT11:%.*]] = alloca i32, align 4 -; COMMON-NEXT: [[INPUT12:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) +; COMMON-NEXT: [[INPUT12:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 4, i1 false) ; COMMON-NEXT: [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT11]], ptr [[INPUT23]] ; COMMON-NEXT: store i32 1, ptr [[PTRNEW]], align 4 @@ -682,7 +682,7 @@ define ptx_kernel void @test_phi(ptr byval(%struct.S) align 4 %input1, ptr byval ; SM_60-NEXT: [[INPUT25:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]]) ; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 8 [[INPUT24]], ptr addrspace(101) align 8 [[INPUT25]], i64 8, i1 false) ; SM_60-NEXT: [[INPUT11:%.*]] = alloca [[STRUCT_S]], align 4 -; SM_60-NEXT: [[INPUT12:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) +; SM_60-NEXT: [[INPUT12:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) ; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 8, i1 false) ; SM_60-NEXT: br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]] ; SM_60: [[FIRST]]: @@ -702,7 +702,7 @@ define ptx_kernel void @test_phi(ptr byval(%struct.S) align 4 %input1, ptr byval ; SM_70-NEXT: [[BB:.*:]] ; SM_70-NEXT: [[TMP0:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]]) ; SM_70-NEXT: [[INPUT2_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP0]] to ptr -; SM_70-NEXT: [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) +; SM_70-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) ; SM_70-NEXT: [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr ; SM_70-NEXT: br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]] ; SM_70: [[FIRST]]: @@ -724,7 +724,7 @@ define ptx_kernel void @test_phi(ptr byval(%struct.S) align 4 %input1, ptr byval ; COPY-NEXT: [[INPUT24:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]]) ; COPY-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 8 [[INPUT23]], ptr addrspace(101) align 8 [[INPUT24]], i64 8, i1 false) ; COPY-NEXT: [[INPUT11:%.*]] = alloca [[STRUCT_S]], align 4 -; COPY-NEXT: [[INPUT12:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) +; COPY-NEXT: [[INPUT12:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) ; COPY-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 8, i1 false) ; COPY-NEXT: br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]] ; COPY: [[FIRST]]: @@ -808,7 +808,7 @@ define ptx_kernel void @test_phi_write(ptr byval(%struct.S) align 4 %input1, ptr ; COMMON-NEXT: [[INPUT25:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]]) ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 8 [[INPUT24]], ptr addrspace(101) align 8 [[INPUT25]], i64 8, i1 false) ; COMMON-NEXT: [[INPUT11:%.*]] = alloca [[STRUCT_S]], align 4 -; COMMON-NEXT: [[INPUT12:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) +; COMMON-NEXT: [[INPUT12:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 8, i1 false) ; COMMON-NEXT: br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]] ; COMMON: [[FIRST]]: @@ -871,7 +871,7 @@ define ptx_kernel void @test_forward_byval_arg(ptr byval(i32) align 4 %input) { ; COMMON-LABEL: define ptx_kernel void @test_forward_byval_arg( ; COMMON-SAME: ptr byval(i32) align 4 [[INPUT:%.*]]) #[[ATTR3]] { ; COMMON-NEXT: [[INPUT1:%.*]] = alloca i32, align 4 -; COMMON-NEXT: [[INPUT2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]]) +; COMMON-NEXT: [[INPUT2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]]) ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT1]], ptr addrspace(101) align 4 [[INPUT2]], i64 4, i1 false) ; COMMON-NEXT: call void @device_func(ptr byval(i32) align 4 [[INPUT1]]) ; COMMON-NEXT: ret void _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits