https://github.com/ZakyHermawan created https://github.com/llvm/llvm-project/pull/190087
PoisonAttr already been introduced in https://github.com/llvm/llvm-project/pull/179082, but no lowering exist, this PR also address that. This PR also improve readability of checks in address-spaces.cu by removing information that we did not care. Note: - CIR->LLVM initialize global variables with poison, while OGCG initialize global variables with undef. Because of this, I did not merge the checks between LLVM and OGCG within the same prefix. - I think it is better to use variable name for global variables in this case instead of using regex matching to capture variables (e.g. @[[DEV:.*]]), because we did not use those captures anyway (at least for now) @andykaylor >From a30d3830c28be0ee50f76c836c06393709dfb8c4 Mon Sep 17 00:00:00 2001 From: ZakyHermawan <[email protected]> Date: Thu, 2 Apr 2026 06:49:09 +0700 Subject: [PATCH] [CIR][CUDA] Shadow variables, lower poison attribute, improve test readability Poison PoisonAtt already been introduced, but no lowering exist, this commit address that, Improve readability of checks in address-spaces.cu by removing information that we did not care, Note: - CIR->LLVM initialize global variables with poison, while OGCG initialize global variables with undef. Signed-off-by: ZakyHermawan <[email protected]> --- .../clang/CIR/Dialect/IR/CIRCUDAAttrs.td | 18 ++++++ clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp | 60 ++++++++++++++++++ clang/lib/CIR/CodeGen/CIRGenCUDARuntime.cpp | 19 ++++++ clang/lib/CIR/CodeGen/CIRGenCUDARuntime.h | 13 +++- clang/lib/CIR/CodeGen/CIRGenModule.cpp | 49 ++++++++++++++- clang/lib/CIR/CodeGen/CIRGenModule.h | 7 +++ .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 24 ++++--- clang/test/CIR/CodeGenCUDA/address-spaces.cu | 63 ++++++++++++++----- 8 files changed, 225 insertions(+), 28 deletions(-) diff --git a/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td b/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td index 5932db8323196..d68fb61fd115c 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td +++ b/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td @@ -37,6 +37,24 @@ def CIR_CUDAKernelNameAttr : CIR_Attr<"CUDAKernelName", "cu.kernel_name"> { let canHaveIllegalCXXABIType = 0; } +def CUDAShadowNameAttr : CIR_Attr<"CUDAShadowName", + "cu.shadow_name"> { + let summary = "Device-side global variable name for this shadow."; + let description = + [{ + This attribute is attached to global variable definitions and records the + mangled name of the global variable used on the device. + + In CUDA, __device__, __constant__ and __shared__ variables, as well as + surface and texture variables, will generate a shadow symbol on host. + We must preserve the correspodence in order to generate registration + functions. + }]; + + let parameters = (ins "std::string":$device_side_name); + let assemblyFormat = "`<` $device_side_name `>`"; +} + def CUDAExternallyInitializedAttr : CIR_Attr<"CUDAExternallyInitialized", "cu.externally_initialized"> { let summary = "The marked variable is externally initialized."; diff --git a/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp b/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp index 8b8e99023eceb..8fb7191e7a89e 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "CIRGenCUDARuntime.h" +#include "CIRGenCXXABI.h" #include "CIRGenFunction.h" #include "CIRGenModule.h" #include "mlir/IR/Operation.h" @@ -64,6 +65,11 @@ class CIRGenNVCUDARuntime : public CIRGenCUDARuntime { void emitDeviceStub(CIRGenFunction &cgf, cir::FuncOp fn, FunctionArgList &args) override; + + void internalizeDeviceSideVar(const VarDecl *d, + cir::GlobalLinkageKind &linkage) override; + + std::string getDeviceSideName(const NamedDecl *nd) override; }; } // namespace @@ -342,3 +348,57 @@ mlir::Operation *CIRGenNVCUDARuntime::getKernelHandle(cir::FuncOp fn, return globalOp; } + +void CIRGenNVCUDARuntime::internalizeDeviceSideVar( + const VarDecl *d, cir::GlobalLinkageKind &linkage) { + if (cgm.getLangOpts().GPURelocatableDeviceCode) + cgm.errorNYI( + "internalizeDeviceSideVar: GPU Relocatable Deviced Code (RDC)"); + + // __shared__ variables are odd. Shadows do get created, but + // they are not registered with the CUDA runtime, so they + // can't really be used to access their device-side + // counterparts. It's not clear yet whether it's nvcc's bug or + // a feature, but we've got to do the same for compatibility. + if (d->hasAttr<CUDADeviceAttr>() || d->hasAttr<CUDAConstantAttr>() || + d->hasAttr<CUDASharedAttr>()) { + linkage = cir::GlobalLinkageKind::InternalLinkage; + } + + if (d->getType()->isCUDADeviceBuiltinSurfaceType() || + d->getType()->isCUDADeviceBuiltinTextureType()) + cgm.errorNYI("internalizeDeviceSideVar: CUDA Surface/Texture support"); +} + +std::string CIRGenNVCUDARuntime::getDeviceSideName(const NamedDecl *nd) { + GlobalDecl gd; + // nd could be either a kernel or a variable. + if (auto *fd = dyn_cast<FunctionDecl>(nd)) + gd = GlobalDecl(fd, KernelReferenceKind::Kernel); + else + gd = GlobalDecl(nd); + std::string deviceSideName; + MangleContext *mc; + if (cgm.getLangOpts().CUDAIsDevice) + mc = &cgm.getCXXABI().getMangleContext(); + else + mc = deviceMC.get(); + if (mc->shouldMangleDeclName(nd)) { + SmallString<256> buffer; + llvm::raw_svector_ostream out(buffer); + mc->mangleName(gd, out); + deviceSideName = std::string(out.str()); + } else + deviceSideName = std::string(nd->getIdentifier()->getName()); + + // Make unique name for device side static file-scope variable for HIP. + if (cgm.getASTContext().shouldExternalize(nd) && + cgm.getLangOpts().GPURelocatableDeviceCode) { + SmallString<256> buffer; + llvm::raw_svector_ostream out(buffer); + out << deviceSideName; + cgm.printPostfixForExternalizedDecl(out, nd); + deviceSideName = std::string(out.str()); + } + return deviceSideName; +} diff --git a/clang/lib/CIR/CodeGen/CIRGenCUDARuntime.cpp b/clang/lib/CIR/CodeGen/CIRGenCUDARuntime.cpp index 25d981ef2f64b..8898071a35c12 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCUDARuntime.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenCUDARuntime.cpp @@ -20,6 +20,25 @@ using namespace clang; using namespace CIRGen; +static std::unique_ptr<MangleContext> initDeviceMC(CIRGenModule &cgm) { + // If the host and device have different C++ ABIs, mark it as the device + // mangle context so that the mangling needs to retrieve the additional + // device lambda mangling number instead of the regular host one. + if (cgm.getASTContext().getAuxTargetInfo() && + cgm.getASTContext().getTargetInfo().getCXXABI().isMicrosoft() && + cgm.getASTContext().getAuxTargetInfo()->getCXXABI().isItaniumFamily()) { + return std::unique_ptr<MangleContext>( + cgm.getASTContext().createDeviceMangleContext( + *cgm.getASTContext().getAuxTargetInfo())); + } + + return std::unique_ptr<MangleContext>(cgm.getASTContext().createMangleContext( + cgm.getASTContext().getAuxTargetInfo())); +} + +CIRGenCUDARuntime::CIRGenCUDARuntime(CIRGenModule &cgm) + : cgm(cgm), deviceMC(initDeviceMC(cgm)) {} + CIRGenCUDARuntime::~CIRGenCUDARuntime() {} RValue CIRGenCUDARuntime::emitCUDAKernelCallExpr(CIRGenFunction &cgf, diff --git a/clang/lib/CIR/CodeGen/CIRGenCUDARuntime.h b/clang/lib/CIR/CodeGen/CIRGenCUDARuntime.h index ba33602511e3b..39b6571849f29 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCUDARuntime.h +++ b/clang/lib/CIR/CodeGen/CIRGenCUDARuntime.h @@ -33,8 +33,11 @@ class CIRGenCUDARuntime { protected: CIRGenModule &cgm; + /// Mangle context for device. + std::unique_ptr<MangleContext> deviceMC; + public: - CIRGenCUDARuntime(CIRGenModule &cgm) : cgm(cgm) {} + CIRGenCUDARuntime(CIRGenModule &cgm); virtual ~CIRGenCUDARuntime(); virtual void emitDeviceStub(CIRGenFunction &cgf, cir::FuncOp fn, @@ -47,6 +50,14 @@ class CIRGenCUDARuntime { virtual mlir::Operation *getKernelHandle(cir::FuncOp fn, GlobalDecl gd) = 0; virtual mlir::Operation *getKernelStub(mlir::Operation *handle) = 0; + + /// Adjust linkage of shadow variables in host compilation + virtual void internalizeDeviceSideVar(const VarDecl *d, + cir::GlobalLinkageKind &linkage) = 0; + + /// Returns function or variable name on device side even if the current + /// compilation is for host. + virtual std::string getDeviceSideName(const NamedDecl *nd) = 0; }; CIRGenCUDARuntime *createNVCUDARuntime(CIRGenModule &cgm); diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index 2bc33c191bb32..88c4ac5d27115 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -434,6 +434,26 @@ bool CIRGenModule::shouldEmitCUDAGlobalVar(const VarDecl *global) const { global->getType()->isCUDADeviceBuiltinTextureType(); } +void CIRGenModule::printPostfixForExternalizedDecl(llvm::raw_ostream &os, + const Decl *d) { + // ptxas does not allow '.' in symbol names. On the other hand, HIP prefers + // postfix beginning with '.' since the symbol name can be demangled. + if (langOpts.HIP) + os << (isa<VarDecl>(d) ? ".static." : ".intern."); + else + os << (isa<VarDecl>(d) ? "__static__" : "__intern__"); + + // If the CUID is not specified we try to generate a unique postfix. + if (getLangOpts().CUID.empty()) { + // TODO: Once we add 'PreprocessorOpts' into CIRGenModule this part can be + // brought in from OG. + errorNYI(d->getSourceRange(), + "printPostfixForExternalizedDecl: CUID is not specified"); + } else { + os << getASTContext().getCUIDHash(); + } +} + void CIRGenModule::emitGlobal(clang::GlobalDecl gd) { if (const auto *cd = dyn_cast<clang::OpenACCConstructDecl>(gd.getDecl())) { emitGlobalOpenACCDecl(cd); @@ -1233,15 +1253,40 @@ void CIRGenModule::emitGlobalVarDefinition(const clang::VarDecl *vd, cir::CUDAExternallyInitializedAttr::get(&getMLIRContext())); } } else { - // TODO(cir): // Adjust linkage of shadow variables in host compilation - // getCUDARuntime().internalizeDeviceSideVar(vd, linkage); + getCUDARuntime().internalizeDeviceSideVar(vd, linkage); } // TODO(cir): // Handle variable registration // getCUDARuntime().handleVarRegistration(vd, gv); } + // Decorate CUDA shadow variables with the cu.shadow_name attribute so we know + // how to register them when lowering. + if (langOpts.CUDA && !langOpts.CUDAIsDevice && + (vd->hasAttr<CUDAConstantAttr>() || vd->hasAttr<CUDADeviceAttr>())) { + // Shadow variables and their properties must be registered with CUDA + // runtime. Skip Extern global variables, which will be registered in + // the TU where they are defined. + // + // Don't register a C++17 inline variable. The local symbol can be + // discarded and referencing a discarded local symbol from outside the + // comdat (__cuda_register_globals) is disallowed by the ELF spec. + // + // HIP managed variables need to be always recorded in device and host + // compilations for transformation. + // + // HIP managed variables and variables in CUDADeviceVarODRUsedByHost are + // added to llvm.compiler-used, therefore they are safe to be registered. + if ((!vd->hasExternalStorage() && !vd->isInline()) || + getASTContext().CUDADeviceVarODRUsedByHost.contains(vd) || + vd->hasAttr<HIPManagedAttr>()) { + auto shadowName = cudaRuntime->getDeviceSideName(cast<NamedDecl>(vd)); + auto attr = cir::CUDAShadowNameAttr::get(&getMLIRContext(), shadowName); + gv->setAttr(cir::CUDAShadowNameAttr::getMnemonic(), attr); + } + } + // Set initializer and finalize emission CIRGenModule::setInitializer(gv, init); if (emitter) diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h index 266510de84fd0..388b78f2a75e6 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.h +++ b/clang/lib/CIR/CodeGen/CIRGenModule.h @@ -632,6 +632,13 @@ class CIRGenModule : public CIRGenTypeCache { // related attributes. bool shouldEmitCUDAGlobalVar(const VarDecl *global) const; + /// Print the postfix for externalized static variable or kernels for single + /// source offloading languages CUDA and HIP. The unique postfix is created + /// using either the CUID argument, or the file's UniqueID and active macros. + /// The fallback method without a CUID requires that the offloading toolchain + /// does not define separate macros via the -cc1 options. + void printPostfixForExternalizedDecl(llvm::raw_ostream &os, const Decl *d); + /// Replace all uses of the old global with the new global, updating types /// and references as needed. Erases the old global when done. void replaceGlobal(cir::GlobalOp oldGV, cir::GlobalOp newGV); diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index ba89fbe3091bc..6881fef5d9f49 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -25,6 +25,7 @@ #include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/BuiltinDialect.h" #include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/Location.h" #include "mlir/IR/Types.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" @@ -390,7 +391,7 @@ class CIRAttrToValue { .Case<cir::BoolAttr, cir::IntAttr, cir::FPAttr, cir::ConstComplexAttr, cir::ConstArrayAttr, cir::ConstRecordAttr, cir::ConstVectorAttr, cir::ConstPtrAttr, cir::GlobalViewAttr, cir::TypeInfoAttr, - cir::UndefAttr, cir::VTableAttr, cir::ZeroAttr>( + cir::UndefAttr, cir::PoisonAttr, cir::VTableAttr, cir::ZeroAttr>( [&](auto attrT) { return visitCirAttr(attrT); }) .Default([&](auto attrT) { return mlir::Value(); }); } @@ -406,6 +407,7 @@ class CIRAttrToValue { mlir::Value visitCirAttr(cir::GlobalViewAttr attr); mlir::Value visitCirAttr(cir::TypeInfoAttr attr); mlir::Value visitCirAttr(cir::UndefAttr attr); + mlir::Value visitCirAttr(cir::PoisonAttr attr); mlir::Value visitCirAttr(cir::VTableAttr attr); mlir::Value visitCirAttr(cir::ZeroAttr attr); @@ -767,6 +769,13 @@ mlir::Value CIRAttrToValue::visitCirAttr(cir::UndefAttr undefAttr) { rewriter, loc, converter->convertType(undefAttr.getType())); } +/// PoisonAttr visitor. +mlir::Value CIRAttrToValue::visitCirAttr(cir::PoisonAttr poisonAttr) { + mlir::Location loc = parentOp->getLoc(); + return mlir::LLVM::PoisonOp::create( + rewriter, loc, converter->convertType(poisonAttr.getType())); +} + // VTableAttr visitor. mlir::Value CIRAttrToValue::visitCirAttr(cir::VTableAttr vtableArr) { mlir::Type llvmTy = converter->convertType(vtableArr.getType()); @@ -2603,11 +2612,10 @@ CIRToLLVMGlobalOpLowering::matchAndRewriteRegionInitializedGlobal( cir::GlobalOp op, mlir::Attribute init, mlir::ConversionPatternRewriter &rewriter) const { // TODO: Generalize this handling when more types are needed here. - assert( - (isa<cir::ConstArrayAttr, cir::ConstRecordAttr, cir::ConstVectorAttr, - cir::ConstPtrAttr, cir::ConstComplexAttr, cir::GlobalViewAttr, - cir::TypeInfoAttr, cir::UndefAttr, cir::VTableAttr, cir::ZeroAttr>( - init))); + assert((isa<cir::ConstArrayAttr, cir::ConstRecordAttr, cir::ConstVectorAttr, + cir::ConstPtrAttr, cir::ConstComplexAttr, cir::GlobalViewAttr, + cir::TypeInfoAttr, cir::UndefAttr, cir::PoisonAttr, + cir::VTableAttr, cir::ZeroAttr>(init))); // TODO(cir): once LLVM's dialect has proper equivalent attributes this // should be updated. For now, we use a custom op to initialize globals @@ -2674,8 +2682,8 @@ mlir::LogicalResult CIRToLLVMGlobalOpLowering::matchAndRewrite( } else if (mlir::isa<cir::ConstArrayAttr, cir::ConstVectorAttr, cir::ConstRecordAttr, cir::ConstPtrAttr, cir::ConstComplexAttr, cir::GlobalViewAttr, - cir::TypeInfoAttr, cir::UndefAttr, cir::VTableAttr, - cir::ZeroAttr>(init.value())) { + cir::TypeInfoAttr, cir::UndefAttr, cir::PoisonAttr, + cir::VTableAttr, cir::ZeroAttr>(init.value())) { // TODO(cir): once LLVM's dialect has proper equivalent attributes this // should be updated. For now, we use a custom op to initialize globals // to the appropriate value. diff --git a/clang/test/CIR/CodeGenCUDA/address-spaces.cu b/clang/test/CIR/CodeGenCUDA/address-spaces.cu index 1ed52378b99ac..65fa86ac4790c 100644 --- a/clang/test/CIR/CodeGenCUDA/address-spaces.cu +++ b/clang/test/CIR/CodeGenCUDA/address-spaces.cu @@ -15,20 +15,49 @@ // RUN: -I%S/Inputs/ %s -o %t.ll // RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s -// CIR-DEVICE: cir.global "private" internal dso_local @_ZZ2fnvE1j = #cir.undef : !s32i {alignment = 4 : i64} -// LLVM-DEVICE: @_ZZ2fnvE1j = internal global i32 undef, align 4 +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \ +// RUN: -x cuda -emit-cir -target-sdk-version=12.3 \ +// RUN: %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \ +// RUN: -x cuda -emit-llvm -target-sdk-version=12.3 \ +// RUN: %s -o %t.cir +// RUN: FileCheck --check-prefix=LLVM-HOST --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \ +// RUN: -x cuda -emit-llvm -target-sdk-version=12.3 \ +// RUN: %s -o %t.cir +// RUN: FileCheck --check-prefix=OGCG-HOST --input-file=%t.cir %s + +// CIR-DEVICE: cir.global {{.*}} @_ZZ2fnvE1j = #cir.undef +// LLVM-DEVICE: @_ZZ2fnvE1j = internal global i32 undef __device__ int a; -// CIR-DEVICE: cir.global external lang_address_space(offload_global) @[[DEV:.*]] = #cir.int<0> : !s32i {alignment = 4 : i64, cu.externally_initialized = #cir.cu.externally_initialized} -// LLVM-DEVICE: @[[DEV_LD:.*]] = externally_initialized global i32 0, align 4 -// OGCG-DEVICE: @[[DEV_OD:.*]] = addrspace(1) externally_initialized global i32 0, align 4 +// CIR-DEVICE: cir.global external lang_address_space(offload_global) @a = #cir.int<0> : !s32i {{{.*}}, cu.externally_initialized = #cir.cu.externally_initialized} +// LLVM-DEVICE: @a = externally_initialized global i32 0 +// OGCG-DEVICE: @a = addrspace(1) externally_initialized global i32 0 +// CIR-HOST: cir.global {{.*}} @a = #cir.poison : !s32i {{{.*}}, cu.shadow_name = #cir.cu.shadow_name<a>} +// LLVM-HOST: @a = internal global i32 poison +// OGCG-HOST: @a = internal global i32 undef + +__shared__ int b; +// CIR-DEVICE: cir.global external lang_address_space(offload_local) @b = #cir.poison {{.*}} +// LLVM-DEVICE: @b = global i32 poison +// OGCG-DEVICE: @b = addrspace(3) global i32 undef +// CIR-HOST: cir.global {{.*}} @b = #cir.poison +// LLVM-HOST: @b = internal global i32 poison +// OGCG-HOST: @b = internal global i32 undef __constant__ int c; -// CIR-DEVICE: cir.global constant external lang_address_space(offload_constant) @[[CONST:.*]] = #cir.int<0> : !s32i {alignment = 4 : i64, cu.externally_initialized = #cir.cu.externally_initialized} -// LLVM-DEVICE: @[[CONST_LL:.*]] = externally_initialized constant i32 0, align 4 -// OGCG-DEVICE: @[[CONST_OD:.*]] = addrspace(4) externally_initialized constant i32 0, align 4 +// CIR-DEVICE: cir.global constant external lang_address_space(offload_constant) @c = #cir.int<0> : !s32i {{{.*}}, cu.externally_initialized = #cir.cu.externally_initialized} +// LLVM-DEVICE: @c = externally_initialized constant i32 0 +// OGCG-DEVICE: @c = addrspace(4) externally_initialized constant i32 0 +// CIR-HOST: cir.global {{.*}} @c = #cir.poison : !s32i {{{.*}}, cu.shadow_name = #cir.cu.shadow_name<c>} +// LLVM-HOST: @c = internal global i32 poison +// OGCG-HOST: @c = internal global i32 undef -// OGCG-DEVICE: @_ZZ2fnvE1j = internal addrspace(3) global i32 undef, align 4 +// OGCG-DEVICE: @_ZZ2fnvE1j = internal addrspace(3) global i32 undef __global__ void fn() { int i = 0; @@ -46,16 +75,16 @@ __global__ void fn() { // CIR-DEVICE: cir.return // LLVM-DEVICE: define dso_local void @_Z2fnv() -// LLVM-DEVICE: %[[ALLOCA:.*]] = alloca i32, i64 1, align 4 -// LLVM-DEVICE: store i32 0, ptr %[[ALLOCA]], align 4 -// LLVM-DEVICE: %[[VAL:.*]] = load i32, ptr %[[ALLOCA]], align 4 -// LLVM-DEVICE: store i32 %[[VAL]], ptr @_ZZ2fnvE1j, align 4 +// LLVM-DEVICE: %[[ALLOCA:.*]] = alloca i32, i64 1 +// LLVM-DEVICE: store i32 0, ptr %[[ALLOCA]] +// LLVM-DEVICE: %[[VAL:.*]] = load i32, ptr %[[ALLOCA]] +// LLVM-DEVICE: store i32 %[[VAL]], ptr @_ZZ2fnvE1j // LLVM-DEVICE: ret void // OGCG-DEVICE: define dso_local ptx_kernel void @_Z2fnv() // OGCG-DEVICE: entry: -// OGCG-DEVICE: %[[ALLOCA:.*]] = alloca i32, align 4 -// OGCG-DEVICE: store i32 0, ptr %[[ALLOCA]], align 4 -// OGCG-DEVICE: %[[VAL:.*]] = load i32, ptr %[[ALLOCA]], align 4 -// OGCG-DEVICE: store i32 %[[VAL]], ptr addrspacecast (ptr addrspace(3) @_ZZ2fnvE1j to ptr), align 4 +// OGCG-DEVICE: %[[ALLOCA:.*]] = alloca i32 +// OGCG-DEVICE: store i32 0, ptr %[[ALLOCA]] +// OGCG-DEVICE: %[[VAL:.*]] = load i32, ptr %[[ALLOCA]] +// OGCG-DEVICE: store i32 %[[VAL]], ptr addrspacecast (ptr addrspace(3) @_ZZ2fnvE1j to ptr) // OGCG-DEVICE: ret void _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
