Author: abataev Date: Mon Apr 16 13:16:21 2018 New Revision: 330154 URL: http://llvm.org/viewvc/llvm-project?rev=330154&view=rev Log: [OPENMP] General code improvements.
Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h cfe/trunk/test/OpenMP/nvptx_teams_reduction_codegen.cpp Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp?rev=330154&r1=330153&r2=330154&view=diff ============================================================================== --- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp (original) +++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp Mon Apr 16 13:16:21 2018 @@ -91,11 +91,11 @@ enum OpenMPRTLFunctionNVPTX { /// Pre(post)-action for different OpenMP constructs specialized for NVPTX. class NVPTXActionTy final : public PrePostActionTy { - llvm::Value *EnterCallee; + llvm::Value *EnterCallee = nullptr; ArrayRef<llvm::Value *> EnterArgs; - llvm::Value *ExitCallee; + llvm::Value *ExitCallee = nullptr; ArrayRef<llvm::Value *> ExitArgs; - bool Conditional; + bool Conditional = false; llvm::BasicBlock *ContBlock = nullptr; public: @@ -179,7 +179,7 @@ class CheckVarsEscapingDeclContext final static llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> isDeclareTargetDeclaration(const ValueDecl *VD) { - for (const auto *D : VD->redecls()) { + for (const Decl *D : VD->redecls()) { if (!D->hasAttrs()) continue; if (const auto *Attr = D->getAttr<OMPDeclareTargetDeclAttr>()) @@ -233,7 +233,7 @@ class CheckVarsEscapingDeclContext final void VisitOpenMPCapturedStmt(const CapturedStmt *S) { if (!S) return; - for (const auto &C : S->captures()) { + for (const CapturedStmt::Capture &C : S->captures()) { if (C.capturesVariable() && !C.capturesVariableByCopy()) { const ValueDecl *VD = C.getCapturedVar(); markAsEscaped(VD); @@ -255,7 +255,7 @@ class CheckVarsEscapingDeclContext final return; ASTContext &C = CGF.getContext(); SmallVector<VarsDataTy, 4> GlobalizedVars; - for (const auto *D : EscapedDecls) + for (const ValueDecl *D : EscapedDecls) GlobalizedVars.emplace_back(C.getDeclAlign(D), D); std::stable_sort(GlobalizedVars.begin(), GlobalizedVars.end(), stable_sort_comparator); @@ -296,7 +296,7 @@ public: void VisitDeclStmt(const DeclStmt *S) { if (!S) return; - for (const auto *D : S->decls()) + for (const Decl *D : S->decls()) if (const auto *VD = dyn_cast_or_null<ValueDecl>(D)) VisitValueDecl(VD); } @@ -312,7 +312,7 @@ public: void VisitCapturedStmt(const CapturedStmt *S) { if (!S) return; - for (const auto &C : S->captures()) { + for (const CapturedStmt::Capture &C : S->captures()) { if (C.capturesVariable() && !C.capturesVariableByCopy()) { const ValueDecl *VD = C.getCapturedVar(); markAsEscaped(VD); @@ -324,7 +324,7 @@ public: void VisitLambdaExpr(const LambdaExpr *E) { if (!E) return; - for (const auto &C : E->captures()) { + for (const LambdaCapture &C : E->captures()) { if (C.capturesVariable()) { if (C.getCaptureKind() == LCK_ByRef) { const ValueDecl *VD = C.getCapturedVar(); @@ -338,7 +338,7 @@ public: void VisitBlockExpr(const BlockExpr *E) { if (!E) return; - for (const auto &C : E->getBlockDecl()->captures()) { + for (const BlockDecl::Capture &C : E->getBlockDecl()->captures()) { if (C.isByRef()) { const VarDecl *VD = C.getVariable(); markAsEscaped(VD); @@ -358,8 +358,9 @@ public: AllEscaped = true; Visit(Arg); AllEscaped = SavedAllEscaped; - } else + } else { Visit(Arg); + } } Visit(E->getCallee()); } @@ -383,8 +384,9 @@ public: AllEscaped = true; Visit(E->getSubExpr()); AllEscaped = SavedAllEscaped; - } else + } else { Visit(E->getSubExpr()); + } } void VisitImplicitCastExpr(const ImplicitCastExpr *E) { if (!E) @@ -394,8 +396,9 @@ public: AllEscaped = true; Visit(E->getSubExpr()); AllEscaped = SavedAllEscaped; - } else + } else { Visit(E->getSubExpr()); + } } void VisitExpr(const Expr *E) { if (!E) @@ -403,7 +406,7 @@ public: bool SavedAllEscaped = AllEscaped; if (!E->isLValue()) AllEscaped = false; - for (const auto *Child : E->children()) + for (const Stmt *Child : E->children()) if (Child) Visit(Child); AllEscaped = SavedAllEscaped; @@ -411,7 +414,7 @@ public: void VisitStmt(const Stmt *S) { if (!S) return; - for (const auto *Child : S->children()) + for (const Stmt *Child : S->children()) if (Child) Visit(Child); } @@ -553,19 +556,19 @@ static llvm::Value *getMasterThreadID(Co CGOpenMPRuntimeNVPTX::WorkerFunctionState::WorkerFunctionState( CodeGenModule &CGM, SourceLocation Loc) - : WorkerFn(nullptr), CGFI(nullptr), Loc(Loc) { + : WorkerFn(nullptr), CGFI(CGM.getTypes().arrangeNullaryFunction()), + Loc(Loc) { createWorkerFunction(CGM); } void CGOpenMPRuntimeNVPTX::WorkerFunctionState::createWorkerFunction( CodeGenModule &CGM) { // Create an worker function with no arguments. - CGFI = &CGM.getTypes().arrangeNullaryFunction(); WorkerFn = llvm::Function::Create( - CGM.getTypes().GetFunctionType(*CGFI), llvm::GlobalValue::InternalLinkage, + CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, /*placeholder=*/"_worker", &CGM.getModule()); - CGM.SetInternalFunctionAttributes(GlobalDecl(), WorkerFn, *CGFI); + CGM.SetInternalFunctionAttributes(GlobalDecl(), WorkerFn, CGFI); WorkerFn->setDoesNotRecurse(); } @@ -617,7 +620,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericKe // Now change the name of the worker function to correspond to this target // region's entry function. - WST.WorkerFn->setName(OutlinedFn->getName() + "_worker"); + WST.WorkerFn->setName(Twine(OutlinedFn->getName(), "_worker")); // Create the worker function emitWorkerFunction(WST); @@ -634,7 +637,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericEn llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master"); EST.ExitBB = CGF.createBasicBlock(".exit"); - auto *IsWorker = + llvm::Value *IsWorker = Bld.CreateICmpULT(getNVPTXThreadID(CGF), getThreadLimit(CGF)); Bld.CreateCondBr(IsWorker, WorkerBB, MasterCheckBB); @@ -643,7 +646,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericEn CGF.EmitBranch(EST.ExitBB); CGF.EmitBlock(MasterCheckBB); - auto *IsMaster = + llvm::Value *IsMaster = Bld.CreateICmpEQ(getNVPTXThreadID(CGF), getMasterThreadID(CGF)); Bld.CreateCondBr(IsMaster, MasterBB, EST.ExitBB); @@ -728,7 +731,7 @@ void CGOpenMPRuntimeNVPTX::emitSpmdKerne void CGOpenMPRuntimeNVPTX::emitSpmdEntryHeader( CodeGenFunction &CGF, EntryFunctionState &EST, const OMPExecutableDirective &D) { - auto &Bld = CGF.Builder; + CGBuilderTy &Bld = CGF.Builder; // Setup BBs in entry function. llvm::BasicBlock *ExecuteBB = CGF.createBasicBlock(".execute"); @@ -773,17 +776,18 @@ void CGOpenMPRuntimeNVPTX::emitSpmdEntry // warps participate in parallel work. static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name, CGOpenMPRuntimeNVPTX::ExecutionMode Mode) { - (void)new llvm::GlobalVariable( + auto *GVMode = new llvm::GlobalVariable( CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::WeakAnyLinkage, - llvm::ConstantInt::get(CGM.Int8Ty, Mode), Name + Twine("_exec_mode")); + llvm::ConstantInt::get(CGM.Int8Ty, Mode), Twine(Name, "_exec_mode")); + CGM.addCompilerUsedGlobal(GVMode); } void CGOpenMPRuntimeNVPTX::emitWorkerFunction(WorkerFunctionState &WST) { ASTContext &Ctx = CGM.getContext(); CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); - CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, WST.WorkerFn, *WST.CGFI, {}, + CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, WST.WorkerFn, WST.CGFI, {}, WST.Loc, WST.Loc); emitWorkerLoop(CGF, WST); CGF.FinishFunction(); @@ -850,9 +854,9 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoo CGF.EmitBlock(ExecuteBB); // Process work items: outlined parallel functions. - for (auto *W : Work) { + for (llvm::Function *W : Work) { // Try to match this outlined function. - auto *ID = Bld.CreatePointerBitCastOrAddrSpaceCast(W, CGM.Int8PtrTy); + llvm::Value *ID = Bld.CreatePointerBitCastOrAddrSpaceCast(W, CGM.Int8PtrTy); llvm::Value *WorkFnMatch = Bld.CreateICmpEQ(Bld.CreateLoad(WorkFn), ID, "work_match"); @@ -906,7 +910,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime // Build void __kmpc_kernel_init(kmp_int32 thread_limit, int16_t // RequiresOMPRuntime); llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_init"); break; @@ -914,7 +918,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime case OMPRTL_NVPTX__kmpc_kernel_deinit: { // Build void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized); llvm::Type *TypeParams[] = {CGM.Int16Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_deinit"); break; @@ -923,14 +927,14 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime // Build void __kmpc_spmd_kernel_init(kmp_int32 thread_limit, // int16_t RequiresOMPRuntime, int16_t RequiresDataSharing); llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_init"); break; } case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit: { // Build void __kmpc_spmd_kernel_deinit(); - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_deinit"); break; @@ -939,7 +943,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime /// Build void __kmpc_kernel_prepare_parallel( /// void *outlined_function, int16_t IsOMPRuntimeInitialized); llvm::Type *TypeParams[] = {CGM.Int8PtrTy, CGM.Int16Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_prepare_parallel"); break; @@ -949,14 +953,14 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime /// int16_t IsOMPRuntimeInitialized); llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy, CGM.Int16Ty}; llvm::Type *RetTy = CGM.getTypes().ConvertType(CGM.getContext().BoolTy); - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(RetTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_parallel"); break; } case OMPRTL_NVPTX__kmpc_kernel_end_parallel: { /// Build void __kmpc_kernel_end_parallel(); - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_end_parallel"); break; @@ -965,7 +969,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 // global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); break; @@ -974,7 +978,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 // global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); break; @@ -983,7 +987,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime // Build int32_t __kmpc_shuffle_int32(int32_t element, // int16_t lane_offset, int16_t warp_size); llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int32"); break; @@ -992,7 +996,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime // Build int64_t __kmpc_shuffle_int64(int64_t element, // int16_t lane_offset, int16_t warp_size); llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int16Ty, CGM.Int16Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int64"); break; @@ -1018,7 +1022,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime CGM.VoidPtrTy, ShuffleReduceFnTy->getPointerTo(), InterWarpCopyFnTy->getPointerTo()}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction( FnTy, /*Name=*/"__kmpc_nvptx_parallel_reduce_nowait"); @@ -1061,7 +1065,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime InterWarpCopyFnTy->getPointerTo(), CopyToScratchpadFnTy->getPointerTo(), LoadReduceFnTy->getPointerTo()}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction( FnTy, /*Name=*/"__kmpc_nvptx_teams_reduce_nowait"); @@ -1070,7 +1074,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime case OMPRTL_NVPTX__kmpc_end_reduce_nowait: { // Build __kmpc_end_reduce_nowait(kmp_int32 global_tid); llvm::Type *TypeParams[] = {CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction( FnTy, /*Name=*/"__kmpc_nvptx_end_reduce_nowait"); @@ -1078,7 +1082,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime } case OMPRTL_NVPTX__kmpc_data_sharing_init_stack: { /// Build void __kmpc_data_sharing_init_stack(); - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack"); break; @@ -1087,7 +1091,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime // Build void *__kmpc_data_sharing_push_stack(size_t size, // int16_t UseSharedMemory); llvm::Type *TypeParams[] = {CGM.SizeTy, CGM.Int16Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction( FnTy, /*Name=*/"__kmpc_data_sharing_push_stack"); @@ -1096,7 +1100,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime case OMPRTL_NVPTX__kmpc_data_sharing_pop_stack: { // Build void __kmpc_data_sharing_pop_stack(void *a); llvm::Type *TypeParams[] = {CGM.VoidPtrTy}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_data_sharing_pop_stack"); @@ -1106,14 +1110,14 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime /// Build void __kmpc_begin_sharing_variables(void ***args, /// size_t n_args); llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy->getPointerTo(), CGM.SizeTy}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_begin_sharing_variables"); break; } case OMPRTL_NVPTX__kmpc_end_sharing_variables: { /// Build void __kmpc_end_sharing_variables(); - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_sharing_variables"); break; @@ -1121,7 +1125,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime case OMPRTL_NVPTX__kmpc_get_shared_variables: { /// Build void __kmpc_get_shared_variables(void ***GlobalArgs); llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy->getPointerTo()}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_get_shared_variables"); break; @@ -1134,19 +1138,18 @@ void CGOpenMPRuntimeNVPTX::createOffload llvm::Constant *Addr, uint64_t Size, int32_t, llvm::GlobalValue::LinkageTypes) { - auto *F = dyn_cast<llvm::Function>(Addr); // TODO: Add support for global variables on the device after declare target // support. - if (!F) + if (!isa<llvm::Function>(Addr)) return; - llvm::Module *M = F->getParent(); - llvm::LLVMContext &Ctx = M->getContext(); + llvm::Module &M = CGM.getModule(); + llvm::LLVMContext &Ctx = CGM.getLLVMContext(); // Get "nvvm.annotations" metadata node - llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations"); + llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations"); llvm::Metadata *MDVals[] = { - llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, "kernel"), + llvm::ConstantAsMetadata::get(Addr), llvm::MDString::get(Ctx, "kernel"), llvm::ConstantAsMetadata::get( llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))}; // Append metadata to nvvm.annotations @@ -1336,7 +1339,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa // handle the specifics of the allocation of the memory. // Use actual memory size of the record including the padding // for alignment purposes. - auto &Bld = CGF.Builder; + CGBuilderTy &Bld = CGF.Builder; llvm::Value *Size = CGF.getTypeSize(VD->getType()); CharUnits Align = CGM.getContext().getDeclAlign(VD); Size = Bld.CreateNUWAdd( @@ -1496,8 +1499,8 @@ void CGOpenMPRuntimeNVPTX::emitGenericPa Work.emplace_back(WFn); }; - auto *RTLoc = emitUpdateLocation(CGF, Loc); - auto *ThreadID = getThreadID(CGF, Loc); + llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); + llvm::Value *ThreadID = getThreadID(CGF, Loc); llvm::Value *Args[] = {RTLoc, ThreadID}; auto &&SeqGen = [this, Fn, CapturedVars, &Args, Loc](CodeGenFunction &CGF, @@ -1528,9 +1531,9 @@ void CGOpenMPRuntimeNVPTX::emitGenericPa RCG(CGF); }; - if (IfCond) + if (IfCond) { emitOMPIfClause(CGF, IfCond, L0ParallelGen, SeqGen); - else { + } else { CodeGenFunction::RunCleanupsScope Scope(CGF); RegionCodeGenTy ThenRCG(L0ParallelGen); ThenRCG(CGF); @@ -1588,8 +1591,8 @@ static llvm::Value *createRuntimeShuffle QualType ElemType, llvm::Value *Offset, SourceLocation Loc) { - auto &CGM = CGF.CGM; - auto &Bld = CGF.Builder; + CodeGenModule &CGM = CGF.CGM; + CGBuilderTy &Bld = CGF.Builder; CGOpenMPRuntimeNVPTX &RT = *(static_cast<CGOpenMPRuntimeNVPTX *>(&CGM.getOpenMPRuntime())); @@ -1605,12 +1608,11 @@ static llvm::Value *createRuntimeShuffle QualType CastTy = CGF.getContext().getIntTypeForBitwidth( Size.getQuantity() <= 4 ? 32 : 64, /*Signed=*/1); llvm::Value *ElemCast = castValueToType(CGF, Elem, ElemType, CastTy, Loc); - auto *WarpSize = + llvm::Value *WarpSize = Bld.CreateIntCast(getNVPTXWarpSize(CGF), CGM.Int16Ty, /*isSigned=*/true); - auto *ShuffledVal = - CGF.EmitRuntimeCall(RT.createNVPTXRuntimeFunction(ShuffleFn), - {ElemCast, Offset, WarpSize}); + llvm::Value *ShuffledVal = CGF.EmitRuntimeCall( + RT.createNVPTXRuntimeFunction(ShuffleFn), {ElemCast, Offset, WarpSize}); return castValueToType(CGF, ShuffledVal, CastTy, ElemType, Loc); } @@ -1643,19 +1645,19 @@ static void emitReductionListCopy( ArrayRef<const Expr *> Privates, Address SrcBase, Address DestBase, CopyOptionsTy CopyOptions = {nullptr, nullptr, nullptr}) { - auto &CGM = CGF.CGM; - auto &C = CGM.getContext(); - auto &Bld = CGF.Builder; - - auto *RemoteLaneOffset = CopyOptions.RemoteLaneOffset; - auto *ScratchpadIndex = CopyOptions.ScratchpadIndex; - auto *ScratchpadWidth = CopyOptions.ScratchpadWidth; + CodeGenModule &CGM = CGF.CGM; + ASTContext &C = CGM.getContext(); + CGBuilderTy &Bld = CGF.Builder; + + llvm::Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset; + llvm::Value *ScratchpadIndex = CopyOptions.ScratchpadIndex; + llvm::Value *ScratchpadWidth = CopyOptions.ScratchpadWidth; // Iterates, element-by-element, through the source Reduce list and // make a copy. unsigned Idx = 0; unsigned Size = Privates.size(); - for (auto &Private : Privates) { + for (const Expr *Private : Privates) { Address SrcElementAddr = Address::invalid(); Address DestElementAddr = Address::invalid(); Address DestElementPtrAddr = Address::invalid(); @@ -1716,9 +1718,9 @@ static void emitReductionListCopy( // Step 1.2: Get the address for dest element: // address = base + index * ElementSizeInChars. llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType()); - auto *CurrentOffset = + llvm::Value *CurrentOffset = Bld.CreateNUWMul(ElementSizeInChars, ScratchpadIndex); - auto *ScratchPadElemAbsolutePtrVal = + llvm::Value *ScratchPadElemAbsolutePtrVal = Bld.CreateNUWAdd(DestBase.getPointer(), CurrentOffset); ScratchPadElemAbsolutePtrVal = Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy); @@ -1731,9 +1733,9 @@ static void emitReductionListCopy( // Step 1.1: Get the address for the src element in the scratchpad. // address = base + index * ElementSizeInChars. llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType()); - auto *CurrentOffset = + llvm::Value *CurrentOffset = Bld.CreateNUWMul(ElementSizeInChars, ScratchpadIndex); - auto *ScratchPadElemAbsolutePtrVal = + llvm::Value *ScratchPadElemAbsolutePtrVal = Bld.CreateNUWAdd(SrcBase.getPointer(), CurrentOffset); ScratchPadElemAbsolutePtrVal = Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy); @@ -1816,7 +1818,7 @@ static void emitReductionListCopy( SrcBase = Address(ScratchpadBasePtr, CGF.getPointerAlign()); } - Idx++; + ++Idx; } } @@ -1834,8 +1836,8 @@ static void emitReductionListCopy( static llvm::Value *emitReduceScratchpadFunction( CodeGenModule &CGM, ArrayRef<const Expr *> Privates, QualType ReductionArrayTy, llvm::Value *ReduceFn, SourceLocation Loc) { - auto &C = CGM.getContext(); - auto Int32Ty = C.getIntTypeForBitwidth(32, /* Signed */ true); + ASTContext &C = CGM.getContext(); + QualType Int32Ty = C.getIntTypeForBitwidth(32, /*Signed=*/1); // Destination of the copy. ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, @@ -1864,7 +1866,8 @@ static llvm::Value *emitReduceScratchpad Args.push_back(&WidthArg); Args.push_back(&ShouldReduceArg); - auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + const CGFunctionInfo &CGFI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); auto *Fn = llvm::Function::Create( CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, "_omp_reduction_load_and_reduce", &CGM.getModule()); @@ -1873,7 +1876,7 @@ static llvm::Value *emitReduceScratchpad CodeGenFunction CGF(CGM); CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); - auto &Bld = CGF.Builder; + CGBuilderTy &Bld = CGF.Builder; // Get local Reduce list pointer. Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg); @@ -1923,7 +1926,7 @@ static llvm::Value *emitReduceScratchpad llvm::BasicBlock *ElseBB = CGF.createBasicBlock("else"); llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont"); - auto CondReduce = Bld.CreateICmpEQ(ShouldReduceVal, Bld.getInt32(1)); + llvm::Value *CondReduce = Bld.CreateIsNotNull(ShouldReduceVal); Bld.CreateCondBr(CondReduce, ThenBB, ElseBB); CGF.EmitBlock(ThenBB); @@ -1961,8 +1964,8 @@ static llvm::Value *emitCopyToScratchpad QualType ReductionArrayTy, SourceLocation Loc) { - auto &C = CGM.getContext(); - auto Int32Ty = C.getIntTypeForBitwidth(32, /* Signed */ true); + ASTContext &C = CGM.getContext(); + QualType Int32Ty = C.getIntTypeForBitwidth(32, /*Signed=*/1); // Source of the copy. ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, @@ -1986,7 +1989,8 @@ static llvm::Value *emitCopyToScratchpad Args.push_back(&IndexArg); Args.push_back(&WidthArg); - auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + const CGFunctionInfo &CGFI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); auto *Fn = llvm::Function::Create( CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, "_omp_reduction_copy_to_scratchpad", &CGM.getModule()); @@ -1995,7 +1999,7 @@ static llvm::Value *emitCopyToScratchpad CodeGenFunction CGF(CGM); CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); - auto &Bld = CGF.Builder; + CGBuilderTy &Bld = CGF.Builder; Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg); Address SrcDataAddr( @@ -2051,8 +2055,8 @@ static llvm::Value *emitInterWarpCopyFun ArrayRef<const Expr *> Privates, QualType ReductionArrayTy, SourceLocation Loc) { - auto &C = CGM.getContext(); - auto &M = CGM.getModule(); + ASTContext &C = CGM.getContext(); + llvm::Module &M = CGM.getModule(); // ReduceList: thread local Reduce list. // At the stage of the computation when this function is called, partially @@ -2068,7 +2072,8 @@ static llvm::Value *emitInterWarpCopyFun Args.push_back(&ReduceListArg); Args.push_back(&NumWarpsArg); - auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + const CGFunctionInfo &CGFI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); auto *Fn = llvm::Function::Create( CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, "_omp_reduction_inter_warp_copy_func", &CGM.getModule()); @@ -2077,7 +2082,7 @@ static llvm::Value *emitInterWarpCopyFun CodeGenFunction CGF(CGM); CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); - auto &Bld = CGF.Builder; + CGBuilderTy &Bld = CGF.Builder; // This array is used as a medium to transfer, one reduce element at a time, // the data from the first lane of every warp to lanes in the first warp @@ -2086,7 +2091,7 @@ static llvm::Value *emitInterWarpCopyFun // for reduced latency, as well as to have a distinct copy for concurrently // executing target regions. The array is declared with common linkage so // as to be shared across compilation units. - const char *TransferMediumName = + StringRef TransferMediumName = "__openmp_nvptx_data_transfer_temporary_storage"; llvm::GlobalVariable *TransferMedium = M.getGlobalVariable(TransferMediumName); @@ -2099,14 +2104,15 @@ static llvm::Value *emitInterWarpCopyFun llvm::Constant::getNullValue(Ty), TransferMediumName, /*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal, SharedAddressSpace); + CGM.addCompilerUsedGlobal(TransferMedium); } // Get the CUDA thread id of the current OpenMP thread on the GPU. - auto *ThreadID = getNVPTXThreadID(CGF); + llvm::Value *ThreadID = getNVPTXThreadID(CGF); // nvptx_lane_id = nvptx_id % warpsize - auto *LaneID = getNVPTXLaneID(CGF); + llvm::Value *LaneID = getNVPTXLaneID(CGF); // nvptx_warp_id = nvptx_id / warpsize - auto *WarpID = getNVPTXWarpID(CGF); + llvm::Value *WarpID = getNVPTXWarpID(CGF); Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg); Address LocalReduceList( @@ -2117,7 +2123,7 @@ static llvm::Value *emitInterWarpCopyFun CGF.getPointerAlign()); unsigned Idx = 0; - for (auto &Private : Privates) { + for (const Expr *Private : Privates) { // // Warp master copies reduce element to transfer medium in __shared__ // memory. @@ -2127,8 +2133,7 @@ static llvm::Value *emitInterWarpCopyFun llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont"); // if (lane_id == 0) - auto IsWarpMaster = - Bld.CreateICmpEQ(LaneID, Bld.getInt32(0), "warp_master"); + llvm::Value *IsWarpMaster = Bld.CreateIsNull(LaneID, "warp_master"); Bld.CreateCondBr(IsWarpMaster, ThenBB, ElseBB); CGF.EmitBlock(ThenBB); @@ -2170,7 +2175,7 @@ static llvm::Value *emitInterWarpCopyFun llvm::Value *NumWarpsVal = CGF.EmitLoadOfScalar( AddrNumWarpsArg, /*Volatile=*/false, C.IntTy, SourceLocation()); - auto *NumActiveThreads = Bld.CreateNSWMul( + llvm::Value *NumActiveThreads = Bld.CreateNSWMul( NumWarpsVal, getNVPTXWarpSize(CGF), "num_active_threads"); // named_barrier_sync(ParallelBarrierID, num_active_threads) syncParallelThreads(CGF, NumActiveThreads); @@ -2183,7 +2188,7 @@ static llvm::Value *emitInterWarpCopyFun llvm::BasicBlock *W0MergeBB = CGF.createBasicBlock("ifcont"); // Up to 32 threads in warp 0 are active. - auto IsActiveThread = + llvm::Value *IsActiveThread = Bld.CreateICmpULT(ThreadID, NumWarpsVal, "is_active_thread"); Bld.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB); @@ -2223,7 +2228,7 @@ static llvm::Value *emitInterWarpCopyFun // While warp 0 copies values from transfer medium, all other warps must // wait. syncParallelThreads(CGF, NumActiveThreads); - Idx++; + ++Idx; } CGF.FinishFunction(); @@ -2299,7 +2304,7 @@ static llvm::Value *emitInterWarpCopyFun static llvm::Value *emitShuffleAndReduceFunction( CodeGenModule &CGM, ArrayRef<const Expr *> Privates, QualType ReductionArrayTy, llvm::Value *ReduceFn, SourceLocation Loc) { - auto &C = CGM.getContext(); + ASTContext &C = CGM.getContext(); // Thread local Reduce list used to host the values of data to be reduced. ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, @@ -2319,7 +2324,8 @@ static llvm::Value *emitShuffleAndReduce Args.push_back(&RemoteLaneOffsetArg); Args.push_back(&AlgoVerArg); - auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + const CGFunctionInfo &CGFI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); auto *Fn = llvm::Function::Create( CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, "_omp_reduction_shuffle_and_reduce_func", &CGM.getModule()); @@ -2328,7 +2334,7 @@ static llvm::Value *emitShuffleAndReduce CodeGenFunction CGF(CGM); CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); - auto &Bld = CGF.Builder; + CGBuilderTy &Bld = CGF.Builder; Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg); Address LocalReduceList( @@ -2385,21 +2391,19 @@ static llvm::Value *emitShuffleAndReduce // When AlgoVer==2, the third conjunction has only the second part to be // evaluated during runtime. Other conjunctions evaluates to false // during compile time. - auto CondAlgo0 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(0)); + llvm::Value *CondAlgo0 = Bld.CreateIsNull(AlgoVerArgVal); - auto Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1)); - auto CondAlgo1 = Bld.CreateAnd( + llvm::Value *Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1)); + llvm::Value *CondAlgo1 = Bld.CreateAnd( Algo1, Bld.CreateICmpULT(LaneIDArgVal, RemoteLaneOffsetArgVal)); - auto Algo2 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(2)); - auto CondAlgo2 = Bld.CreateAnd( - Algo2, - Bld.CreateICmpEQ(Bld.CreateAnd(LaneIDArgVal, Bld.getInt16(1)), - Bld.getInt16(0))); + llvm::Value *Algo2 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(2)); + llvm::Value *CondAlgo2 = Bld.CreateAnd( + Algo2, Bld.CreateIsNull(Bld.CreateAnd(LaneIDArgVal, Bld.getInt16(1)))); CondAlgo2 = Bld.CreateAnd( CondAlgo2, Bld.CreateICmpSGT(RemoteLaneOffsetArgVal, Bld.getInt16(0))); - auto CondReduce = Bld.CreateOr(CondAlgo0, CondAlgo1); + llvm::Value *CondReduce = Bld.CreateOr(CondAlgo0, CondAlgo1); CondReduce = Bld.CreateOr(CondReduce, CondAlgo2); llvm::BasicBlock *ThenBB = CGF.createBasicBlock("then"); @@ -2425,7 +2429,7 @@ static llvm::Value *emitShuffleAndReduce // if (AlgoVer==1 && (LaneId >= Offset)) copy Remote Reduce list to local // Reduce list. Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1)); - auto CondCopy = Bld.CreateAnd( + llvm::Value *CondCopy = Bld.CreateAnd( Algo1, Bld.CreateICmpUGE(LaneIDArgVal, RemoteLaneOffsetArgVal)); llvm::BasicBlock *CpyThenBB = CGF.createBasicBlock("then"); @@ -2702,12 +2706,12 @@ void CGOpenMPRuntimeNVPTX::emitReduction assert((TeamsReduction || ParallelReduction) && "Invalid reduction selection in emitReduction."); - auto &C = CGM.getContext(); + ASTContext &C = CGM.getContext(); // 1. Build a list of reduction variables. // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; auto Size = RHSExprs.size(); - for (auto *E : Privates) { + for (const Expr *E : Privates) { if (E->getType()->isVariablyModifiedType()) // Reserve place for array size. ++Size; @@ -2743,20 +2747,20 @@ void CGOpenMPRuntimeNVPTX::emitReduction } // 2. Emit reduce_func(). - auto *ReductionFn = emitReductionFunction( + llvm::Value *ReductionFn = emitReductionFunction( CGM, Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, LHSExprs, RHSExprs, ReductionOps); // 4. Build res = __kmpc_reduce{_nowait}(<gtid>, <n>, sizeof(RedList), // RedList, shuffle_reduce_func, interwarp_copy_func); - auto *ThreadId = getThreadID(CGF, Loc); - auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); - auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + llvm::Value *ThreadId = getThreadID(CGF, Loc); + llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); + llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( ReductionList.getPointer(), CGF.VoidPtrTy); - auto *ShuffleAndReduceFn = emitShuffleAndReduceFunction( + llvm::Value *ShuffleAndReduceFn = emitShuffleAndReduceFunction( CGM, Privates, ReductionArrayTy, ReductionFn, Loc); - auto *InterWarpCopyFn = + llvm::Value *InterWarpCopyFn = emitInterWarpCopyFunction(CGM, Privates, ReductionArrayTy, Loc); llvm::Value *Res = nullptr; @@ -2774,9 +2778,9 @@ void CGOpenMPRuntimeNVPTX::emitReduction } if (TeamsReduction) { - auto *ScratchPadCopyFn = + llvm::Value *ScratchPadCopyFn = emitCopyToScratchpad(CGM, Privates, ReductionArrayTy, Loc); - auto *LoadAndReduceFn = emitReduceScratchpadFunction( + llvm::Value *LoadAndReduceFn = emitReduceScratchpadFunction( CGM, Privates, ReductionArrayTy, ReductionFn, Loc); llvm::Value *Args[] = {ThreadId, @@ -2793,25 +2797,26 @@ void CGOpenMPRuntimeNVPTX::emitReduction } // 5. Build switch(res) - auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); - auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/1); + llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); + llvm::SwitchInst *SwInst = + CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/1); // 6. Build case 1: where we have reduced values in the master // thread in each team. // __kmpc_end_reduce{_nowait}(<gtid>); // break; - auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); + llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); CGF.EmitBlock(Case1BB); // Add emission of __kmpc_end_reduce{_nowait}(<gtid>); llvm::Value *EndArgs[] = {ThreadId}; - auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps, + auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps, this](CodeGenFunction &CGF, PrePostActionTy &Action) { auto IPriv = Privates.begin(); auto ILHS = LHSExprs.begin(); auto IRHS = RHSExprs.begin(); - for (auto *E : ReductionOps) { + for (const Expr *E : ReductionOps) { emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), cast<DeclRefExpr>(*IRHS)); ++IPriv; @@ -2850,11 +2855,10 @@ CGOpenMPRuntimeNVPTX::translateParameter enum { NVPTX_local_addr = 5 }; QC.addAddressSpace(getLangASFromTargetAS(NVPTX_local_addr)); ArgType = QC.apply(CGM.getContext(), ArgType); - if (isa<ImplicitParamDecl>(NativeParam)) { + if (isa<ImplicitParamDecl>(NativeParam)) return ImplicitParamDecl::Create( CGM.getContext(), /*DC=*/nullptr, NativeParam->getLocation(), NativeParam->getIdentifier(), ArgType, ImplicitParamDecl::Other); - } return ParmVarDecl::Create( CGM.getContext(), const_cast<DeclContext *>(NativeParam->getDeclContext()), @@ -2945,12 +2949,12 @@ llvm::Function *CGOpenMPRuntimeNVPTX::cr WrapperArgs.emplace_back(&ParallelLevelArg); WrapperArgs.emplace_back(&WrapperArg); - auto &CGFI = + const CGFunctionInfo &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, WrapperArgs); auto *Fn = llvm::Function::Create( CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, - OutlinedParallelFn->getName() + "_wrapper", &CGM.getModule()); + Twine(OutlinedParallelFn->getName(), "_wrapper"), &CGM.getModule()); CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); Fn->setLinkage(llvm::GlobalValue::InternalLinkage); Fn->setDoesNotRecurse(); Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h?rev=330154&r1=330153&r2=330154&view=diff ============================================================================== --- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h (original) +++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h Mon Apr 16 13:16:21 2018 @@ -35,7 +35,7 @@ private: class WorkerFunctionState { public: llvm::Function *WorkerFn; - const CGFunctionInfo *CGFI; + const CGFunctionInfo &CGFI; SourceLocation Loc; WorkerFunctionState(CodeGenModule &CGM, SourceLocation Loc); Modified: cfe/trunk/test/OpenMP/nvptx_teams_reduction_codegen.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_teams_reduction_codegen.cpp?rev=330154&r1=330153&r2=330154&view=diff ============================================================================== --- cfe/trunk/test/OpenMP/nvptx_teams_reduction_codegen.cpp (original) +++ cfe/trunk/test/OpenMP/nvptx_teams_reduction_codegen.cpp Mon Apr 16 13:16:21 2018 @@ -283,7 +283,7 @@ int bar(int n){ // CHECK: [[REMOTE_ELT_PTR:%.+]] = bitcast double* [[REMOTE_ELT]] to i8* // CHECK: store i8* [[REMOTE_ELT_PTR]], i8** [[REMOTE_ELT_REF]], align // - // CHECK: [[REDUCE:%.+]] = icmp eq i32 [[SHOULD_REDUCE]], 1 + // CHECK: [[REDUCE:%.+]] = icmp ne i32 [[SHOULD_REDUCE]], 0 // CHECK: br i1 [[REDUCE]], label {{%?}}[[DO_REDUCE:.+]], label {{%?}}[[REDUCE_ELSE:.+]] // // CHECK: [[DO_REDUCE]] @@ -658,7 +658,7 @@ int bar(int n){ // CHECK: [[REMOTE_ELT_PTR:%.+]] = bitcast float* [[REMOTE_ELT2]] to i8* // CHECK: store i8* [[REMOTE_ELT_PTR]], i8** [[REMOTE_ELT_REF]], align // - // CHECK: [[REDUCE:%.+]] = icmp eq i32 [[SHOULD_REDUCE]], 1 + // CHECK: [[REDUCE:%.+]] = icmp ne i32 [[SHOULD_REDUCE]], 0 // CHECK: br i1 [[REDUCE]], label {{%?}}[[DO_REDUCE:.+]], label {{%?}}[[REDUCE_ELSE:.+]] // // CHECK: [[DO_REDUCE]] @@ -1087,7 +1087,7 @@ int bar(int n){ // CHECK: [[REMOTE_ELT_PTR:%.+]] = bitcast i16* [[REMOTE_ELT2]] to i8* // CHECK: store i8* [[REMOTE_ELT_PTR]], i8** [[REMOTE_ELT_REF]], align // - // CHECK: [[REDUCE:%.+]] = icmp eq i32 [[SHOULD_REDUCE]], 1 + // CHECK: [[REDUCE:%.+]] = icmp ne i32 [[SHOULD_REDUCE]], 0 // CHECK: br i1 [[REDUCE]], label {{%?}}[[DO_REDUCE:.+]], label {{%?}}[[REDUCE_ELSE:.+]] // // CHECK: [[DO_REDUCE]] _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits