https://github.com/jacquesguan updated https://github.com/llvm/llvm-project/pull/197872
>From 4c482d410bcaee44a91e5c85cb07d2d22b5f262e Mon Sep 17 00:00:00 2001 From: Jianjian GUAN <[email protected]> Date: Fri, 15 May 2026 10:37:40 +0800 Subject: [PATCH 1/3] [CIR] Add support for __builtin_nontemporal_store and __builtin_nontemporal_load Add nontemporal attribute to cir.load and cir.store ops. --- .../CIR/Dialect/Builder/CIRBaseBuilder.h | 16 ++-- clang/include/clang/CIR/Dialect/IR/CIROps.td | 4 + clang/include/clang/CIR/MissingFeatures.h | 1 - clang/lib/CIR/CodeGen/CIRGenAtomic.cpp | 1 + clang/lib/CIR/CodeGen/CIRGenBuilder.h | 12 +-- clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 19 ++++- clang/lib/CIR/CodeGen/CIRGenExpr.cpp | 14 ++-- clang/lib/CIR/CodeGen/CIRGenFunction.h | 4 +- clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp | 1 + .../CIR/Dialect/Transforms/EHABILowering.cpp | 6 +- .../lib/CIR/Dialect/Transforms/FlattenCFG.cpp | 20 +++-- .../TargetLowering/LowerItaniumCXXABI.cpp | 5 ++ .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 11 +-- .../CodeGenBuiltins/builtin-nontemporal.cpp | 77 +++++++++++++++++++ 14 files changed, 147 insertions(+), 44 deletions(-) create mode 100644 clang/test/CIR/CodeGenBuiltins/builtin-nontemporal.cpp diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h index f5222accff154..0758a0f2c14c7 100644 --- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h +++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h @@ -222,11 +222,12 @@ class CIRBaseBuilderTy : public mlir::OpBuilder { } cir::LoadOp createLoad(mlir::Location loc, mlir::Value ptr, - bool isVolatile = false, uint64_t alignment = 0) { + bool isVolatile = false, uint64_t alignment = 0, + bool isNontemporal = false) { mlir::IntegerAttr alignmentAttr = getAlignmentAttr(alignment); return cir::LoadOp::create(*this, loc, ptr, /*isDeref=*/false, isVolatile, - alignmentAttr, cir::SyncScopeKindAttr{}, - cir::MemOrderAttr{}); + isNontemporal, alignmentAttr, + cir::SyncScopeKindAttr{}, 
cir::MemOrderAttr{}); } mlir::Value createAlignedLoad(mlir::Location loc, mlir::Value ptr, @@ -376,15 +377,15 @@ class CIRBaseBuilderTy : public mlir::OpBuilder { } cir::StoreOp createStore(mlir::Location loc, mlir::Value val, mlir::Value dst, - bool isVolatile = false, + bool isVolatile = false, bool isNontemporal = false, mlir::IntegerAttr align = {}, cir::SyncScopeKindAttr scope = {}, cir::MemOrderAttr order = {}) { if (mlir::cast<cir::PointerType>(dst.getType()).getPointee() != val.getType()) dst = createPtrBitcast(dst, val.getType()); - return cir::StoreOp::create(*this, loc, val, dst, isVolatile, align, scope, - order); + return cir::StoreOp::create(*this, loc, val, dst, isVolatile, isNontemporal, + align, scope, order); } /// Emit a load from an boolean flag variable. @@ -422,7 +423,8 @@ class CIRBaseBuilderTy : public mlir::OpBuilder { mlir::IntegerAttr alignmentAttr = getAlignmentAttr(alignment); auto addr = createAlloca(loc, getPointerTo(type), type, {}, alignmentAttr); return cir::LoadOp::create(*this, loc, addr, /*isDeref=*/false, - /*isVolatile=*/false, alignmentAttr, + /*isVolatile=*/false, /*nontemporal=*/false, + alignmentAttr, /*sync_scope=*/{}, /*mem_order=*/{}); } diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index 67ddaa73d9184..b95622f7a2b89 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -709,6 +709,7 @@ def CIR_LoadOp : CIR_Op<"load", [ [MemRead]>:$addr, UnitAttr:$isDeref, UnitAttr:$is_volatile, + UnitAttr:$nontemporal, OptionalAttr<I64Attr>:$alignment, OptionalAttr<CIR_SyncScopeKind>:$sync_scope, OptionalAttr<CIR_MemOrder>:$mem_order); @@ -717,6 +718,7 @@ def CIR_LoadOp : CIR_Op<"load", [ let assemblyFormat = [{ (`deref` $isDeref^)? (`volatile` $is_volatile^)? + (`nontemporal` $nontemporal^)? (`align` `(` $alignment^ `)`)? (`syncscope` `(` $sync_scope^ `)`)? (`atomic` `(` $mem_order^ `)`)? 
@@ -808,12 +810,14 @@ def CIR_StoreOp : CIR_Op<"store", [ Arg<CIR_PointerType, "the address to store the value", [MemWrite]>:$addr, UnitAttr:$is_volatile, + UnitAttr:$nontemporal, OptionalAttr<I64Attr>:$alignment, OptionalAttr<CIR_SyncScopeKind>:$sync_scope, OptionalAttr<CIR_MemOrder>:$mem_order); let assemblyFormat = [{ (`volatile` $is_volatile^)? + (`nontemporal` $nontemporal^)? (`align` `(` $alignment^ `)`)? (`syncscope` `(` $sync_scope^ `)`)? (`atomic` `(` $mem_order^ `)`)? diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index 56ee2f4101a99..31958cd078d4d 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -47,7 +47,6 @@ struct MissingFeatures { // Load/store attributes static bool opLoadEmitScalarRangeCheck() { return false; } - static bool opLoadStoreNontemporal() { return false; } static bool opLoadStoreTbaa() { return false; } static bool opLoadStoreAtomic() { return false; } static bool opLoadStoreObjC() { return false; } diff --git a/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp index 3df0cd23d570e..bd6e2b685f767 100644 --- a/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp @@ -630,6 +630,7 @@ static void emitAtomicOp(CIRGenFunction &cgf, AtomicExpr *expr, Address dest, assert(!cir::MissingFeatures::atomicSyncScopeID()); builder.createStore(loc, loadVal1, ptr, expr->isVolatile(), + /*isNontemporal=*/false, /*align=*/mlir::IntegerAttr{}, scopeAttr, orderAttr); return; } diff --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.h b/clang/lib/CIR/CodeGen/CIRGenBuilder.h index aeb1a122429e2..ae8198e191bfc 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuilder.h +++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h @@ -626,10 +626,11 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy { } cir::LoadOp createLoad(mlir::Location loc, Address addr, - bool isVolatile = false) { + bool isVolatile = false, bool 
isNontemporal = false) { mlir::IntegerAttr align = getAlignmentAttr(addr.getAlignment()); return cir::LoadOp::create(*this, loc, addr.getPointer(), /*isDeref=*/false, - isVolatile, /*alignment=*/align, + isVolatile, isNontemporal, + /*alignment=*/align, /*sync_scope=*/cir::SyncScopeKindAttr{}, /*mem_order=*/cir::MemOrderAttr{}); } @@ -641,7 +642,8 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy { uint64_t alignment = align ? align->value() : 0; mlir::IntegerAttr alignAttr = getAlignmentAttr(alignment); return cir::LoadOp::create(*this, loc, ptr, /*isDeref=*/false, - /*isVolatile=*/false, alignAttr, + /*isVolatile=*/false, /*isNontemporal=*/false, + alignAttr, /*sync_scope=*/cir::SyncScopeKindAttr{}, /*mem_order=*/cir::MemOrderAttr{}); } @@ -653,14 +655,14 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy { } cir::StoreOp createStore(mlir::Location loc, mlir::Value val, Address dst, - bool isVolatile = false, + bool isVolatile = false, bool isNontemporal = false, mlir::IntegerAttr align = {}, cir::SyncScopeKindAttr scope = {}, cir::MemOrderAttr order = {}) { if (!align) align = getAlignmentAttr(dst.getAlignment()); return CIRBaseBuilderTy::createStore(loc, val, dst.getPointer(), isVolatile, - align, scope, order); + isNontemporal, align, scope, order); } /// Create a cir.complex.real_ptr operation that derives a pointer to the real diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp index afa7e5b91251b..dd48979eeb00a 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp @@ -2090,8 +2090,23 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID, case Builtin::BI__sync_lock_release_8: case Builtin::BI__sync_lock_release_16: case Builtin::BI__sync_synchronize: - case Builtin::BI__builtin_nontemporal_load: - case Builtin::BI__builtin_nontemporal_store: + return errorBuiltinNYI(*this, e, builtinID); + case 
Builtin::BI__builtin_nontemporal_load: { + Address addr = emitPointerWithAlignment(e->getArg(0)); + mlir::Value val = emitLoadOfScalar( + addr, /*isVolatile=*/false, e->getType(), e->getExprLoc(), + LValueBaseInfo(AlignmentSource::Type), /*isNontemporal=*/true); + return RValue::get(val); + } + case Builtin::BI__builtin_nontemporal_store: { + mlir::Value val = emitScalarExpr(e->getArg(0)); + Address addr = emitPointerWithAlignment(e->getArg(1)); + val = emitToMemory(val, e->getArg(0)->getType()); + emitStoreOfScalar(val, addr, /*isVolatile=*/false, e->getArg(0)->getType(), + LValueBaseInfo(AlignmentSource::Type), /*isInit=*/false, + /*isNontemporal=*/true); + return RValue::get(nullptr); + } case Builtin::BI__c11_atomic_is_lock_free: case Builtin::BI__atomic_is_lock_free: case Builtin::BI__atomic_test_and_set: diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp index 0f40516ee3537..f92ba41fcd146 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp @@ -499,12 +499,7 @@ void CIRGenFunction::emitStoreOfScalar(mlir::Value value, Address addr, } assert(currSrcLoc && "must pass in source location"); - builder.createStore(*currSrcLoc, value, addr, isVolatile); - - if (isNontemporal) { - cgm.errorNYI(addr.getPointer().getLoc(), "emitStoreOfScalar nontemporal"); - return; - } + builder.createStore(*currSrcLoc, value, addr, isVolatile, isNontemporal); assert(!cir::MissingFeatures::opTBAA()); } @@ -741,7 +736,8 @@ void CIRGenFunction::emitStoreOfScalar(mlir::Value value, LValue lvalue, mlir::Value CIRGenFunction::emitLoadOfScalar(Address addr, bool isVolatile, QualType ty, SourceLocation loc, - LValueBaseInfo baseInfo) { + LValueBaseInfo baseInfo, + bool isNontemporal) { // Traditional LLVM codegen handles thread local separately, CIR handles // as part of getAddrOfGlobalVar (GetGlobalOp). 
mlir::Type eltTy = addr.getElementType(); @@ -771,7 +767,8 @@ mlir::Value CIRGenFunction::emitLoadOfScalar(Address addr, bool isVolatile, assert(!cir::MissingFeatures::opLoadEmitScalarRangeCheck()); - mlir::Value loadOp = builder.createLoad(getLoc(loc), addr, isVolatile); + mlir::Value loadOp = + builder.createLoad(getLoc(loc), addr, isVolatile, isNontemporal); if (!ty->isBooleanType() && ty->hasBooleanRepresentation()) cgm.errorNYI("emitLoadOfScalar: boolean type with boolean representation"); @@ -780,7 +777,6 @@ mlir::Value CIRGenFunction::emitLoadOfScalar(Address addr, bool isVolatile, mlir::Value CIRGenFunction::emitLoadOfScalar(LValue lvalue, SourceLocation loc) { - assert(!cir::MissingFeatures::opLoadStoreNontemporal()); assert(!cir::MissingFeatures::opLoadStoreTbaa()); return emitLoadOfScalar(lvalue.getAddress(), lvalue.isVolatile(), lvalue.getType(), loc, lvalue.getBaseInfo()); diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index 9f2facd12f417..cf71985310459 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -1993,7 +1993,8 @@ class CIRGenFunction : public CIRGenTypeCache { /// l-value. mlir::Value emitLoadOfScalar(LValue lvalue, SourceLocation loc); mlir::Value emitLoadOfScalar(Address addr, bool isVolatile, QualType ty, - SourceLocation loc, LValueBaseInfo baseInfo); + SourceLocation loc, LValueBaseInfo baseInfo, + bool isNontemporal = false); /// Emit code to compute a designator that specifies the location /// of the expression. 
@@ -2203,6 +2204,7 @@ class CIRGenFunction : public CIRGenTypeCache { builder.restoreInsertionPoint(outermostConditional->getInsertPoint()); builder.createStore( value.getLoc(), value, addr, /*isVolatile=*/false, + /*isNontemporal=*/false, mlir::IntegerAttr::get( mlir::IntegerType::get(value.getContext(), 64), (uint64_t)addr.getAlignment().getAsAlign().value())); diff --git a/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp b/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp index 8a82bcb19454e..73b35c7f00c2d 100644 --- a/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp @@ -142,6 +142,7 @@ DeletionKind cir::CopyOp::removeBlockingUses( if (loadsFrom(slot)) cir::StoreOp::create(builder, getLoc(), reachingDefinition, getDst(), /*isVolatile=*/false, + /*isNontemporal=*/false, /*alignment=*/mlir::IntegerAttr{}, /*sync_scope=*/cir::SyncScopeKindAttr(), /*mem-order=*/cir::MemOrderAttr()); diff --git a/clang/lib/CIR/Dialect/Transforms/EHABILowering.cpp b/clang/lib/CIR/Dialect/Transforms/EHABILowering.cpp index 802740e800d7f..b586a281ca91b 100644 --- a/clang/lib/CIR/Dialect/Transforms/EHABILowering.cpp +++ b/clang/lib/CIR/Dialect/Transforms/EHABILowering.cpp @@ -750,7 +750,7 @@ void ItaniumEHLowering::lowerInitCatchParam(cir::InitCatchParamOp op) { mlir::Value casted = cir::CastOp::create(builder, loc, elementType, cir::CastKind::bitcast, exnPtr); - cir::StoreOp::create(builder, loc, casted, paramAddr, {}, {}, {}, {}); + cir::StoreOp::create(builder, loc, casted, paramAddr, {}, {}, {}, {}, {}); break; } case InitCatchKind::TrivialCopy: { @@ -771,13 +771,13 @@ void ItaniumEHLowering::lowerInitCatchParam(cir::InitCatchParamOp op) { cir::CastKind::bitcast, exnPtr); auto loadOp = cir::LoadOp::create(builder, loc, elementType, srcPtr); cir::StoreOp::create(builder, loc, loadOp.getResult(), paramAddr, {}, {}, - {}, {}); + {}, {}, {}); break; } case InitCatchKind::Pointer: { mlir::Value casted = cir::CastOp::create(builder, loc, elementType, 
cir::CastKind::bitcast, exnPtr); - cir::StoreOp::create(builder, loc, casted, paramAddr, {}, {}, {}, {}); + cir::StoreOp::create(builder, loc, casted, paramAddr, {}, {}, {}, {}, {}); break; } case InitCatchKind::Objc: diff --git a/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp b/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp index a21394dc62332..984e60a98dcef 100644 --- a/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp +++ b/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp @@ -956,16 +956,18 @@ class CIRCleanupScopeOpFlattening rewriter.setInsertionPoint(exitOp); cir::StoreOp::create(rewriter, loc, operand, alloca, /*isVolatile=*/false, + /*isNontemporal=*/false, /*alignment=*/mlir::IntegerAttr(), cir::SyncScopeKindAttr(), cir::MemOrderAttr()); } // Reload the value from the temporary alloca in the destination block. rewriter.setInsertionPointToEnd(destBlock); - auto loaded = cir::LoadOp::create( - rewriter, loc, alloca, /*isDeref=*/false, - /*isVolatile=*/false, /*alignment=*/mlir::IntegerAttr(), - cir::SyncScopeKindAttr(), cir::MemOrderAttr()); + auto loaded = + cir::LoadOp::create(rewriter, loc, alloca, /*isDeref=*/false, + /*isVolatile=*/false, /*isNontemporal=*/false, + /*alignment=*/mlir::IntegerAttr(), + cir::SyncScopeKindAttr(), cir::MemOrderAttr()); returnValues.push_back(loaded); } } @@ -1274,10 +1276,11 @@ class CIRCleanupScopeOpFlattening rewriter.setInsertionPointToEnd(exitBlock); // Load the destination slot value. - auto slotValue = cir::LoadOp::create( - rewriter, loc, destSlot, /*isDeref=*/false, - /*isVolatile=*/false, /*alignment=*/mlir::IntegerAttr(), - cir::SyncScopeKindAttr(), cir::MemOrderAttr()); + auto slotValue = + cir::LoadOp::create(rewriter, loc, destSlot, /*isDeref=*/false, + /*isVolatile=*/false, /*isNontemporal=*/false, + /*alignment=*/mlir::IntegerAttr(), + cir::SyncScopeKindAttr(), cir::MemOrderAttr()); // Create destination blocks for each exit and collect switch case info. 
llvm::SmallVector<mlir::APInt, 8> caseValues; @@ -1306,6 +1309,7 @@ class CIRCleanupScopeOpFlattening rewriter, loc, cir::IntAttr::get(s32Type, exit.destinationId)); cir::StoreOp::create(rewriter, loc, destIdConst, destSlot, /*isVolatile=*/false, + /*isNontemporal=*/false, /*alignment=*/mlir::IntegerAttr(), cir::SyncScopeKindAttr(), cir::MemOrderAttr()); rewriter.replaceOpWithNewOp<cir::BrOp>(exit.exitOp, cleanupEntry); diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerItaniumCXXABI.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerItaniumCXXABI.cpp index 5c54103c60247..6edb5cc8425bd 100644 --- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerItaniumCXXABI.cpp +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerItaniumCXXABI.cpp @@ -380,6 +380,7 @@ void LowerItaniumCXXABI::lowerGetMethod( mlir::Value vtablePtr = cir::LoadOp::create(b, loc, vtablePtrPtr, /*isDeref=*/false, /*isVolatile=*/false, + /*isNontemporal=*/false, /*alignment=*/mlir::IntegerAttr(), /*sync_scope=*/cir::SyncScopeKindAttr{}, /*mem_order=*/cir::MemOrderAttr()); @@ -407,6 +408,7 @@ void LowerItaniumCXXABI::lowerGetMethod( cir::CastKind::bitcast, vfpAddr); auto fnPtr = cir::LoadOp::create(b, loc, vfpPtr, /*isDeref=*/false, /*isVolatile=*/false, + /*isNontemporal=*/false, /*alignment=*/mlir::IntegerAttr(), /*sync_scope=*/cir::SyncScopeKindAttr{}, /*mem_order=*/cir::MemOrderAttr()); @@ -780,6 +782,7 @@ static mlir::Value buildDynamicCastToVoidAfterNullCheck( builder, loc, vptrPtr, /*isDeref=*/false, /*is_volatile=*/false, + /*isNontemporal=*/false, /*alignment=*/builder.getI64IntegerAttr(vtableElemAlign), /*sync_scope=*/cir::SyncScopeKindAttr(), /*mem_order=*/cir::MemOrderAttr()); @@ -793,6 +796,7 @@ static mlir::Value buildDynamicCastToVoidAfterNullCheck( builder, loc, offsetToTopSlotPtr, /*isDeref=*/false, /*is_volatile=*/false, + /*isNontemporal=*/false, /*alignment=*/builder.getI64IntegerAttr(vtableElemAlign), /*sync_scope=*/cir::SyncScopeKindAttr(), 
/*mem_order=*/cir::MemOrderAttr()); @@ -902,6 +906,7 @@ mlir::Value LowerItaniumCXXABI::readArrayCookieImpl( builder, loc, countPtrTy, cir::CastKind::bitcast, countBytePtr); return cir::LoadOp::create( builder, loc, countPtr, /*isDeref=*/false, /*isVolatile=*/false, + /*isNontemporal=*/false, builder.getI64IntegerAttr(countAlignment.getQuantity()), cir::SyncScopeKindAttr(), cir::MemOrderAttr()); } diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index c4e98e299dfc1..515ac39a8de67 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -1860,15 +1860,12 @@ mlir::LogicalResult CIRToLLVMLoadOpLowering::matchAndRewrite( assert(!cir::MissingFeatures::lowerModeOptLevel()); - // TODO: nontemporal. - assert(!cir::MissingFeatures::opLoadStoreNontemporal()); - std::optional<llvm::StringRef> llvmSyncScope = getLLVMSyncScope(op.getSyncScope()); mlir::LLVM::LoadOp newLoad = mlir::LLVM::LoadOp::create( rewriter, op->getLoc(), llvmTy, adaptor.getAddr(), alignment, - op.getIsVolatile(), /*isNonTemporal=*/false, + op.getIsVolatile(), /*isNonTemporal=*/op.getNontemporal(), /*isInvariant=*/false, /*isInvariantGroup=*/false, ordering, llvmSyncScope.value_or(std::string())); @@ -1916,8 +1913,6 @@ mlir::LogicalResult CIRToLLVMStoreOpLowering::matchAndRewrite( // Convert adapted value to its memory type if needed. mlir::Value value = emitToMemory(rewriter, dataLayout, op.getValue().getType(), adaptor.getValue()); - // TODO: nontemporal. 
- assert(!cir::MissingFeatures::opLoadStoreNontemporal()); assert(!cir::MissingFeatures::opLoadStoreTbaa()); std::optional<llvm::StringRef> llvmSyncScope = @@ -1926,8 +1921,8 @@ mlir::LogicalResult CIRToLLVMStoreOpLowering::matchAndRewrite( mlir::LLVM::StoreOp storeOp = mlir::LLVM::StoreOp::create( rewriter, op->getLoc(), value, adaptor.getAddr(), alignment, op.getIsVolatile(), - /*isNonTemporal=*/false, /*isInvariantGroup=*/false, memorder, - llvmSyncScope.value_or(std::string())); + /*isNonTemporal=*/op.getNontemporal(), /*isInvariantGroup=*/false, + memorder, llvmSyncScope.value_or(std::string())); rewriter.replaceOp(op, storeOp); assert(!cir::MissingFeatures::opLoadStoreTbaa()); return mlir::LogicalResult::success(); diff --git a/clang/test/CIR/CodeGenBuiltins/builtin-nontemporal.cpp b/clang/test/CIR/CodeGenBuiltins/builtin-nontemporal.cpp new file mode 100644 index 0000000000000..ec834049ecc44 --- /dev/null +++ b/clang/test/CIR/CodeGenBuiltins/builtin-nontemporal.cpp @@ -0,0 +1,77 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s -check-prefix=CIR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM + +signed char sc; +unsigned char uc; +signed short ss; +unsigned short us; +signed int si; +unsigned int ui; +signed long long sll; +unsigned long long ull; +float f1, f2; +double d1, d2; + +void test_nontemporal_store() { +// CIR-LABEL: cir.func {{.*}}@_Z22test_nontemporal_storev +// CIR: cir.store nontemporal align(1) {{%.*}}, {{%.*}} : !u8i, !cir.ptr<!u8i> +// CIR: cir.store nontemporal align(1) {{%.*}}, {{%.*}} : !u8i, !cir.ptr<!u8i> +// CIR: cir.store nontemporal align(1) {{%.*}}, {{%.*}} : !s8i, !cir.ptr<!s8i> +// CIR: cir.store nontemporal align(2) {{%.*}}, {{%.*}} : !u16i, !cir.ptr<!u16i> +// CIR: cir.store nontemporal align(4) {{%.*}}, 
{{%.*}} : !s32i, !cir.ptr<!s32i> +// CIR: cir.store nontemporal align(8) {{%.*}}, {{%.*}} : !u64i, !cir.ptr<!u64i> +// CIR: cir.store nontemporal align(4) {{%.*}}, {{%.*}} : !cir.float, !cir.ptr<!cir.float> +// CIR: cir.store nontemporal align(8) {{%.*}}, {{%.*}} : !cir.double, !cir.ptr<!cir.double> +// CIR: cir.return + +// LLVM-LABEL: define dso_local void @_Z22test_nontemporal_storev +// LLVM: store i8 1, ptr @uc, align 1, !nontemporal +// LLVM: store i8 1, ptr @uc, align 1, !nontemporal +// LLVM: store i8 1, ptr @sc, align 1, !nontemporal +// LLVM: store i16 1, ptr @us, align 2, !nontemporal +// LLVM: store i32 1, ptr @si, align 4, !nontemporal +// LLVM: store i64 1, ptr @ull, align 8, !nontemporal +// LLVM: store float 1.0{{.*}}, ptr @f1, align 4, !nontemporal +// LLVM: store double 1.0{{.*}}, ptr @d1, align 8, !nontemporal +// LLVM: ret void + + __builtin_nontemporal_store(true, &uc); + __builtin_nontemporal_store(1, &uc); + __builtin_nontemporal_store(1, &sc); + __builtin_nontemporal_store(1, &us); + __builtin_nontemporal_store(1, &si); + __builtin_nontemporal_store(1, &ull); + __builtin_nontemporal_store(1.0, &f1); + __builtin_nontemporal_store(1.0, &d1); +} + +void test_nontemporal_load() { +// CIR-LABEL: cir.func {{.*}}@_Z21test_nontemporal_loadv +// CIR: cir.load nontemporal align(1) {{%.*}} : !cir.ptr<!s8i>, !s8i +// CIR: cir.load nontemporal align(1) {{%.*}} : !cir.ptr<!u8i>, !u8i +// CIR: cir.load nontemporal align(2) {{%.*}} : !cir.ptr<!s16i>, !s16i +// CIR: cir.load nontemporal align(4) {{%.*}} : !cir.ptr<!u32i>, !u32i +// CIR: cir.load nontemporal align(8) {{%.*}} : !cir.ptr<!s64i>, !s64i +// CIR: cir.load nontemporal align(4) {{%.*}} : !cir.ptr<!cir.float>, !cir.float +// CIR: cir.load nontemporal align(8) {{%.*}} : !cir.ptr<!cir.double>, !cir.double +// CIR: cir.return + +// LLVM-LABEL: define dso_local void @_Z21test_nontemporal_loadv +// LLVM: load i8, ptr @sc, align 1, !nontemporal +// LLVM: load i8, ptr @uc, align 1, !nontemporal +// LLVM: 
load i16, ptr @ss, align 2, !nontemporal +// LLVM: load i32, ptr @ui, align 4, !nontemporal +// LLVM: load i64, ptr @sll, align 8, !nontemporal +// LLVM: load float, ptr @f2, align 4, !nontemporal +// LLVM: load double, ptr @d2, align 8, !nontemporal +// LLVM: ret void + + uc = __builtin_nontemporal_load(&sc); + sc = __builtin_nontemporal_load(&uc); + us = __builtin_nontemporal_load(&ss); + si = __builtin_nontemporal_load(&ui); + ull = __builtin_nontemporal_load(&sll); + f1 = __builtin_nontemporal_load(&f2); + d1 = __builtin_nontemporal_load(&d2); +} >From 1dbf2b01cbc5b846d1b672e333b272d59876e04e Mon Sep 17 00:00:00 2001 From: Jianjian GUAN <[email protected]> Date: Tue, 19 May 2026 11:40:53 +0800 Subject: [PATCH 2/3] fix rebase --- clang/lib/CIR/CodeGen/CIRGenBuiltinNVPTX.cpp | 1 + clang/lib/CIR/Dialect/Transforms/EHABILowering.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinNVPTX.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinNVPTX.cpp index 3c654761b9903..4db2d7259c6ba 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinNVPTX.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinNVPTX.cpp @@ -1067,6 +1067,7 @@ static mlir::Value packArgsIntoNVPTXFormatBuffer(CIRGenFunction &cgf, dataLayout.getABITypeAlign(argTypes[i]).value()); cir::StoreOp::create(builder, loc, arg.getKnownRValue().getValue(), member, /*is_volatile=*/false, + /*isNontemporal=*/false, builder.getAlignmentAttr(abiAlign), /*sync_scope=*/cir::SyncScopeKindAttr{}, /*mem_order=*/cir::MemOrderAttr{}); diff --git a/clang/lib/CIR/Dialect/Transforms/EHABILowering.cpp b/clang/lib/CIR/Dialect/Transforms/EHABILowering.cpp index b586a281ca91b..918eecb3eee49 100644 --- a/clang/lib/CIR/Dialect/Transforms/EHABILowering.cpp +++ b/clang/lib/CIR/Dialect/Transforms/EHABILowering.cpp @@ -641,7 +641,7 @@ ItaniumEHLowering::lowerConstructCatchParam(cir::ConstructCatchParamOp op, mlir::Value casted = cir::CastOp::create(builder, loc, paramAddrType.getPointee(), 
cir::CastKind::bitcast, exnObj); - cir::StoreOp::create(builder, loc, casted, paramAddr, {}, {}, {}, {}); + cir::StoreOp::create(builder, loc, casted, paramAddr, {}, {}, {}, {}, {}); op.erase(); return success(); } >From 646c0729eaeba0d5f4dfd85b3b6e9d3e05c0057f Mon Sep 17 00:00:00 2001 From: Jianjian GUAN <[email protected]> Date: Tue, 2 Jun 2026 16:52:45 +0800 Subject: [PATCH 3/3] Address comment --- clang/include/clang/CIR/Dialect/IR/CIROps.td | 8 ++++---- clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 14 ++++++++------ clang/lib/CIR/CodeGen/CIRGenExpr.cpp | 5 +++-- clang/lib/CIR/CodeGen/CIRGenValue.h | 7 +++++++ .../lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 4 ++-- 5 files changed, 24 insertions(+), 14 deletions(-) diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index b95622f7a2b89..63773791b66a3 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -709,7 +709,7 @@ def CIR_LoadOp : CIR_Op<"load", [ [MemRead]>:$addr, UnitAttr:$isDeref, UnitAttr:$is_volatile, - UnitAttr:$nontemporal, + UnitAttr:$is_nontemporal, OptionalAttr<I64Attr>:$alignment, OptionalAttr<CIR_SyncScopeKind>:$sync_scope, OptionalAttr<CIR_MemOrder>:$mem_order); @@ -718,7 +718,7 @@ def CIR_LoadOp : CIR_Op<"load", [ let assemblyFormat = [{ (`deref` $isDeref^)? (`volatile` $is_volatile^)? - (`nontemporal` $nontemporal^)? + (`nontemporal` $is_nontemporal^)? (`align` `(` $alignment^ `)`)? (`syncscope` `(` $sync_scope^ `)`)? (`atomic` `(` $mem_order^ `)`)? @@ -810,14 +810,14 @@ def CIR_StoreOp : CIR_Op<"store", [ Arg<CIR_PointerType, "the address to store the value", [MemWrite]>:$addr, UnitAttr:$is_volatile, - UnitAttr:$nontemporal, + UnitAttr:$is_nontemporal, OptionalAttr<I64Attr>:$alignment, OptionalAttr<CIR_SyncScopeKind>:$sync_scope, OptionalAttr<CIR_MemOrder>:$mem_order); let assemblyFormat = [{ (`volatile` $is_volatile^)? - (`nontemporal` $nontemporal^)? 
+ (`nontemporal` $is_nontemporal^)? (`align` `(` $alignment^ `)`)? (`syncscope` `(` $sync_scope^ `)`)? (`atomic` `(` $mem_order^ `)`)? diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp index dd48979eeb00a..9674c0af800e1 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp @@ -2093,18 +2093,20 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID, return errorBuiltinNYI(*this, e, builtinID); case Builtin::BI__builtin_nontemporal_load: { Address addr = emitPointerWithAlignment(e->getArg(0)); - mlir::Value val = emitLoadOfScalar( - addr, /*isVolatile=*/false, e->getType(), e->getExprLoc(), - LValueBaseInfo(AlignmentSource::Type), /*isNontemporal=*/true); + LValue lv = makeAddrLValue(addr, e->getType(), + LValueBaseInfo(AlignmentSource::Type)); + lv.setNontemporal(true); + mlir::Value val = emitLoadOfScalar(lv, e->getExprLoc()); return RValue::get(val); } case Builtin::BI__builtin_nontemporal_store: { mlir::Value val = emitScalarExpr(e->getArg(0)); Address addr = emitPointerWithAlignment(e->getArg(1)); val = emitToMemory(val, e->getArg(0)->getType()); - emitStoreOfScalar(val, addr, /*isVolatile=*/false, e->getArg(0)->getType(), - LValueBaseInfo(AlignmentSource::Type), /*isInit=*/false, - /*isNontemporal=*/true); + LValue lv = makeAddrLValue(addr, e->getArg(0)->getType(), + LValueBaseInfo(AlignmentSource::Type)); + lv.setNontemporal(true); + emitStoreOfScalar(val, lv, /*isInit=*/false); return RValue::get(nullptr); } case Builtin::BI__c11_atomic_is_lock_free: diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp index f92ba41fcd146..a1d522212bd90 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp @@ -731,7 +731,7 @@ void CIRGenFunction::emitStoreOfScalar(mlir::Value value, LValue lvalue, emitStoreOfScalar(value, lvalue.getAddress(), lvalue.isVolatile(), lvalue.getType(), 
lvalue.getBaseInfo(), isInit, - /*isNontemporal=*/false); + lvalue.isNontemporal()); } mlir::Value CIRGenFunction::emitLoadOfScalar(Address addr, bool isVolatile, @@ -779,7 +779,8 @@ mlir::Value CIRGenFunction::emitLoadOfScalar(LValue lvalue, SourceLocation loc) { assert(!cir::MissingFeatures::opLoadStoreTbaa()); return emitLoadOfScalar(lvalue.getAddress(), lvalue.isVolatile(), - lvalue.getType(), loc, lvalue.getBaseInfo()); + lvalue.getType(), loc, lvalue.getBaseInfo(), + lvalue.isNontemporal()); } /// Given an expression that represents a value lvalue, this diff --git a/clang/lib/CIR/CodeGen/CIRGenValue.h b/clang/lib/CIR/CodeGen/CIRGenValue.h index e70dac5851189..b291b8c76f1ad 100644 --- a/clang/lib/CIR/CodeGen/CIRGenValue.h +++ b/clang/lib/CIR/CodeGen/CIRGenValue.h @@ -173,6 +173,9 @@ class LValue { mlir::Type elementType; LValueBaseInfo baseInfo; const CIRGenBitFieldInfo *bitFieldInfo{nullptr}; + // This flag shows whether nontemporal loads/stores should be used when accessing + // this lvalue. 
+ bool nontemporal; void initialize(clang::QualType type, clang::Qualifiers quals, clang::CharUnits alignment, LValueBaseInfo baseInfo) { @@ -187,6 +190,7 @@ class LValue { assert(this->alignment == alignment.getQuantity() && "Alignment exceeds allowed max!"); this->baseInfo = baseInfo; + this->nontemporal = false; } public: @@ -200,6 +204,9 @@ class LValue { bool isVolatileQualified() const { return quals.hasVolatile(); } + bool isNontemporal() const { return nontemporal; } + void setNontemporal(bool v) { nontemporal = v; } + unsigned getVRQualifiers() const { return quals.getCVRQualifiers() & ~clang::Qualifiers::Const; } diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 515ac39a8de67..1fe1d7dd52f97 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -1865,7 +1865,7 @@ mlir::LogicalResult CIRToLLVMLoadOpLowering::matchAndRewrite( mlir::LLVM::LoadOp newLoad = mlir::LLVM::LoadOp::create( rewriter, op->getLoc(), llvmTy, adaptor.getAddr(), alignment, - op.getIsVolatile(), /*isNonTemporal=*/op.getNontemporal(), + op.getIsVolatile(), /*isNonTemporal=*/op.getIsNontemporal(), /*isInvariant=*/false, /*isInvariantGroup=*/false, ordering, llvmSyncScope.value_or(std::string())); @@ -1921,7 +1921,7 @@ mlir::LogicalResult CIRToLLVMStoreOpLowering::matchAndRewrite( mlir::LLVM::StoreOp storeOp = mlir::LLVM::StoreOp::create( rewriter, op->getLoc(), value, adaptor.getAddr(), alignment, op.getIsVolatile(), - /*isNonTemporal=*/op.getNontemporal(), /*isInvariantGroup=*/false, + /*isNonTemporal=*/op.getIsNontemporal(), /*isInvariantGroup=*/false, memorder, llvmSyncScope.value_or(std::string())); rewriter.replaceOp(op, storeOp); assert(!cir::MissingFeatures::opLoadStoreTbaa()); _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
