Author: Jianjian Guan Date: 2026-06-23T10:27:45+08:00 New Revision: d853c056e5db0f9e1779e351b02c98a141a95c8d
URL: https://github.com/llvm/llvm-project/commit/d853c056e5db0f9e1779e351b02c98a141a95c8d DIFF: https://github.com/llvm/llvm-project/commit/d853c056e5db0f9e1779e351b02c98a141a95c8d.diff LOG: [CIR] Add support for __builtin_nontemporal_store and __builtin_nontemporal_load (#197872) Add nontemporal attribute to cir.load and cir.store ops. Added: clang/test/CIR/CodeGenBuiltins/builtin-nontemporal.cpp Modified: clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h clang/include/clang/CIR/Dialect/IR/CIROps.td clang/include/clang/CIR/MissingFeatures.h clang/lib/CIR/CodeGen/CIRGenAtomic.cpp clang/lib/CIR/CodeGen/CIRGenBuilder.h clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp clang/lib/CIR/CodeGen/CIRGenBuiltinNVPTX.cpp clang/lib/CIR/CodeGen/CIRGenExpr.cpp clang/lib/CIR/CodeGen/CIRGenFunction.h clang/lib/CIR/CodeGen/CIRGenValue.h clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp clang/lib/CIR/Dialect/Transforms/EHABILowering.cpp clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerItaniumCXXABI.cpp clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp Removed: ################################################################################ diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h index 121eed5f8ba9a..0db205f8d5b79 100644 --- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h +++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h @@ -229,11 +229,12 @@ class CIRBaseBuilderTy : public mlir::OpBuilder { } cir::LoadOp createLoad(mlir::Location loc, mlir::Value ptr, - bool isVolatile = false, uint64_t alignment = 0) { + bool isVolatile = false, uint64_t alignment = 0, + bool isNontemporal = false) { mlir::IntegerAttr alignmentAttr = getAlignmentAttr(alignment); return cir::LoadOp::create(*this, loc, ptr, /*isDeref=*/false, isVolatile, - alignmentAttr, cir::SyncScopeKindAttr{}, - cir::MemOrderAttr{}); + isNontemporal, alignmentAttr, + 
cir::SyncScopeKindAttr{}, cir::MemOrderAttr{}); } mlir::Value createAlignedLoad(mlir::Location loc, mlir::Value ptr, @@ -380,15 +381,15 @@ class CIRBaseBuilderTy : public mlir::OpBuilder { } cir::StoreOp createStore(mlir::Location loc, mlir::Value val, mlir::Value dst, - bool isVolatile = false, + bool isVolatile = false, bool isNontemporal = false, mlir::IntegerAttr align = {}, cir::SyncScopeKindAttr scope = {}, cir::MemOrderAttr order = {}) { if (mlir::cast<cir::PointerType>(dst.getType()).getPointee() != val.getType()) dst = createPtrBitcast(dst, val.getType()); - return cir::StoreOp::create(*this, loc, val, dst, isVolatile, align, scope, - order); + return cir::StoreOp::create(*this, loc, val, dst, isVolatile, isNontemporal, + align, scope, order); } /// Emit a load from an boolean flag variable. @@ -426,7 +427,8 @@ class CIRBaseBuilderTy : public mlir::OpBuilder { mlir::IntegerAttr alignmentAttr = getAlignmentAttr(alignment); auto addr = createAlloca(loc, getPointerTo(type), {}, alignmentAttr); return cir::LoadOp::create(*this, loc, addr, /*isDeref=*/false, - /*isVolatile=*/false, alignmentAttr, + /*isVolatile=*/false, /*nontemporal=*/false, + alignmentAttr, /*sync_scope=*/{}, /*mem_order=*/{}); } diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index 9dae3534991e5..f4f22cd297ea6 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -694,6 +694,7 @@ def CIR_LoadOp : CIR_Op<"load", [ [MemRead]>:$addr, UnitAttr:$isDeref, UnitAttr:$is_volatile, + UnitAttr:$is_nontemporal, OptionalAttr<I64Attr>:$alignment, OptionalAttr<CIR_SyncScopeKind>:$sync_scope, OptionalAttr<CIR_MemOrder>:$mem_order); @@ -702,6 +703,7 @@ def CIR_LoadOp : CIR_Op<"load", [ let assemblyFormat = [{ (`deref` $isDeref^)? (`volatile` $is_volatile^)? + (`nontemporal` $is_nontemporal^)? (`align` `(` $alignment^ `)`)? (`syncscope` `(` $sync_scope^ `)`)? (`atomic` `(` $mem_order^ `)`)? 
@@ -793,12 +795,14 @@ def CIR_StoreOp : CIR_Op<"store", [ Arg<CIR_PointerType, "the address to store the value", [MemWrite]>:$addr, UnitAttr:$is_volatile, + UnitAttr:$is_nontemporal, OptionalAttr<I64Attr>:$alignment, OptionalAttr<CIR_SyncScopeKind>:$sync_scope, OptionalAttr<CIR_MemOrder>:$mem_order); let assemblyFormat = [{ (`volatile` $is_volatile^)? + (`nontemporal` $is_nontemporal^)? (`align` `(` $alignment^ `)`)? (`syncscope` `(` $sync_scope^ `)`)? (`atomic` `(` $mem_order^ `)`)? @@ -809,6 +813,7 @@ def CIR_StoreOp : CIR_Op<"store", [ // Non-volatile, non-atomic store with default alignment. OpBuilder<(ins "mlir::Value":$value, "mlir::Value":$addr), [{ build($_builder, $_state, value, addr, /*is_volatile=*/mlir::UnitAttr(), + /*is_nontemporal=*/mlir::UnitAttr(), /*alignment=*/mlir::IntegerAttr(), /*sync_scope=*/cir::SyncScopeKindAttr(), /*mem_order=*/cir::MemOrderAttr()); diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index c09db49a955ac..9a1546fe14e65 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -47,7 +47,6 @@ struct MissingFeatures { // Load/store attributes static bool opLoadEmitScalarRangeCheck() { return false; } - static bool opLoadStoreNontemporal() { return false; } static bool opLoadStoreTbaa() { return false; } static bool opLoadStoreAtomic() { return false; } static bool opLoadStoreObjC() { return false; } diff --git a/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp index 6ba6bc1c0405a..4ac6f4506b2cd 100644 --- a/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp @@ -705,6 +705,7 @@ static void emitAtomicOp(CIRGenFunction &cgf, AtomicExpr *expr, Address dest, assert(!cir::MissingFeatures::atomicSyncScopeID()); builder.createStore(loc, loadVal1, ptr, expr->isVolatile(), + /*isNontemporal=*/false, /*align=*/mlir::IntegerAttr{}, scopeAttr, orderAttr); return; } diff --git 
a/clang/lib/CIR/CodeGen/CIRGenBuilder.h b/clang/lib/CIR/CodeGen/CIRGenBuilder.h index 3204ba1a319f0..b8db0d9157aa6 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuilder.h +++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h @@ -577,10 +577,11 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy { } cir::LoadOp createLoad(mlir::Location loc, Address addr, - bool isVolatile = false) { + bool isVolatile = false, bool isNontemporal = false) { mlir::IntegerAttr align = getAlignmentAttr(addr.getAlignment()); return cir::LoadOp::create(*this, loc, addr.getPointer(), /*isDeref=*/false, - isVolatile, /*alignment=*/align, + isVolatile, isNontemporal, + /*alignment=*/align, /*sync_scope=*/cir::SyncScopeKindAttr{}, /*mem_order=*/cir::MemOrderAttr{}); } @@ -592,7 +593,8 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy { uint64_t alignment = align ? align->value() : 0; mlir::IntegerAttr alignAttr = getAlignmentAttr(alignment); return cir::LoadOp::create(*this, loc, ptr, /*isDeref=*/false, - /*isVolatile=*/false, alignAttr, + /*isVolatile=*/false, /*isNontemporal=*/false, + alignAttr, /*sync_scope=*/cir::SyncScopeKindAttr{}, /*mem_order=*/cir::MemOrderAttr{}); } @@ -604,14 +606,14 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy { } cir::StoreOp createStore(mlir::Location loc, mlir::Value val, Address dst, - bool isVolatile = false, + bool isVolatile = false, bool isNontemporal = false, mlir::IntegerAttr align = {}, cir::SyncScopeKindAttr scope = {}, cir::MemOrderAttr order = {}) { if (!align) align = getAlignmentAttr(dst.getAlignment()); return CIRBaseBuilderTy::createStore(loc, val, dst.getPointer(), isVolatile, - align, scope, order); + isNontemporal, align, scope, order); } /// Create a cir.complex.real_ptr operation that derives a pointer to the real diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp index a483eb635f0e2..4fb7ffc13a2ce 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp +++ 
b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp @@ -2126,8 +2126,24 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID, cir::SyncScopeKind::System)); return RValue::get(nullptr); } - case Builtin::BI__builtin_nontemporal_load: - case Builtin::BI__builtin_nontemporal_store: + case Builtin::BI__builtin_nontemporal_load: { + Address addr = emitPointerWithAlignment(e->getArg(0)); + LValue lv = makeAddrLValue(addr, e->getType(), + LValueBaseInfo(AlignmentSource::Type)); + lv.setNontemporal(true); + mlir::Value val = emitLoadOfScalar(lv, e->getExprLoc()); + return RValue::get(val); + } + case Builtin::BI__builtin_nontemporal_store: { + mlir::Value val = emitScalarExpr(e->getArg(0)); + Address addr = emitPointerWithAlignment(e->getArg(1)); + val = emitToMemory(val, e->getArg(0)->getType()); + LValue lv = makeAddrLValue(addr, e->getArg(0)->getType(), + LValueBaseInfo(AlignmentSource::Type)); + lv.setNontemporal(true); + emitStoreOfScalar(val, lv, /*isInit=*/false); + return RValue::get(nullptr); + } case Builtin::BI__c11_atomic_is_lock_free: case Builtin::BI__atomic_is_lock_free: case Builtin::BI__atomic_test_and_set: diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinNVPTX.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinNVPTX.cpp index 3c654761b9903..4db2d7259c6ba 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinNVPTX.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinNVPTX.cpp @@ -1067,6 +1067,7 @@ static mlir::Value packArgsIntoNVPTXFormatBuffer(CIRGenFunction &cgf, dataLayout.getABITypeAlign(argTypes[i]).value()); cir::StoreOp::create(builder, loc, arg.getKnownRValue().getValue(), member, /*is_volatile=*/false, + /*isNontemporal=*/false, builder.getAlignmentAttr(abiAlign), /*sync_scope=*/cir::SyncScopeKindAttr{}, /*mem_order=*/cir::MemOrderAttr{}); diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp index beaedd853f57b..fa14b45cbb015 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp +++ 
b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp @@ -499,12 +499,7 @@ void CIRGenFunction::emitStoreOfScalar(mlir::Value value, Address addr, } assert(currSrcLoc && "must pass in source location"); - builder.createStore(*currSrcLoc, value, addr, isVolatile); - - if (isNontemporal) { - cgm.errorNYI(addr.getPointer().getLoc(), "emitStoreOfScalar nontemporal"); - return; - } + builder.createStore(*currSrcLoc, value, addr, isVolatile, isNontemporal); assert(!cir::MissingFeatures::opTBAA()); } @@ -736,12 +731,13 @@ void CIRGenFunction::emitStoreOfScalar(mlir::Value value, LValue lvalue, emitStoreOfScalar(value, lvalue.getAddress(), lvalue.isVolatile(), lvalue.getType(), lvalue.getBaseInfo(), isInit, - /*isNontemporal=*/false); + lvalue.isNontemporal()); } mlir::Value CIRGenFunction::emitLoadOfScalar(Address addr, bool isVolatile, QualType ty, SourceLocation loc, - LValueBaseInfo baseInfo) { + LValueBaseInfo baseInfo, + bool isNontemporal) { // Traditional LLVM codegen handles thread local separately, CIR handles // as part of getAddrOfGlobalVar (GetGlobalOp). 
mlir::Type eltTy = addr.getElementType(); @@ -771,7 +767,8 @@ mlir::Value CIRGenFunction::emitLoadOfScalar(Address addr, bool isVolatile, assert(!cir::MissingFeatures::opLoadEmitScalarRangeCheck()); - mlir::Value loadOp = builder.createLoad(getLoc(loc), addr, isVolatile); + mlir::Value loadOp = + builder.createLoad(getLoc(loc), addr, isVolatile, isNontemporal); if (!ty->isBooleanType() && ty->hasBooleanRepresentation()) cgm.errorNYI("emitLoadOfScalar: boolean type with boolean representation"); @@ -780,10 +777,10 @@ mlir::Value CIRGenFunction::emitLoadOfScalar(Address addr, bool isVolatile, mlir::Value CIRGenFunction::emitLoadOfScalar(LValue lvalue, SourceLocation loc) { - assert(!cir::MissingFeatures::opLoadStoreNontemporal()); assert(!cir::MissingFeatures::opLoadStoreTbaa()); return emitLoadOfScalar(lvalue.getAddress(), lvalue.isVolatile(), - lvalue.getType(), loc, lvalue.getBaseInfo()); + lvalue.getType(), loc, lvalue.getBaseInfo(), + lvalue.isNontemporal()); } /// Given an expression that represents a value lvalue, this diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index 317151c8d61c6..d0b936f45378d 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -2095,7 +2095,8 @@ class CIRGenFunction : public CIRGenTypeCache { /// l-value. mlir::Value emitLoadOfScalar(LValue lvalue, SourceLocation loc); mlir::Value emitLoadOfScalar(Address addr, bool isVolatile, QualType ty, - SourceLocation loc, LValueBaseInfo baseInfo); + SourceLocation loc, LValueBaseInfo baseInfo, + bool isNontemporal = false); /// Emit code to compute a designator that specifies the location /// of the expression. 
@@ -2305,6 +2306,7 @@ class CIRGenFunction : public CIRGenTypeCache { builder.restoreInsertionPoint(outermostConditional->getInsertPoint()); builder.createStore( value.getLoc(), value, addr, /*isVolatile=*/false, + /*isNontemporal=*/false, mlir::IntegerAttr::get( mlir::IntegerType::get(value.getContext(), 64), (uint64_t)addr.getAlignment().getAsAlign().value())); diff --git a/clang/lib/CIR/CodeGen/CIRGenValue.h b/clang/lib/CIR/CodeGen/CIRGenValue.h index e70dac5851189..b291b8c76f1ad 100644 --- a/clang/lib/CIR/CodeGen/CIRGenValue.h +++ b/clang/lib/CIR/CodeGen/CIRGenValue.h @@ -173,6 +173,9 @@ class LValue { mlir::Type elementType; LValueBaseInfo baseInfo; const CIRGenBitFieldInfo *bitFieldInfo{nullptr}; + // This flag shows if a nontemporal load/stores should be used when accessing + // this lvalue. + bool nontemporal; void initialize(clang::QualType type, clang::Qualifiers quals, clang::CharUnits alignment, LValueBaseInfo baseInfo) { @@ -187,6 +190,7 @@ class LValue { assert(this->alignment == alignment.getQuantity() && "Alignment exceeds allowed max!"); this->baseInfo = baseInfo; + this->nontemporal = false; } public: @@ -200,6 +204,9 @@ class LValue { bool isVolatileQualified() const { return quals.hasVolatile(); } + bool isNontemporal() const { return nontemporal; } + void setNontemporal(bool v) { nontemporal = v; } + unsigned getVRQualifiers() const { return quals.getCVRQualifiers() & ~clang::Qualifiers::Const; } diff --git a/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp b/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp index 8a82bcb19454e..73b35c7f00c2d 100644 --- a/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp @@ -142,6 +142,7 @@ DeletionKind cir::CopyOp::removeBlockingUses( if (loadsFrom(slot)) cir::StoreOp::create(builder, getLoc(), reachingDefinition, getDst(), /*isVolatile=*/false, + /*isNontemporal=*/false, /*alignment=*/mlir::IntegerAttr{}, /*sync_scope=*/cir::SyncScopeKindAttr(), /*mem-order=*/cir::MemOrderAttr()); 
diff --git a/clang/lib/CIR/Dialect/Transforms/EHABILowering.cpp b/clang/lib/CIR/Dialect/Transforms/EHABILowering.cpp index e6c76fed6f78a..0e39fa15d377b 100644 --- a/clang/lib/CIR/Dialect/Transforms/EHABILowering.cpp +++ b/clang/lib/CIR/Dialect/Transforms/EHABILowering.cpp @@ -678,7 +678,7 @@ ItaniumEHLowering::lowerConstructCatchParam(cir::ConstructCatchParamOp op, mlir::Value casted = cir::CastOp::create(builder, loc, paramAddrType.getPointee(), cir::CastKind::bitcast, exnObj); - cir::StoreOp::create(builder, loc, casted, paramAddr, {}, {}, {}, {}); + cir::StoreOp::create(builder, loc, casted, paramAddr, {}, {}, {}, {}, {}); op.erase(); return success(); } @@ -853,7 +853,7 @@ void ItaniumEHLowering::lowerInitCatchParam(cir::InitCatchParamOp op) { mlir::Value casted = cir::CastOp::create(builder, loc, elementType, cir::CastKind::bitcast, exnPtr); - cir::StoreOp::create(builder, loc, casted, paramAddr, {}, {}, {}, {}); + cir::StoreOp::create(builder, loc, casted, paramAddr, {}, {}, {}, {}, {}); break; } case InitCatchKind::TrivialCopy: { @@ -874,13 +874,13 @@ void ItaniumEHLowering::lowerInitCatchParam(cir::InitCatchParamOp op) { cir::CastKind::bitcast, exnPtr); auto loadOp = cir::LoadOp::create(builder, loc, elementType, srcPtr); cir::StoreOp::create(builder, loc, loadOp.getResult(), paramAddr, {}, {}, - {}, {}); + {}, {}, {}); break; } case InitCatchKind::Pointer: { mlir::Value casted = cir::CastOp::create(builder, loc, elementType, cir::CastKind::bitcast, exnPtr); - cir::StoreOp::create(builder, loc, casted, paramAddr, {}, {}, {}, {}); + cir::StoreOp::create(builder, loc, casted, paramAddr, {}, {}, {}, {}, {}); break; } case InitCatchKind::Objc: diff --git a/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp b/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp index ddeeb98fee820..c487e645e30cd 100644 --- a/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp +++ b/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp @@ -971,16 +971,18 @@ class CIRCleanupScopeOpFlattening 
rewriter.setInsertionPoint(exitOp); cir::StoreOp::create(rewriter, loc, operand, alloca, /*isVolatile=*/false, + /*isNontemporal=*/false, /*alignment=*/mlir::IntegerAttr(), cir::SyncScopeKindAttr(), cir::MemOrderAttr()); } // Reload the value from the temporary alloca in the destination block. rewriter.setInsertionPointToEnd(destBlock); - auto loaded = cir::LoadOp::create( - rewriter, loc, alloca, /*isDeref=*/false, - /*isVolatile=*/false, /*alignment=*/mlir::IntegerAttr(), - cir::SyncScopeKindAttr(), cir::MemOrderAttr()); + auto loaded = + cir::LoadOp::create(rewriter, loc, alloca, /*isDeref=*/false, + /*isVolatile=*/false, /*isNontemporal=*/false, + /*alignment=*/mlir::IntegerAttr(), + cir::SyncScopeKindAttr(), cir::MemOrderAttr()); returnValues.push_back(loaded); } } @@ -1290,10 +1292,11 @@ class CIRCleanupScopeOpFlattening rewriter.setInsertionPointToEnd(exitBlock); // Load the destination slot value. - auto slotValue = cir::LoadOp::create( - rewriter, loc, destSlot, /*isDeref=*/false, - /*isVolatile=*/false, /*alignment=*/mlir::IntegerAttr(), - cir::SyncScopeKindAttr(), cir::MemOrderAttr()); + auto slotValue = + cir::LoadOp::create(rewriter, loc, destSlot, /*isDeref=*/false, + /*isVolatile=*/false, /*isNontemporal=*/false, + /*alignment=*/mlir::IntegerAttr(), + cir::SyncScopeKindAttr(), cir::MemOrderAttr()); // Create destination blocks for each exit and collect switch case info. 
llvm::SmallVector<mlir::APInt, 8> caseValues; @@ -1322,6 +1325,7 @@ class CIRCleanupScopeOpFlattening rewriter, loc, cir::IntAttr::get(s32Type, exit.destinationId)); cir::StoreOp::create(rewriter, loc, destIdConst, destSlot, /*isVolatile=*/false, + /*isNontemporal=*/false, /*alignment=*/mlir::IntegerAttr(), cir::SyncScopeKindAttr(), cir::MemOrderAttr()); rewriter.replaceOpWithNewOp<cir::BrOp>(exit.exitOp, cleanupEntry); diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerItaniumCXXABI.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerItaniumCXXABI.cpp index cccbe70876c3f..6e12a13787a2a 100644 --- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerItaniumCXXABI.cpp +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerItaniumCXXABI.cpp @@ -382,6 +382,7 @@ void LowerItaniumCXXABI::lowerGetMethod( mlir::Value vtablePtr = cir::LoadOp::create(b, loc, vtablePtrPtr, /*isDeref=*/false, /*isVolatile=*/false, + /*isNontemporal=*/false, /*alignment=*/mlir::IntegerAttr(), /*sync_scope=*/cir::SyncScopeKindAttr{}, /*mem_order=*/cir::MemOrderAttr()); @@ -409,6 +410,7 @@ void LowerItaniumCXXABI::lowerGetMethod( cir::CastKind::bitcast, vfpAddr); auto fnPtr = cir::LoadOp::create(b, loc, vfpPtr, /*isDeref=*/false, /*isVolatile=*/false, + /*isNontemporal=*/false, /*alignment=*/mlir::IntegerAttr(), /*sync_scope=*/cir::SyncScopeKindAttr{}, /*mem_order=*/cir::MemOrderAttr()); @@ -782,6 +784,7 @@ static mlir::Value buildDynamicCastToVoidAfterNullCheck( builder, loc, vptrPtr, /*isDeref=*/false, /*is_volatile=*/false, + /*isNontemporal=*/false, /*alignment=*/builder.getI64IntegerAttr(vtableElemAlign), /*sync_scope=*/cir::SyncScopeKindAttr(), /*mem_order=*/cir::MemOrderAttr()); @@ -795,6 +798,7 @@ static mlir::Value buildDynamicCastToVoidAfterNullCheck( builder, loc, offsetToTopSlotPtr, /*isDeref=*/false, /*is_volatile=*/false, + /*isNontemporal=*/false, /*alignment=*/builder.getI64IntegerAttr(vtableElemAlign), /*sync_scope=*/cir::SyncScopeKindAttr(), 
/*mem_order=*/cir::MemOrderAttr()); @@ -904,6 +908,7 @@ mlir::Value LowerItaniumCXXABI::readArrayCookieImpl( builder, loc, countPtrTy, cir::CastKind::bitcast, countBytePtr); return cir::LoadOp::create( builder, loc, countPtr, /*isDeref=*/false, /*isVolatile=*/false, + /*isNontemporal=*/false, builder.getI64IntegerAttr(countAlignment.getQuantity()), cir::SyncScopeKindAttr(), cir::MemOrderAttr()); } diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 25fa6d1625301..27eba4ee326a5 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -1860,15 +1860,12 @@ mlir::LogicalResult CIRToLLVMLoadOpLowering::matchAndRewrite( assert(!cir::MissingFeatures::lowerModeOptLevel()); - // TODO: nontemporal. - assert(!cir::MissingFeatures::opLoadStoreNontemporal()); - std::optional<llvm::StringRef> llvmSyncScope = getLLVMSyncScope(op.getSyncScope()); mlir::LLVM::LoadOp newLoad = mlir::LLVM::LoadOp::create( rewriter, op->getLoc(), llvmTy, adaptor.getAddr(), alignment, - op.getIsVolatile(), /*isNonTemporal=*/false, + op.getIsVolatile(), /*isNonTemporal=*/op.getIsNontemporal(), /*isInvariant=*/false, /*isInvariantGroup=*/false, ordering, llvmSyncScope.value_or(std::string())); @@ -1916,8 +1913,6 @@ mlir::LogicalResult CIRToLLVMStoreOpLowering::matchAndRewrite( // Convert adapted value to its memory type if needed. mlir::Value value = emitToMemory(rewriter, dataLayout, op.getValue().getType(), adaptor.getValue()); - // TODO: nontemporal. 
- assert(!cir::MissingFeatures::opLoadStoreNontemporal()); assert(!cir::MissingFeatures::opLoadStoreTbaa()); std::optional<llvm::StringRef> llvmSyncScope = @@ -1926,8 +1921,8 @@ mlir::LogicalResult CIRToLLVMStoreOpLowering::matchAndRewrite( mlir::LLVM::StoreOp storeOp = mlir::LLVM::StoreOp::create( rewriter, op->getLoc(), value, adaptor.getAddr(), alignment, op.getIsVolatile(), - /*isNonTemporal=*/false, /*isInvariantGroup=*/false, memorder, - llvmSyncScope.value_or(std::string())); + /*isNonTemporal=*/op.getIsNontemporal(), /*isInvariantGroup=*/false, + memorder, llvmSyncScope.value_or(std::string())); rewriter.replaceOp(op, storeOp); assert(!cir::MissingFeatures::opLoadStoreTbaa()); return mlir::LogicalResult::success(); diff --git a/clang/test/CIR/CodeGenBuiltins/builtin-nontemporal.cpp b/clang/test/CIR/CodeGenBuiltins/builtin-nontemporal.cpp new file mode 100644 index 0000000000000..ec834049ecc44 --- /dev/null +++ b/clang/test/CIR/CodeGenBuiltins/builtin-nontemporal.cpp @@ -0,0 +1,77 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s -check-prefix=CIR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM + +signed char sc; +unsigned char uc; +signed short ss; +unsigned short us; +signed int si; +unsigned int ui; +signed long long sll; +unsigned long long ull; +float f1, f2; +double d1, d2; + +void test_nontemporal_store() { +// CIR-LABEL: cir.func {{.*}}@_Z22test_nontemporal_storev +// CIR: cir.store nontemporal align(1) {{%.*}}, {{%.*}} : !u8i, !cir.ptr<!u8i> +// CIR: cir.store nontemporal align(1) {{%.*}}, {{%.*}} : !u8i, !cir.ptr<!u8i> +// CIR: cir.store nontemporal align(1) {{%.*}}, {{%.*}} : !s8i, !cir.ptr<!s8i> +// CIR: cir.store nontemporal align(2) {{%.*}}, {{%.*}} : !u16i, !cir.ptr<!u16i> +// CIR: cir.store nontemporal align(4) {{%.*}}, 
{{%.*}} : !s32i, !cir.ptr<!s32i> +// CIR: cir.store nontemporal align(8) {{%.*}}, {{%.*}} : !u64i, !cir.ptr<!u64i> +// CIR: cir.store nontemporal align(4) {{%.*}}, {{%.*}} : !cir.float, !cir.ptr<!cir.float> +// CIR: cir.store nontemporal align(8) {{%.*}}, {{%.*}} : !cir.double, !cir.ptr<!cir.double> +// CIR: cir.return + +// LLVM-LABEL: define dso_local void @_Z22test_nontemporal_storev +// LLVM: store i8 1, ptr @uc, align 1, !nontemporal +// LLVM: store i8 1, ptr @uc, align 1, !nontemporal +// LLVM: store i8 1, ptr @sc, align 1, !nontemporal +// LLVM: store i16 1, ptr @us, align 2, !nontemporal +// LLVM: store i32 1, ptr @si, align 4, !nontemporal +// LLVM: store i64 1, ptr @ull, align 8, !nontemporal +// LLVM: store float 1.0{{.*}}, ptr @f1, align 4, !nontemporal +// LLVM: store double 1.0{{.*}}, ptr @d1, align 8, !nontemporal +// LLVM: ret void + + __builtin_nontemporal_store(true, &uc); + __builtin_nontemporal_store(1, &uc); + __builtin_nontemporal_store(1, &sc); + __builtin_nontemporal_store(1, &us); + __builtin_nontemporal_store(1, &si); + __builtin_nontemporal_store(1, &ull); + __builtin_nontemporal_store(1.0, &f1); + __builtin_nontemporal_store(1.0, &d1); +} + +void test_nontemporal_load() { +// CIR-LABEL: cir.func {{.*}}@_Z21test_nontemporal_loadv +// CIR: cir.load nontemporal align(1) {{%.*}} : !cir.ptr<!s8i>, !s8i +// CIR: cir.load nontemporal align(1) {{%.*}} : !cir.ptr<!u8i>, !u8i +// CIR: cir.load nontemporal align(2) {{%.*}} : !cir.ptr<!s16i>, !s16i +// CIR: cir.load nontemporal align(4) {{%.*}} : !cir.ptr<!u32i>, !u32i +// CIR: cir.load nontemporal align(8) {{%.*}} : !cir.ptr<!s64i>, !s64i +// CIR: cir.load nontemporal align(4) {{%.*}} : !cir.ptr<!cir.float>, !cir.float +// CIR: cir.load nontemporal align(8) {{%.*}} : !cir.ptr<!cir.double>, !cir.double +// CIR: cir.return + +// LLVM-LABEL: define dso_local void @_Z21test_nontemporal_loadv +// LLVM: load i8, ptr @sc, align 1, !nontemporal +// LLVM: load i8, ptr @uc, align 1, !nontemporal +// LLVM: 
load i16, ptr @ss, align 2, !nontemporal +// LLVM: load i32, ptr @ui, align 4, !nontemporal +// LLVM: load i64, ptr @sll, align 8, !nontemporal +// LLVM: load float, ptr @f2, align 4, !nontemporal +// LLVM: load double, ptr @d2, align 8, !nontemporal +// LLVM: ret void + + uc = __builtin_nontemporal_load(&sc); + sc = __builtin_nontemporal_load(&uc); + us = __builtin_nontemporal_load(&ss); + si = __builtin_nontemporal_load(&ui); + ull = __builtin_nontemporal_load(&sll); + f1 = __builtin_nontemporal_load(&f2); + d1 = __builtin_nontemporal_load(&d2); +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
