Author: Jianjian Guan
Date: 2026-06-23T10:27:45+08:00
New Revision: d853c056e5db0f9e1779e351b02c98a141a95c8d

URL: 
https://github.com/llvm/llvm-project/commit/d853c056e5db0f9e1779e351b02c98a141a95c8d
DIFF: 
https://github.com/llvm/llvm-project/commit/d853c056e5db0f9e1779e351b02c98a141a95c8d.diff

LOG: [CIR] Add support for __builtin_nontemporal_store and __builtin_nontemporal_load (#197872)

Add nontemporal attribute to cir.load and cir.store ops.

Added: 
    clang/test/CIR/CodeGenBuiltins/builtin-nontemporal.cpp

Modified: 
    clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
    clang/include/clang/CIR/Dialect/IR/CIROps.td
    clang/include/clang/CIR/MissingFeatures.h
    clang/lib/CIR/CodeGen/CIRGenAtomic.cpp
    clang/lib/CIR/CodeGen/CIRGenBuilder.h
    clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
    clang/lib/CIR/CodeGen/CIRGenBuiltinNVPTX.cpp
    clang/lib/CIR/CodeGen/CIRGenExpr.cpp
    clang/lib/CIR/CodeGen/CIRGenFunction.h
    clang/lib/CIR/CodeGen/CIRGenValue.h
    clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp
    clang/lib/CIR/Dialect/Transforms/EHABILowering.cpp
    clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp
    clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerItaniumCXXABI.cpp
    clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
index 121eed5f8ba9a..0db205f8d5b79 100644
--- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
+++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
@@ -229,11 +229,12 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
   }
 
   cir::LoadOp createLoad(mlir::Location loc, mlir::Value ptr,
-                         bool isVolatile = false, uint64_t alignment = 0) {
+                         bool isVolatile = false, uint64_t alignment = 0,
+                         bool isNontemporal = false) {
     mlir::IntegerAttr alignmentAttr = getAlignmentAttr(alignment);
     return cir::LoadOp::create(*this, loc, ptr, /*isDeref=*/false, isVolatile,
-                               alignmentAttr, cir::SyncScopeKindAttr{},
-                               cir::MemOrderAttr{});
+                               isNontemporal, alignmentAttr,
+                               cir::SyncScopeKindAttr{}, cir::MemOrderAttr{});
   }
 
   mlir::Value createAlignedLoad(mlir::Location loc, mlir::Value ptr,
@@ -380,15 +381,15 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
   }
 
   cir::StoreOp createStore(mlir::Location loc, mlir::Value val, mlir::Value dst,
-                           bool isVolatile = false,
+                           bool isVolatile = false, bool isNontemporal = false,
                            mlir::IntegerAttr align = {},
                            cir::SyncScopeKindAttr scope = {},
                            cir::MemOrderAttr order = {}) {
     if (mlir::cast<cir::PointerType>(dst.getType()).getPointee() !=
         val.getType())
       dst = createPtrBitcast(dst, val.getType());
-    return cir::StoreOp::create(*this, loc, val, dst, isVolatile, align, scope,
-                                order);
+    return cir::StoreOp::create(*this, loc, val, dst, isVolatile, isNontemporal,
+                                align, scope, order);
   }
 
   /// Emit a load from an boolean flag variable.
@@ -426,7 +427,8 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
     mlir::IntegerAttr alignmentAttr = getAlignmentAttr(alignment);
     auto addr = createAlloca(loc, getPointerTo(type), {}, alignmentAttr);
     return cir::LoadOp::create(*this, loc, addr, /*isDeref=*/false,
-                               /*isVolatile=*/false, alignmentAttr,
+                               /*isVolatile=*/false, /*nontemporal=*/false,
+                               alignmentAttr,
                                /*sync_scope=*/{}, /*mem_order=*/{});
   }
 

diff  --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index 9dae3534991e5..f4f22cd297ea6 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -694,6 +694,7 @@ def CIR_LoadOp : CIR_Op<"load", [
                            [MemRead]>:$addr,
                        UnitAttr:$isDeref,
                        UnitAttr:$is_volatile,
+                       UnitAttr:$is_nontemporal,
                        OptionalAttr<I64Attr>:$alignment,
                        OptionalAttr<CIR_SyncScopeKind>:$sync_scope,
                        OptionalAttr<CIR_MemOrder>:$mem_order);
@@ -702,6 +703,7 @@ def CIR_LoadOp : CIR_Op<"load", [
   let assemblyFormat = [{
     (`deref` $isDeref^)?
     (`volatile` $is_volatile^)?
+    (`nontemporal` $is_nontemporal^)?
     (`align` `(` $alignment^ `)`)?
     (`syncscope` `(` $sync_scope^ `)`)?
     (`atomic` `(` $mem_order^ `)`)?
@@ -793,12 +795,14 @@ def CIR_StoreOp : CIR_Op<"store", [
                        Arg<CIR_PointerType, "the address to store the value",
                            [MemWrite]>:$addr,
                        UnitAttr:$is_volatile,
+                       UnitAttr:$is_nontemporal,
                        OptionalAttr<I64Attr>:$alignment,
                        OptionalAttr<CIR_SyncScopeKind>:$sync_scope,
                        OptionalAttr<CIR_MemOrder>:$mem_order);
 
   let assemblyFormat = [{
     (`volatile` $is_volatile^)?
+    (`nontemporal` $is_nontemporal^)?
     (`align` `(` $alignment^ `)`)?
     (`syncscope` `(` $sync_scope^ `)`)?
     (`atomic` `(` $mem_order^ `)`)?
@@ -809,6 +813,7 @@ def CIR_StoreOp : CIR_Op<"store", [
     // Non-volatile, non-atomic store with default alignment.
     OpBuilder<(ins "mlir::Value":$value, "mlir::Value":$addr), [{
       build($_builder, $_state, value, addr, /*is_volatile=*/mlir::UnitAttr(),
+            /*is_nontemporal=*/mlir::UnitAttr(),
             /*alignment=*/mlir::IntegerAttr(),
             /*sync_scope=*/cir::SyncScopeKindAttr(),
             /*mem_order=*/cir::MemOrderAttr());

diff  --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index c09db49a955ac..9a1546fe14e65 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -47,7 +47,6 @@ struct MissingFeatures {
 
   // Load/store attributes
   static bool opLoadEmitScalarRangeCheck() { return false; }
-  static bool opLoadStoreNontemporal() { return false; }
   static bool opLoadStoreTbaa() { return false; }
   static bool opLoadStoreAtomic() { return false; }
   static bool opLoadStoreObjC() { return false; }

diff  --git a/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp
index 6ba6bc1c0405a..4ac6f4506b2cd 100644
--- a/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp
@@ -705,6 +705,7 @@ static void emitAtomicOp(CIRGenFunction &cgf, AtomicExpr *expr, Address dest,
     assert(!cir::MissingFeatures::atomicSyncScopeID());
 
     builder.createStore(loc, loadVal1, ptr, expr->isVolatile(),
+                        /*isNontemporal=*/false,
                         /*align=*/mlir::IntegerAttr{}, scopeAttr, orderAttr);
     return;
   }

diff  --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.h b/clang/lib/CIR/CodeGen/CIRGenBuilder.h
index 3204ba1a319f0..b8db0d9157aa6 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuilder.h
+++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h
@@ -577,10 +577,11 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
   }
 
   cir::LoadOp createLoad(mlir::Location loc, Address addr,
-                         bool isVolatile = false) {
+                         bool isVolatile = false, bool isNontemporal = false) {
     mlir::IntegerAttr align = getAlignmentAttr(addr.getAlignment());
     return cir::LoadOp::create(*this, loc, addr.getPointer(), /*isDeref=*/false,
-                               isVolatile, /*alignment=*/align,
+                               isVolatile, isNontemporal,
+                               /*alignment=*/align,
                                /*sync_scope=*/cir::SyncScopeKindAttr{},
                                /*mem_order=*/cir::MemOrderAttr{});
   }
@@ -592,7 +593,8 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
     uint64_t alignment = align ? align->value() : 0;
     mlir::IntegerAttr alignAttr = getAlignmentAttr(alignment);
     return cir::LoadOp::create(*this, loc, ptr, /*isDeref=*/false,
-                               /*isVolatile=*/false, alignAttr,
+                               /*isVolatile=*/false, /*isNontemporal=*/false,
+                               alignAttr,
                                /*sync_scope=*/cir::SyncScopeKindAttr{},
                                /*mem_order=*/cir::MemOrderAttr{});
   }
@@ -604,14 +606,14 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
   }
 
   cir::StoreOp createStore(mlir::Location loc, mlir::Value val, Address dst,
-                           bool isVolatile = false,
+                           bool isVolatile = false, bool isNontemporal = false,
                            mlir::IntegerAttr align = {},
                            cir::SyncScopeKindAttr scope = {},
                            cir::MemOrderAttr order = {}) {
     if (!align)
       align = getAlignmentAttr(dst.getAlignment());
     return CIRBaseBuilderTy::createStore(loc, val, dst.getPointer(), isVolatile,
-                                         align, scope, order);
+                                         isNontemporal, align, scope, order);
   }
 
   /// Create a cir.complex.real_ptr operation that derives a pointer to the real

diff  --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index a483eb635f0e2..4fb7ffc13a2ce 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -2126,8 +2126,24 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
                                     cir::SyncScopeKind::System));
     return RValue::get(nullptr);
   }
-  case Builtin::BI__builtin_nontemporal_load:
-  case Builtin::BI__builtin_nontemporal_store:
+  case Builtin::BI__builtin_nontemporal_load: {
+    Address addr = emitPointerWithAlignment(e->getArg(0));
+    LValue lv = makeAddrLValue(addr, e->getType(),
+                               LValueBaseInfo(AlignmentSource::Type));
+    lv.setNontemporal(true);
+    mlir::Value val = emitLoadOfScalar(lv, e->getExprLoc());
+    return RValue::get(val);
+  }
+  case Builtin::BI__builtin_nontemporal_store: {
+    mlir::Value val = emitScalarExpr(e->getArg(0));
+    Address addr = emitPointerWithAlignment(e->getArg(1));
+    val = emitToMemory(val, e->getArg(0)->getType());
+    LValue lv = makeAddrLValue(addr, e->getArg(0)->getType(),
+                               LValueBaseInfo(AlignmentSource::Type));
+    lv.setNontemporal(true);
+    emitStoreOfScalar(val, lv, /*isInit=*/false);
+    return RValue::get(nullptr);
+  }
   case Builtin::BI__c11_atomic_is_lock_free:
   case Builtin::BI__atomic_is_lock_free:
   case Builtin::BI__atomic_test_and_set:

diff  --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinNVPTX.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinNVPTX.cpp
index 3c654761b9903..4db2d7259c6ba 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinNVPTX.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinNVPTX.cpp
@@ -1067,6 +1067,7 @@ static mlir::Value packArgsIntoNVPTXFormatBuffer(CIRGenFunction &cgf,
         dataLayout.getABITypeAlign(argTypes[i]).value());
     cir::StoreOp::create(builder, loc, arg.getKnownRValue().getValue(), member,
                          /*is_volatile=*/false,
+                         /*isNontemporal=*/false,
                          builder.getAlignmentAttr(abiAlign),
                          /*sync_scope=*/cir::SyncScopeKindAttr{},
                          /*mem_order=*/cir::MemOrderAttr{});

diff  --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index beaedd853f57b..fa14b45cbb015 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -499,12 +499,7 @@ void CIRGenFunction::emitStoreOfScalar(mlir::Value value, Address addr,
   }
 
   assert(currSrcLoc && "must pass in source location");
-  builder.createStore(*currSrcLoc, value, addr, isVolatile);
-
-  if (isNontemporal) {
-    cgm.errorNYI(addr.getPointer().getLoc(), "emitStoreOfScalar nontemporal");
-    return;
-  }
+  builder.createStore(*currSrcLoc, value, addr, isVolatile, isNontemporal);
 
   assert(!cir::MissingFeatures::opTBAA());
 }
@@ -736,12 +731,13 @@ void CIRGenFunction::emitStoreOfScalar(mlir::Value value, LValue lvalue,
 
   emitStoreOfScalar(value, lvalue.getAddress(), lvalue.isVolatile(),
                     lvalue.getType(), lvalue.getBaseInfo(), isInit,
-                    /*isNontemporal=*/false);
+                    lvalue.isNontemporal());
 }
 
 mlir::Value CIRGenFunction::emitLoadOfScalar(Address addr, bool isVolatile,
                                              QualType ty, SourceLocation loc,
-                                             LValueBaseInfo baseInfo) {
+                                             LValueBaseInfo baseInfo,
+                                             bool isNontemporal) {
   // Traditional LLVM codegen handles thread local separately, CIR handles
   // as part of getAddrOfGlobalVar (GetGlobalOp).
   mlir::Type eltTy = addr.getElementType();
@@ -771,7 +767,8 @@ mlir::Value CIRGenFunction::emitLoadOfScalar(Address addr, bool isVolatile,
 
   assert(!cir::MissingFeatures::opLoadEmitScalarRangeCheck());
 
-  mlir::Value loadOp = builder.createLoad(getLoc(loc), addr, isVolatile);
+  mlir::Value loadOp =
+      builder.createLoad(getLoc(loc), addr, isVolatile, isNontemporal);
   if (!ty->isBooleanType() && ty->hasBooleanRepresentation())
     cgm.errorNYI("emitLoadOfScalar: boolean type with boolean representation");
 
@@ -780,10 +777,10 @@ mlir::Value CIRGenFunction::emitLoadOfScalar(Address addr, bool isVolatile,
 
 mlir::Value CIRGenFunction::emitLoadOfScalar(LValue lvalue,
                                              SourceLocation loc) {
-  assert(!cir::MissingFeatures::opLoadStoreNontemporal());
   assert(!cir::MissingFeatures::opLoadStoreTbaa());
   return emitLoadOfScalar(lvalue.getAddress(), lvalue.isVolatile(),
-                          lvalue.getType(), loc, lvalue.getBaseInfo());
+                          lvalue.getType(), loc, lvalue.getBaseInfo(),
+                          lvalue.isNontemporal());
 }
 
 /// Given an expression that represents a value lvalue, this

diff  --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index 317151c8d61c6..d0b936f45378d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -2095,7 +2095,8 @@ class CIRGenFunction : public CIRGenTypeCache {
   /// l-value.
   mlir::Value emitLoadOfScalar(LValue lvalue, SourceLocation loc);
   mlir::Value emitLoadOfScalar(Address addr, bool isVolatile, QualType ty,
-                               SourceLocation loc, LValueBaseInfo baseInfo);
+                               SourceLocation loc, LValueBaseInfo baseInfo,
+                               bool isNontemporal = false);
 
   /// Emit code to compute a designator that specifies the location
   /// of the expression.
@@ -2305,6 +2306,7 @@ class CIRGenFunction : public CIRGenTypeCache {
       builder.restoreInsertionPoint(outermostConditional->getInsertPoint());
       builder.createStore(
           value.getLoc(), value, addr, /*isVolatile=*/false,
+          /*isNontemporal=*/false,
           mlir::IntegerAttr::get(
               mlir::IntegerType::get(value.getContext(), 64),
               (uint64_t)addr.getAlignment().getAsAlign().value()));

diff  --git a/clang/lib/CIR/CodeGen/CIRGenValue.h b/clang/lib/CIR/CodeGen/CIRGenValue.h
index e70dac5851189..b291b8c76f1ad 100644
--- a/clang/lib/CIR/CodeGen/CIRGenValue.h
+++ b/clang/lib/CIR/CodeGen/CIRGenValue.h
@@ -173,6 +173,9 @@ class LValue {
   mlir::Type elementType;
   LValueBaseInfo baseInfo;
   const CIRGenBitFieldInfo *bitFieldInfo{nullptr};
+  // This flag shows if a nontemporal load/stores should be used when accessing
+  // this lvalue.
+  bool nontemporal;
 
   void initialize(clang::QualType type, clang::Qualifiers quals,
                   clang::CharUnits alignment, LValueBaseInfo baseInfo) {
@@ -187,6 +190,7 @@ class LValue {
     assert(this->alignment == alignment.getQuantity() &&
            "Alignment exceeds allowed max!");
     this->baseInfo = baseInfo;
+    this->nontemporal = false;
   }
 
 public:
@@ -200,6 +204,9 @@ class LValue {
 
   bool isVolatileQualified() const { return quals.hasVolatile(); }
 
+  bool isNontemporal() const { return nontemporal; }
+  void setNontemporal(bool v) { nontemporal = v; }
+
   unsigned getVRQualifiers() const {
     return quals.getCVRQualifiers() & ~clang::Qualifiers::Const;
   }

diff  --git a/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp b/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp
index 8a82bcb19454e..73b35c7f00c2d 100644
--- a/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp
+++ b/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp
@@ -142,6 +142,7 @@ DeletionKind cir::CopyOp::removeBlockingUses(
   if (loadsFrom(slot))
     cir::StoreOp::create(builder, getLoc(), reachingDefinition, getDst(),
                          /*isVolatile=*/false,
+                         /*isNontemporal=*/false,
                          /*alignment=*/mlir::IntegerAttr{},
                          /*sync_scope=*/cir::SyncScopeKindAttr(),
                          /*mem-order=*/cir::MemOrderAttr());

diff  --git a/clang/lib/CIR/Dialect/Transforms/EHABILowering.cpp b/clang/lib/CIR/Dialect/Transforms/EHABILowering.cpp
index e6c76fed6f78a..0e39fa15d377b 100644
--- a/clang/lib/CIR/Dialect/Transforms/EHABILowering.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/EHABILowering.cpp
@@ -678,7 +678,7 @@ ItaniumEHLowering::lowerConstructCatchParam(cir::ConstructCatchParamOp op,
     mlir::Value casted =
         cir::CastOp::create(builder, loc, paramAddrType.getPointee(),
                             cir::CastKind::bitcast, exnObj);
-    cir::StoreOp::create(builder, loc, casted, paramAddr, {}, {}, {}, {});
+    cir::StoreOp::create(builder, loc, casted, paramAddr, {}, {}, {}, {}, {});
     op.erase();
     return success();
   }
@@ -853,7 +853,7 @@ void ItaniumEHLowering::lowerInitCatchParam(cir::InitCatchParamOp op) {
 
     mlir::Value casted = cir::CastOp::create(builder, loc, elementType,
                                              cir::CastKind::bitcast, exnPtr);
-    cir::StoreOp::create(builder, loc, casted, paramAddr, {}, {}, {}, {});
+    cir::StoreOp::create(builder, loc, casted, paramAddr, {}, {}, {}, {}, {});
     break;
   }
   case InitCatchKind::TrivialCopy: {
@@ -874,13 +874,13 @@ void ItaniumEHLowering::lowerInitCatchParam(cir::InitCatchParamOp op) {
                                              cir::CastKind::bitcast, exnPtr);
     auto loadOp = cir::LoadOp::create(builder, loc, elementType, srcPtr);
     cir::StoreOp::create(builder, loc, loadOp.getResult(), paramAddr, {}, {},
-                         {}, {});
+                         {}, {}, {});
     break;
   }
   case InitCatchKind::Pointer: {
     mlir::Value casted = cir::CastOp::create(builder, loc, elementType,
                                              cir::CastKind::bitcast, exnPtr);
-    cir::StoreOp::create(builder, loc, casted, paramAddr, {}, {}, {}, {});
+    cir::StoreOp::create(builder, loc, casted, paramAddr, {}, {}, {}, {}, {});
     break;
   }
   case InitCatchKind::Objc:

diff  --git a/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp b/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp
index ddeeb98fee820..c487e645e30cd 100644
--- a/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp
@@ -971,16 +971,18 @@ class CIRCleanupScopeOpFlattening
           rewriter.setInsertionPoint(exitOp);
           cir::StoreOp::create(rewriter, loc, operand, alloca,
                                /*isVolatile=*/false,
+                               /*isNontemporal=*/false,
                                /*alignment=*/mlir::IntegerAttr(),
                                cir::SyncScopeKindAttr(), cir::MemOrderAttr());
         }
 
         // Reload the value from the temporary alloca in the destination block.
         rewriter.setInsertionPointToEnd(destBlock);
-        auto loaded = cir::LoadOp::create(
-            rewriter, loc, alloca, /*isDeref=*/false,
-            /*isVolatile=*/false, /*alignment=*/mlir::IntegerAttr(),
-            cir::SyncScopeKindAttr(), cir::MemOrderAttr());
+        auto loaded =
+            cir::LoadOp::create(rewriter, loc, alloca, /*isDeref=*/false,
+                                /*isVolatile=*/false, /*isNontemporal=*/false,
+                                /*alignment=*/mlir::IntegerAttr(),
+                                cir::SyncScopeKindAttr(), cir::MemOrderAttr());
         returnValues.push_back(loaded);
       }
     }
@@ -1290,10 +1292,11 @@ class CIRCleanupScopeOpFlattening
         rewriter.setInsertionPointToEnd(exitBlock);
 
         // Load the destination slot value.
-        auto slotValue = cir::LoadOp::create(
-            rewriter, loc, destSlot, /*isDeref=*/false,
-            /*isVolatile=*/false, /*alignment=*/mlir::IntegerAttr(),
-            cir::SyncScopeKindAttr(), cir::MemOrderAttr());
+        auto slotValue =
+            cir::LoadOp::create(rewriter, loc, destSlot, /*isDeref=*/false,
+                                /*isVolatile=*/false, /*isNontemporal=*/false,
+                                /*alignment=*/mlir::IntegerAttr(),
+                                cir::SyncScopeKindAttr(), cir::MemOrderAttr());
 
         // Create destination blocks for each exit and collect switch case info.
         llvm::SmallVector<mlir::APInt, 8> caseValues;
@@ -1322,6 +1325,7 @@ class CIRCleanupScopeOpFlattening
               rewriter, loc, cir::IntAttr::get(s32Type, exit.destinationId));
           cir::StoreOp::create(rewriter, loc, destIdConst, destSlot,
                                /*isVolatile=*/false,
+                               /*isNontemporal=*/false,
                                /*alignment=*/mlir::IntegerAttr(),
                                cir::SyncScopeKindAttr(), cir::MemOrderAttr());
           rewriter.replaceOpWithNewOp<cir::BrOp>(exit.exitOp, cleanupEntry);

diff  --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerItaniumCXXABI.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerItaniumCXXABI.cpp
index cccbe70876c3f..6e12a13787a2a 100644
--- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerItaniumCXXABI.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerItaniumCXXABI.cpp
@@ -382,6 +382,7 @@ void LowerItaniumCXXABI::lowerGetMethod(
     mlir::Value vtablePtr =
         cir::LoadOp::create(b, loc, vtablePtrPtr, /*isDeref=*/false,
                             /*isVolatile=*/false,
+                            /*isNontemporal=*/false,
                             /*alignment=*/mlir::IntegerAttr(),
                             /*sync_scope=*/cir::SyncScopeKindAttr{},
                             /*mem_order=*/cir::MemOrderAttr());
@@ -409,6 +410,7 @@ void LowerItaniumCXXABI::lowerGetMethod(
                                              cir::CastKind::bitcast, vfpAddr);
     auto fnPtr = cir::LoadOp::create(b, loc, vfpPtr,
                                      /*isDeref=*/false, /*isVolatile=*/false,
+                                     /*isNontemporal=*/false,
                                      /*alignment=*/mlir::IntegerAttr(),
                                      /*sync_scope=*/cir::SyncScopeKindAttr{},
                                      /*mem_order=*/cir::MemOrderAttr());
@@ -782,6 +784,7 @@ static mlir::Value buildDynamicCastToVoidAfterNullCheck(
       builder, loc, vptrPtr,
       /*isDeref=*/false,
       /*is_volatile=*/false,
+      /*isNontemporal=*/false,
       /*alignment=*/builder.getI64IntegerAttr(vtableElemAlign),
       /*sync_scope=*/cir::SyncScopeKindAttr(),
       /*mem_order=*/cir::MemOrderAttr());
@@ -795,6 +798,7 @@ static mlir::Value buildDynamicCastToVoidAfterNullCheck(
       builder, loc, offsetToTopSlotPtr,
       /*isDeref=*/false,
       /*is_volatile=*/false,
+      /*isNontemporal=*/false,
       /*alignment=*/builder.getI64IntegerAttr(vtableElemAlign),
       /*sync_scope=*/cir::SyncScopeKindAttr(),
       /*mem_order=*/cir::MemOrderAttr());
@@ -904,6 +908,7 @@ mlir::Value LowerItaniumCXXABI::readArrayCookieImpl(
       builder, loc, countPtrTy, cir::CastKind::bitcast, countBytePtr);
   return cir::LoadOp::create(
       builder, loc, countPtr, /*isDeref=*/false, /*isVolatile=*/false,
+      /*isNontemporal=*/false,
       builder.getI64IntegerAttr(countAlignment.getQuantity()),
       cir::SyncScopeKindAttr(), cir::MemOrderAttr());
 }

diff  --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 25fa6d1625301..27eba4ee326a5 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1860,15 +1860,12 @@ mlir::LogicalResult CIRToLLVMLoadOpLowering::matchAndRewrite(
 
   assert(!cir::MissingFeatures::lowerModeOptLevel());
 
-  // TODO: nontemporal.
-  assert(!cir::MissingFeatures::opLoadStoreNontemporal());
-
   std::optional<llvm::StringRef> llvmSyncScope =
       getLLVMSyncScope(op.getSyncScope());
 
   mlir::LLVM::LoadOp newLoad = mlir::LLVM::LoadOp::create(
       rewriter, op->getLoc(), llvmTy, adaptor.getAddr(), alignment,
-      op.getIsVolatile(), /*isNonTemporal=*/false,
+      op.getIsVolatile(), /*isNonTemporal=*/op.getIsNontemporal(),
       /*isInvariant=*/false, /*isInvariantGroup=*/false, ordering,
       llvmSyncScope.value_or(std::string()));
 
@@ -1916,8 +1913,6 @@ mlir::LogicalResult CIRToLLVMStoreOpLowering::matchAndRewrite(
   // Convert adapted value to its memory type if needed.
   mlir::Value value = emitToMemory(rewriter, dataLayout,
                                    op.getValue().getType(), adaptor.getValue());
-  // TODO: nontemporal.
-  assert(!cir::MissingFeatures::opLoadStoreNontemporal());
   assert(!cir::MissingFeatures::opLoadStoreTbaa());
 
   std::optional<llvm::StringRef> llvmSyncScope =
@@ -1926,8 +1921,8 @@ mlir::LogicalResult CIRToLLVMStoreOpLowering::matchAndRewrite(
   mlir::LLVM::StoreOp storeOp = mlir::LLVM::StoreOp::create(
       rewriter, op->getLoc(), value, adaptor.getAddr(), alignment,
       op.getIsVolatile(),
-      /*isNonTemporal=*/false, /*isInvariantGroup=*/false, memorder,
-      llvmSyncScope.value_or(std::string()));
+      /*isNonTemporal=*/op.getIsNontemporal(), /*isInvariantGroup=*/false,
+      memorder, llvmSyncScope.value_or(std::string()));
   rewriter.replaceOp(op, storeOp);
   assert(!cir::MissingFeatures::opLoadStoreTbaa());
   return mlir::LogicalResult::success();

diff  --git a/clang/test/CIR/CodeGenBuiltins/builtin-nontemporal.cpp b/clang/test/CIR/CodeGenBuiltins/builtin-nontemporal.cpp
new file mode 100644
index 0000000000000..ec834049ecc44
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/builtin-nontemporal.cpp
@@ -0,0 +1,77 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM
+
+signed char sc;
+unsigned char uc;
+signed short ss;
+unsigned short us;
+signed int si;
+unsigned int ui;
+signed long long sll;
+unsigned long long ull;
+float f1, f2;
+double d1, d2;
+
+void test_nontemporal_store() {
+// CIR-LABEL: cir.func {{.*}}@_Z22test_nontemporal_storev
+// CIR: cir.store nontemporal align(1) {{%.*}}, {{%.*}} : !u8i, !cir.ptr<!u8i>
+// CIR: cir.store nontemporal align(1) {{%.*}}, {{%.*}} : !u8i, !cir.ptr<!u8i>
+// CIR: cir.store nontemporal align(1) {{%.*}}, {{%.*}} : !s8i, !cir.ptr<!s8i>
+// CIR: cir.store nontemporal align(2) {{%.*}}, {{%.*}} : !u16i, !cir.ptr<!u16i>
+// CIR: cir.store nontemporal align(4) {{%.*}}, {{%.*}} : !s32i, !cir.ptr<!s32i>
+// CIR: cir.store nontemporal align(8) {{%.*}}, {{%.*}} : !u64i, !cir.ptr<!u64i>
+// CIR: cir.store nontemporal align(4) {{%.*}}, {{%.*}} : !cir.float, !cir.ptr<!cir.float>
+// CIR: cir.store nontemporal align(8) {{%.*}}, {{%.*}} : !cir.double, !cir.ptr<!cir.double>
+// CIR: cir.return
+
+// LLVM-LABEL: define dso_local void @_Z22test_nontemporal_storev
+// LLVM: store i8 1, ptr @uc, align 1, !nontemporal
+// LLVM: store i8 1, ptr @uc, align 1, !nontemporal
+// LLVM: store i8 1, ptr @sc, align 1, !nontemporal
+// LLVM: store i16 1, ptr @us, align 2, !nontemporal
+// LLVM: store i32 1, ptr @si, align 4, !nontemporal
+// LLVM: store i64 1, ptr @ull, align 8, !nontemporal
+// LLVM: store float 1.0{{.*}}, ptr @f1, align 4, !nontemporal
+// LLVM: store double 1.0{{.*}}, ptr @d1, align 8, !nontemporal
+// LLVM: ret void
+
+  __builtin_nontemporal_store(true, &uc);
+  __builtin_nontemporal_store(1, &uc);
+  __builtin_nontemporal_store(1, &sc);
+  __builtin_nontemporal_store(1, &us);
+  __builtin_nontemporal_store(1, &si);
+  __builtin_nontemporal_store(1, &ull);
+  __builtin_nontemporal_store(1.0, &f1);
+  __builtin_nontemporal_store(1.0, &d1);
+}
+
+void test_nontemporal_load() {
+// CIR-LABEL: cir.func {{.*}}@_Z21test_nontemporal_loadv
+// CIR: cir.load nontemporal align(1) {{%.*}} : !cir.ptr<!s8i>, !s8i
+// CIR: cir.load nontemporal align(1) {{%.*}} : !cir.ptr<!u8i>, !u8i
+// CIR: cir.load nontemporal align(2) {{%.*}} : !cir.ptr<!s16i>, !s16i
+// CIR: cir.load nontemporal align(4) {{%.*}} : !cir.ptr<!u32i>, !u32i
+// CIR: cir.load nontemporal align(8) {{%.*}} : !cir.ptr<!s64i>, !s64i
+// CIR: cir.load nontemporal align(4) {{%.*}} : !cir.ptr<!cir.float>, !cir.float
+// CIR: cir.load nontemporal align(8) {{%.*}} : !cir.ptr<!cir.double>, !cir.double
+// CIR: cir.return
+
+// LLVM-LABEL: define dso_local void @_Z21test_nontemporal_loadv
+// LLVM: load i8, ptr @sc, align 1, !nontemporal
+// LLVM: load i8, ptr @uc, align 1, !nontemporal
+// LLVM: load i16, ptr @ss, align 2, !nontemporal
+// LLVM: load i32, ptr @ui, align 4, !nontemporal
+// LLVM: load i64, ptr @sll, align 8, !nontemporal
+// LLVM: load float, ptr @f2, align 4, !nontemporal
+// LLVM: load double, ptr @d2, align 8, !nontemporal
+// LLVM: ret void
+
+  uc = __builtin_nontemporal_load(&sc);
+  sc = __builtin_nontemporal_load(&uc);
+  us = __builtin_nontemporal_load(&ss);
+  si = __builtin_nontemporal_load(&ui);
+  ull = __builtin_nontemporal_load(&sll);
+  f1 = __builtin_nontemporal_load(&f2);
+  d1 = __builtin_nontemporal_load(&d2);
+}


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to