https://github.com/jthackray updated 
https://github.com/llvm/llvm-project/pull/181386

>From b026046420f4b9ced8c1f250a8b49638b910d2a4 Mon Sep 17 00:00:00 2001
From: Jonathan Thackray <[email protected]>
Date: Fri, 13 Feb 2026 14:42:05 +0000
Subject: [PATCH 1/6] [AArch64][clang][llvm] Add ACLE `stshh` atomic store
 builtin

Add `__arm_atomic_store_with_stshh` implementation as defined
in the ACLE. Validate that the arguments passed are correct, and
lower it to the stshh intrinsic plus an atomic store with the
allowed orderings.

Gate this on FEAT_PCDPHINT so that availability matches
hardware support for the `STSHH` instruction. Use an i64
immediate and side-effect modeling to satisfy tablegen and decoding.
---
 clang/include/clang/Basic/BuiltinsAArch64.def |   3 +
 .../clang/Basic/DiagnosticSemaKinds.td        |   9 ++
 .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp  |   7 +
 clang/lib/CodeGen/TargetBuiltins/ARM.cpp      |  48 ++++++
 clang/lib/Headers/arm_acle.h                  |   6 +
 clang/lib/Sema/SemaARM.cpp                    | 140 ++++++++++++++++++
 .../CodeGen/AArch64/pcdphint-atomic-store.c   |  31 ++++
 .../test/Sema/AArch64/pcdphint-atomic-store.c |  29 ++++
 llvm/include/llvm/IR/IntrinsicsAArch64.td     |   2 +
 .../lib/Target/AArch64/AArch64InstrFormats.td |  12 +-
 .../Disassembler/AArch64Disassembler.cpp      |  13 ++
 11 files changed, 298 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/CodeGen/AArch64/pcdphint-atomic-store.c
 create mode 100644 clang/test/Sema/AArch64/pcdphint-atomic-store.c

diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def 
b/clang/include/clang/Basic/BuiltinsAArch64.def
index 5d7e956b73b87..5d747f4d9c4b2 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -135,6 +135,9 @@ TARGET_BUILTIN(__builtin_arm_st64b, "vv*WUiC*", "n", "ls64")
 TARGET_BUILTIN(__builtin_arm_st64bv, "WUiv*WUiC*", "n", "ls64")
 TARGET_BUILTIN(__builtin_arm_st64bv0, "WUiv*WUiC*", "n", "ls64")
 
+// Atomic store with PCDPHINT
+TARGET_BUILTIN(__builtin_arm_atomic_store_with_stshh, "v.", "t", "pcdphint")
+
 // Armv9.3-A Guarded Control Stack
 TARGET_BUILTIN(__builtin_arm_gcspopm, "WUiWUi", "n", "gcs")
 TARGET_BUILTIN(__builtin_arm_gcsss, "v*v*", "n", "gcs")
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 68016ec4d58a3..a2a9da2baaf00 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9550,6 +9550,15 @@ def err_atomic_builtin_must_be_pointer_intfltptr : Error<
 def err_atomic_builtin_pointer_size : Error<
   "address argument to atomic builtin must be a pointer to 1,2,4,8 or 16 byte "
   "type (%0 invalid)">;
+def err_arm_atomic_store_with_stshh_bad_type : Error<
+  "address argument to '__arm_atomic_store_with_stshh' must be a pointer to an 
"
+  "8,16,32, or 64-bit integer type (%0 invalid)">;
+def err_arm_atomic_store_with_stshh_bad_value_type : Error<
+  "value argument to '__arm_atomic_store_with_stshh' must be an integer of the 
"
+  "same size as the pointed-to type (%0 invalid)">;
+def err_arm_atomic_store_with_stshh_bad_order : Error<
+  "memory order argument to '__arm_atomic_store_with_stshh' must be one of "
+  "__ATOMIC_RELAXED, __ATOMIC_RELEASE, or __ATOMIC_SEQ_CST">;
 def err_atomic_exclusive_builtin_pointer_size : Error<
   "address argument to load or store exclusive builtin must be a pointer to "
   // Because the range of legal sizes for load/store exclusive varies with the
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index 16fe3142f2bc6..8d913f87061ae 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -1089,6 +1089,13 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned 
builtinID, const CallExpr *expr,
     return mlir::Value{};
   }
 
+  if (builtinID == clang::AArch64::BI__builtin_arm_atomic_store_with_stshh) {
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented AArch64 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return mlir::Value{};
+  }
+
   if (builtinID == clang::AArch64::BI__builtin_arm_rndr ||
       builtinID == clang::AArch64::BI__builtin_arm_rndrrs) {
     cgm.errorNYI(expr->getSourceRange(),
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp 
b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index e6801f77232ad..e108cddecc9cb 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -5278,6 +5278,54 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned 
BuiltinID,
     return Builder.CreateCall(F, Args);
   }
 
+  if (BuiltinID == clang::AArch64::BI__builtin_arm_atomic_store_with_stshh) {
+    const Expr *Arg0 = E->getArg(0);
+    const Expr *Arg1 = E->getArg(1);
+    const Expr *Arg2 = E->getArg(2);
+    const Expr *Arg3 = E->getArg(3);
+
+    Value *StoreAddr = EmitScalarExpr(Arg0);
+    Value *StoreValue = EmitScalarExpr(Arg1);
+
+    llvm::APSInt OrderVal = Arg2->EvaluateKnownConstInt(getContext());
+    llvm::APSInt RetVal = Arg3->EvaluateKnownConstInt(getContext());
+
+    llvm::AtomicOrdering Ordering;
+    switch (OrderVal.getZExtValue()) {
+    case 0: // __ATOMIC_RELAXED
+      Ordering = llvm::AtomicOrdering::Monotonic;
+      break;
+    case 3: // __ATOMIC_RELEASE
+      Ordering = llvm::AtomicOrdering::Release;
+      break;
+    case 5: // __ATOMIC_SEQ_CST
+      Ordering = llvm::AtomicOrdering::SequentiallyConsistent;
+      break;
+    default:
+      llvm_unreachable(
+          "unexpected memory order for __arm_atomic_store_with_stshh");
+    }
+
+    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_stshh);
+    llvm::Value *Arg = llvm::ConstantInt::get(Int64Ty, RetVal.getZExtValue());
+    CallInst *HintCall = Builder.CreateCall(F, Arg);
+
+    QualType ValQT = Arg0->IgnoreParenImpCasts()
+                         ->getType()
+                         ->castAs<PointerType>()
+                         ->getPointeeType();
+    llvm::Type *ValTy = ConvertType(ValQT);
+
+    CharUnits ValAlign = getContext().getTypeAlignInChars(ValQT);
+    Address Addr = Address(StoreAddr, ValTy, ValAlign);
+    LValue LVal = MakeAddrLValue(Addr, ValQT);
+
+    EmitAtomicStore(RValue::get(StoreValue), LVal, Ordering,
+                    /* isVolatile= */ false,
+                    /* isInit= */ false);
+    return HintCall;
+  }
+
   if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
       BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
 
diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index 9a6b6a837fa5a..ec06072bcc4bf 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -840,6 +840,12 @@ __rndrrs(uint64_t *__p) {
 }
 #endif
 
+/* Atomic store with PCDPHINT */
+#if defined(__ARM_FEATURE_PCDPHINT)
+#define __arm_atomic_store_with_stshh(ptr, data, memory_order, ret)            
\
+  __builtin_arm_atomic_store_with_stshh((ptr), (data), (memory_order), (ret))
+#endif
+
 /* 11.2 Guarded Control Stack intrinsics */
 #if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
 static __inline__ void * __attribute__((__always_inline__, __nodebug__))
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index 33edc455366a7..64486fe04717c 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -1107,6 +1107,143 @@ bool SemaARM::CheckARMBuiltinFunctionCall(const 
TargetInfo &TI,
   }
 }
 
+static bool CheckAArch64AtomicStoreWithStshhCall(SemaARM &S,
+                                                 CallExpr *TheCall) {
+  Sema &SemaRef = S.SemaRef;
+  ASTContext &Context = S.getASTContext();
+  DeclRefExpr *DRE =
+      cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts());
+  SourceLocation Loc = DRE->getBeginLoc();
+
+  // Ensure we have the proper number of arguments.
+  if (SemaRef.checkArgCount(TheCall, 4))
+    return true;
+
+  ExprResult PtrRes =
+      SemaRef.DefaultFunctionArrayLvalueConversion(TheCall->getArg(0));
+
+  // Bail if conversion failed.
+  if (PtrRes.isInvalid())
+    return true;
+
+  TheCall->setArg(0, PtrRes.get());
+  Expr *PointerArg = PtrRes.get();
+
+  // Check arg 0 is a pointer type, err out if not
+  const PointerType *PointerTy = PointerArg->getType()->getAs<PointerType>();
+  if (!PointerTy) {
+    SemaRef.Diag(Loc, diag::err_atomic_builtin_must_be_pointer)
+        << PointerArg->getType() << 0 << PointerArg->getSourceRange();
+    return true;
+  }
+
+  // Reject const-qualified pointee types, with an error
+  QualType ValType = PointerTy->getPointeeType();
+  if (ValType.isConstQualified()) {
+    SemaRef.Diag(Loc, diag::err_atomic_builtin_cannot_be_const)
+        << PointerArg->getType() << PointerArg->getSourceRange();
+    return true;
+  }
+
+  // Only integer element types are supported.
+  ValType = ValType.getUnqualifiedType();
+  if (!ValType->isIntegerType()) {
+    SemaRef.Diag(Loc, diag::err_arm_atomic_store_with_stshh_bad_type)
+        << PointerArg->getType() << PointerArg->getSourceRange();
+    return true;
+  }
+
+  // Only 8/16/32/64-bit integers are supported.
+  unsigned Bits = Context.getTypeSize(ValType);
+  switch (Bits) {
+  case 8:
+  case 16:
+  case 32:
+  case 64:
+    break;
+  default:
+    SemaRef.Diag(Loc, diag::err_arm_atomic_store_with_stshh_bad_type)
+        << PointerArg->getType() << PointerArg->getSourceRange();
+    return true;
+  }
+
+  ExprResult ValRes =
+      SemaRef.DefaultFunctionArrayLvalueConversion(TheCall->getArg(1));
+
+  // Bail if conversion failed.
+  if (ValRes.isInvalid())
+    return true;
+
+  // Check if value is an integer type.
+  Expr *ValArg = ValRes.get();
+  if (!ValArg->getType()->isIntegerType()) {
+    SemaRef.Diag(Loc, diag::err_arm_atomic_store_with_stshh_bad_value_type)
+        << ValArg->getType() << ValArg->getSourceRange();
+    return true;
+  }
+
+  // Value width must match the pointee width.
+  if (Context.getTypeSize(ValArg->getType()) != Bits) {
+    SemaRef.Diag(Loc, diag::err_arm_atomic_store_with_stshh_bad_value_type)
+        << ValArg->getType() << ValArg->getSourceRange();
+    return true;
+  }
+
+  // Prepare a cast if the value type differs
+  ExprResult ValArgRes;
+  CastKind CK =
+      ValArg->getType().getCanonicalType() == ValType.getCanonicalType()
+          ? CK_NoOp
+          : CK_IntegralCast;
+
+  // Apply cast to the pointee type.
+  ValArgRes = SemaRef.ImpCastExprToType(ValArg, ValType, CK);
+
+  // Bail if cast failed.
+  if (ValArgRes.isInvalid())
+    return true;
+
+  TheCall->setArg(1, ValArgRes.get());
+  Expr *OrderArg = TheCall->getArg(2);
+
+  // Defer validation for dependent memory_order arguments.
+  if (OrderArg->isValueDependent())
+    return false;
+
+  // Require an order value.
+  std::optional<llvm::APSInt> OrderValOpt =
+      OrderArg->getIntegerConstantExpr(Context);
+  if (!OrderValOpt) {
+    SemaRef.Diag(Loc, diag::err_arm_atomic_store_with_stshh_bad_order)
+        << OrderArg->getSourceRange();
+    return true;
+  }
+
+  // Validate order; not used here; used later in codegen.
+  llvm::APSInt OrderVal = *OrderValOpt;
+  int64_t Order = OrderVal.getSExtValue();
+  // __ATOMIC_RELAXED=0, __ATOMIC_RELEASE=3, __ATOMIC_SEQ_CST=5.
+  constexpr int64_t AtomicRelaxed = 0;
+  constexpr int64_t AtomicRelease = 3;
+  constexpr int64_t AtomicSeqCst = 5;
+  switch (Order) {
+  case AtomicRelaxed:
+  case AtomicRelease:
+  case AtomicSeqCst:
+    break;
+  default:
+    SemaRef.Diag(Loc, diag::err_arm_atomic_store_with_stshh_bad_order)
+        << OrderArg->getSourceRange();
+    return true;
+  }
+
+  // Arg 3 (retention policy) must be between KEEP(0) and STRM(1).
+  if (SemaRef.BuiltinConstantArgRange(TheCall, 3, 0, 1))
+    return true;
+
+  return false;
+}
+
 bool SemaARM::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI,
                                               unsigned BuiltinID,
                                               CallExpr *TheCall) {
@@ -1117,6 +1254,9 @@ bool SemaARM::CheckAArch64BuiltinFunctionCall(const 
TargetInfo &TI,
     return CheckARMBuiltinExclusiveCall(TI, BuiltinID, TheCall);
   }
 
+  if (BuiltinID == AArch64::BI__builtin_arm_atomic_store_with_stshh)
+    return CheckAArch64AtomicStoreWithStshhCall(*this, TheCall);
+
   if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
     return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 1) ||
            SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 3) ||
diff --git a/clang/test/CodeGen/AArch64/pcdphint-atomic-store.c 
b/clang/test/CodeGen/AArch64/pcdphint-atomic-store.c
new file mode 100644
index 0000000000000..79510be522b6a
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/pcdphint-atomic-store.c
@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +pcdphint 
-D__ARM_FEATURE_PCDPHINT -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_acle.h>
+
+void test_u8(unsigned char *p, unsigned char v) {
+  __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
+}
+// CHECK-LABEL: @test_u8
+// CHECK: call void @llvm.aarch64.stshh(i64 0)
+// CHECK-NEXT: store atomic i8 %{{.*}}, ptr %{{.*}} monotonic
+
+void test_u16(unsigned short *p, unsigned short v) {
+  __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELEASE, 1);
+}
+// CHECK-LABEL: @test_u16
+// CHECK: call void @llvm.aarch64.stshh(i64 1)
+// CHECK-NEXT: store atomic i16 %{{.*}}, ptr %{{.*}} release
+
+void test_u32(unsigned int *p, unsigned int v) {
+  __arm_atomic_store_with_stshh(p, v, __ATOMIC_SEQ_CST, 0);
+}
+// CHECK-LABEL: @test_u32
+// CHECK: call void @llvm.aarch64.stshh(i64 0)
+// CHECK-NEXT: store atomic i32 %{{.*}}, ptr %{{.*}} seq_cst
+
+void test_u64(unsigned long long *p, unsigned long long v) {
+  __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 1);
+}
+// CHECK-LABEL: @test_u64
+// CHECK: call void @llvm.aarch64.stshh(i64 1)
+// CHECK-NEXT: store atomic i64 %{{.*}}, ptr %{{.*}} monotonic
diff --git a/clang/test/Sema/AArch64/pcdphint-atomic-store.c 
b/clang/test/Sema/AArch64/pcdphint-atomic-store.c
new file mode 100644
index 0000000000000..091f1c25c2880
--- /dev/null
+++ b/clang/test/Sema/AArch64/pcdphint-atomic-store.c
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +pcdphint \
+// RUN:   -D__ARM_FEATURE_PCDPHINT -fsyntax-only -verify %s
+
+#include <arm_acle.h>
+
+void test_const_pointer(const unsigned int *p, unsigned int v) {
+  __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
+  // expected-error@-1 {{address argument to atomic builtin cannot be 
const-qualified}}
+}
+
+void test_non_integer_pointer(float *p, float v) {
+  __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
+  // expected-error@-1 {{address argument to '__arm_atomic_store_with_stshh' 
must be a pointer to an 8,16,32, or 64-bit integer type}}
+}
+
+void test_invalid_bit_width(__int128 *p, __int128 v) {
+  __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
+  // expected-error@-1 {{address argument to '__arm_atomic_store_with_stshh' 
must be a pointer to an 8,16,32, or 64-bit integer type}}
+}
+
+void test_invalid_memory_order(unsigned int *p, unsigned int v) {
+  __arm_atomic_store_with_stshh(p, v, __ATOMIC_ACQUIRE, 0);
+  // expected-error@-1 {{memory order argument to 
'__arm_atomic_store_with_stshh' must be one of __ATOMIC_RELAXED, 
__ATOMIC_RELEASE, or __ATOMIC_SEQ_CST}}
+}
+
+void test_invalid_retention_policy(unsigned int *p, unsigned int v) {
+  __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 2);
+  // expected-error@-1 {{argument value 2 is outside the valid range [0, 1]}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td 
b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 7f4b7383415c1..19ba3a5a740c5 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -62,6 +62,8 @@ def int_aarch64_frint64x
 // HINT
 
 def int_aarch64_hint : DefaultAttrsIntrinsic<[], [llvm_i32_ty]>;
+def int_aarch64_stshh
+    : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrHasSideEffects]>;
 
 def int_aarch64_break : Intrinsic<[], [llvm_i32_ty],
     [IntrNoMem, IntrHasSideEffects, IntrNoReturn, IntrCold, 
ImmArg<ArgIndex<0>>]>;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td 
b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 7d4e034ca16c8..69fb01ada0b40 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1840,16 +1840,24 @@ def PHintInstOperand : AsmOperandClass {
     let ParserMethod = "tryParsePHintInstOperand";
 }
 
-def phint_op : Operand<i32> {
+def phint_op : Operand<i64> {
     let ParserMatchClass = PHintInstOperand;
    let PrintMethod = "printPHintOp";
    let OperandType = "OPERAND_IMMEDIATE";
+   let MIOperandInfo = (ops i64imm);
+  let DecoderMethod = "DecodeUImm<3>";
 }
 
 class STSHHI
-    : SimpleSystemI<0, (ins phint_op:$policy), "stshh", "\t$policy", []>,
+    : SimpleSystemI<0, (ins phint_op:$policy), "stshh", "\t$policy",
+                    [(int_aarch64_stshh (i64 imm0_7:$policy))]>,
       Sched<[WriteHint]> {
   bits<3> policy;
+  // NOTE: ideally, this would have mayLoad = 0, mayStore = 0, but we cannot
+  // model patterns with sufficiently fine granularity.
+  let mayLoad = 1;
+  let mayStore = 1;
+  let hasSideEffects = 1;
   let Inst{20-12} = 0b000011001;
   let Inst{11-8} = 0b0110;
   let Inst{7-5} = policy;
diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp 
b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index 4eb762a00d477..8fa1913ce24e5 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -38,6 +38,9 @@ using DecodeStatus = MCDisassembler::DecodeStatus;
 template <int Bits>
 static DecodeStatus DecodeSImm(MCInst &Inst, uint64_t Imm, uint64_t Address,
                                const MCDisassembler *Decoder);
+template <int Bits>
+static DecodeStatus DecodeUImm(MCInst &Inst, uint64_t Imm, uint64_t Address,
+                               const MCDisassembler *Decoder);
 
 #define Success MCDisassembler::Success
 #define Fail MCDisassembler::Fail
@@ -1442,6 +1445,16 @@ static DecodeStatus DecodeSImm(MCInst &Inst, uint64_t 
Imm, uint64_t Address,
   return Success;
 }
 
+template <int Bits>
+static DecodeStatus DecodeUImm(MCInst &Inst, uint64_t Imm, uint64_t Address,
+                               const MCDisassembler *Decoder) {
+  if (Imm & ~((1ULL << Bits) - 1))
+    return Fail;
+
+  Inst.addOperand(MCOperand::createImm(Imm));
+  return Success;
+}
+
 // Decode 8-bit signed/unsigned immediate for a given element width.
 template <int ElementWidth>
 static DecodeStatus DecodeImm8OptLsl(MCInst &Inst, unsigned Imm, uint64_t Addr,

>From 46ddaaa8b80d4d40ed10bda30535d02c4e49f117 Mon Sep 17 00:00:00 2001
From: Jonathan Thackray <[email protected]>
Date: Fri, 13 Feb 2026 17:19:25 +0000
Subject: [PATCH 2/6] fixup!

A few small tidyups
---
 clang/lib/CodeGen/TargetBuiltins/ARM.cpp        | 13 +++++++------
 clang/test/Sema/AArch64/pcdphint-atomic-store.c |  4 ++++
 llvm/lib/Target/AArch64/AArch64InstrFormats.td  |  8 ++++----
 3 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp 
b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index e108cddecc9cb..a281aaa2d4d33 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -5288,7 +5288,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned 
BuiltinID,
     Value *StoreValue = EmitScalarExpr(Arg1);
 
     llvm::APSInt OrderVal = Arg2->EvaluateKnownConstInt(getContext());
-    llvm::APSInt RetVal = Arg3->EvaluateKnownConstInt(getContext());
+    llvm::APSInt RetentionPolicy = Arg3->EvaluateKnownConstInt(getContext());
 
     llvm::AtomicOrdering Ordering;
     switch (OrderVal.getZExtValue()) {
@@ -5306,10 +5306,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned 
BuiltinID,
           "unexpected memory order for __arm_atomic_store_with_stshh");
     }
 
-    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_stshh);
-    llvm::Value *Arg = llvm::ConstantInt::get(Int64Ty, RetVal.getZExtValue());
-    CallInst *HintCall = Builder.CreateCall(F, Arg);
-
     QualType ValQT = Arg0->IgnoreParenImpCasts()
                          ->getType()
                          ->castAs<PointerType>()
@@ -5320,10 +5316,15 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned 
BuiltinID,
     Address Addr = Address(StoreAddr, ValTy, ValAlign);
     LValue LVal = MakeAddrLValue(Addr, ValQT);
 
+    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_stshh);
+    llvm::Value *Arg =
+        llvm::ConstantInt::get(Int64Ty, RetentionPolicy.getZExtValue());
+    Builder.CreateCall(F, Arg);
+
     EmitAtomicStore(RValue::get(StoreValue), LVal, Ordering,
                     /* isVolatile= */ false,
                     /* isInit= */ false);
-    return HintCall;
+    return nullptr;
   }
 
   if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
diff --git a/clang/test/Sema/AArch64/pcdphint-atomic-store.c 
b/clang/test/Sema/AArch64/pcdphint-atomic-store.c
index 091f1c25c2880..d9784656d486d 100644
--- a/clang/test/Sema/AArch64/pcdphint-atomic-store.c
+++ b/clang/test/Sema/AArch64/pcdphint-atomic-store.c
@@ -27,3 +27,7 @@ void test_invalid_retention_policy(unsigned int *p, unsigned 
int v) {
   __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 2);
   // expected-error@-1 {{argument value 2 is outside the valid range [0, 1]}}
 }
+
+void test_signed_ok(int *p, int v) {
+  __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
+}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td 
b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 69fb01ada0b40..1390600488bf2 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1842,10 +1842,10 @@ def PHintInstOperand : AsmOperandClass {
 
 def phint_op : Operand<i64> {
     let ParserMatchClass = PHintInstOperand;
-   let PrintMethod = "printPHintOp";
-   let OperandType = "OPERAND_IMMEDIATE";
-   let MIOperandInfo = (ops i64imm);
-  let DecoderMethod = "DecodeUImm<3>";
+    let PrintMethod = "printPHintOp";
+    let OperandType = "OPERAND_IMMEDIATE";
+    let MIOperandInfo = (ops i64imm);
+    let DecoderMethod = "DecodeUImm<3>";
 }
 
 class STSHHI

>From 7f0f42664403902bd43a851778b213c97a42c6e5 Mon Sep 17 00:00:00 2001
From: Jonathan Thackray <[email protected]>
Date: Fri, 13 Feb 2026 17:26:06 +0000
Subject: [PATCH 3/6] fixup!

Tidy up a few more small issues, and remove the feature gating.
---
 clang/include/clang/Basic/BuiltinsAArch64.def      | 2 +-
 clang/lib/CodeGen/TargetBuiltins/ARM.cpp           | 4 ++--
 clang/lib/Headers/arm_acle.h                       | 2 --
 clang/lib/Sema/SemaARM.cpp                         | 2 +-
 clang/test/CodeGen/AArch64/pcdphint-atomic-store.c | 2 +-
 clang/test/Sema/AArch64/pcdphint-atomic-store.c    | 8 ++++++--
 6 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def 
b/clang/include/clang/Basic/BuiltinsAArch64.def
index 5d747f4d9c4b2..5722b045f1ed1 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -136,7 +136,7 @@ TARGET_BUILTIN(__builtin_arm_st64bv, "WUiv*WUiC*", "n", 
"ls64")
 TARGET_BUILTIN(__builtin_arm_st64bv0, "WUiv*WUiC*", "n", "ls64")
 
 // Atomic store with PCDPHINT
-TARGET_BUILTIN(__builtin_arm_atomic_store_with_stshh, "v.", "t", "pcdphint")
+TARGET_BUILTIN(__builtin_arm_atomic_store_with_stshh, "v.", "t", "")
 
 // Armv9.3-A Guarded Control Stack
 TARGET_BUILTIN(__builtin_arm_gcspopm, "WUiWUi", "n", "gcs")
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp 
b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index a281aaa2d4d33..d73cb1090d1cc 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -5319,12 +5319,12 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned 
BuiltinID,
     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_stshh);
     llvm::Value *Arg =
         llvm::ConstantInt::get(Int64Ty, RetentionPolicy.getZExtValue());
-    Builder.CreateCall(F, Arg);
+    CallInst *HintCall = Builder.CreateCall(F, Arg);
 
     EmitAtomicStore(RValue::get(StoreValue), LVal, Ordering,
                     /* isVolatile= */ false,
                     /* isInit= */ false);
-    return nullptr;
+    return HintCall;
   }
 
   if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index ec06072bcc4bf..88ffd82912df1 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -841,10 +841,8 @@ __rndrrs(uint64_t *__p) {
 #endif
 
 /* Atomic store with PCDPHINT */
-#if defined(__ARM_FEATURE_PCDPHINT)
 #define __arm_atomic_store_with_stshh(ptr, data, memory_order, ret)            
\
   __builtin_arm_atomic_store_with_stshh((ptr), (data), (memory_order), (ret))
-#endif
 
 /* 11.2 Guarded Control Stack intrinsics */
 #if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index 64486fe04717c..b09b5c0cf2f66 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -1219,7 +1219,7 @@ static bool CheckAArch64AtomicStoreWithStshhCall(SemaARM 
&S,
     return true;
   }
 
-  // Validate order; not used here; used later in codegen.
+  // Validate order here; the value is mapped to LLVM ordering in codegen.
   llvm::APSInt OrderVal = *OrderValOpt;
   int64_t Order = OrderVal.getSExtValue();
   // __ATOMIC_RELAXED=0, __ATOMIC_RELEASE=3, __ATOMIC_SEQ_CST=5.
diff --git a/clang/test/CodeGen/AArch64/pcdphint-atomic-store.c 
b/clang/test/CodeGen/AArch64/pcdphint-atomic-store.c
index 79510be522b6a..fceb739782641 100644
--- a/clang/test/CodeGen/AArch64/pcdphint-atomic-store.c
+++ b/clang/test/CodeGen/AArch64/pcdphint-atomic-store.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +pcdphint 
-D__ARM_FEATURE_PCDPHINT -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -o - %s | 
FileCheck %s
 
 #include <arm_acle.h>
 
diff --git a/clang/test/Sema/AArch64/pcdphint-atomic-store.c 
b/clang/test/Sema/AArch64/pcdphint-atomic-store.c
index d9784656d486d..bd69ca859f15e 100644
--- a/clang/test/Sema/AArch64/pcdphint-atomic-store.c
+++ b/clang/test/Sema/AArch64/pcdphint-atomic-store.c
@@ -1,5 +1,4 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +pcdphint \
-// RUN:   -D__ARM_FEATURE_PCDPHINT -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -fsyntax-only -verify %s
 
 #include <arm_acle.h>
 
@@ -31,3 +30,8 @@ void test_invalid_retention_policy(unsigned int *p, unsigned 
int v) {
 void test_signed_ok(int *p, int v) {
   __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
 }
+
+void test_value_size_mismatch(int *p, short v) {
+  __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
+  // expected-error@-1 {{value argument to '__arm_atomic_store_with_stshh' 
must be an integer of the same size as the pointed-to type}}
+}

>From ec47932776c41d342fec507c133b8ee6358d9faf Mon Sep 17 00:00:00 2001
From: Jonathan Thackray <[email protected]>
Date: Fri, 13 Feb 2026 21:38:30 +0000
Subject: [PATCH 4/6] fixup! Improve error diagnostics, and other cleanups

---
 clang/include/clang/Basic/DiagnosticSemaKinds.td   |  2 +-
 clang/lib/CodeGen/TargetBuiltins/ARM.cpp           |  3 ++-
 clang/lib/Headers/arm_acle.h                       |  2 ++
 clang/lib/Sema/SemaARM.cpp                         |  6 ++++--
 clang/test/Sema/AArch64/pcdphint-atomic-store.c    |  2 +-
 llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll | 12 ++++++++++++
 6 files changed, 22 insertions(+), 5 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index a2a9da2baaf00..806b275f6df5f 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9555,7 +9555,7 @@ def err_arm_atomic_store_with_stshh_bad_type : Error<
   "8,16,32, or 64-bit integer type (%0 invalid)">;
 def err_arm_atomic_store_with_stshh_bad_value_type : Error<
   "value argument to '__arm_atomic_store_with_stshh' must be an integer of the 
"
-  "same size as the pointed-to type (%0 invalid)">;
+  "same size as the pointed-to type; expected %0 bits, got %1 bits">;
 def err_arm_atomic_store_with_stshh_bad_order : Error<
   "memory order argument to '__arm_atomic_store_with_stshh' must be one of "
   "__ATOMIC_RELAXED, __ATOMIC_RELEASE, or __ATOMIC_SEQ_CST">;
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp 
b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index d73cb1090d1cc..f4dfc6b6921b3 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -5319,10 +5319,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned 
BuiltinID,
     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_stshh);
     llvm::Value *Arg =
         llvm::ConstantInt::get(Int64Ty, RetentionPolicy.getZExtValue());
+    // Execute hint before store to provide cache prefetch guidance.
     CallInst *HintCall = Builder.CreateCall(F, Arg);
 
     EmitAtomicStore(RValue::get(StoreValue), LVal, Ordering,
-                    /* isVolatile= */ false,
+                    /* isVolatile= */ LVal.isVolatile(),
                     /* isInit= */ false);
     return HintCall;
   }
diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index 88ffd82912df1..19a534d320790 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -841,8 +841,10 @@ __rndrrs(uint64_t *__p) {
 #endif
 
 /* Atomic store with PCDPHINT */
+#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
 #define __arm_atomic_store_with_stshh(ptr, data, memory_order, ret)            
\
   __builtin_arm_atomic_store_with_stshh((ptr), (data), (memory_order), (ret))
+#endif
 
 /* 11.2 Guarded Control Stack intrinsics */
 #if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index b09b5c0cf2f66..fd135c15e5c0c 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -1178,14 +1178,16 @@ static bool 
CheckAArch64AtomicStoreWithStshhCall(SemaARM &S,
   Expr *ValArg = ValRes.get();
   if (!ValArg->getType()->isIntegerType()) {
     SemaRef.Diag(Loc, diag::err_arm_atomic_store_with_stshh_bad_value_type)
-        << ValArg->getType() << ValArg->getSourceRange();
+        << Bits << Context.getTypeSize(ValArg->getType())
+        << ValArg->getSourceRange();
     return true;
   }
 
   // Value width must match the pointee width.
   if (Context.getTypeSize(ValArg->getType()) != Bits) {
     SemaRef.Diag(Loc, diag::err_arm_atomic_store_with_stshh_bad_value_type)
-        << ValArg->getType() << ValArg->getSourceRange();
+        << Bits << Context.getTypeSize(ValArg->getType())
+        << ValArg->getSourceRange();
     return true;
   }
 
diff --git a/clang/test/Sema/AArch64/pcdphint-atomic-store.c 
b/clang/test/Sema/AArch64/pcdphint-atomic-store.c
index bd69ca859f15e..96efaff847b0c 100644
--- a/clang/test/Sema/AArch64/pcdphint-atomic-store.c
+++ b/clang/test/Sema/AArch64/pcdphint-atomic-store.c
@@ -33,5 +33,5 @@ void test_signed_ok(int *p, int v) {
 
 void test_value_size_mismatch(int *p, short v) {
   __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
-  // expected-error@-1 {{value argument to '__arm_atomic_store_with_stshh' must be an integer of the same size as the pointed-to type}}
+  // expected-error@-1 {{value argument to '__arm_atomic_store_with_stshh' must be an integer of the same size as the pointed-to type; expected 32 bits, got 16 bits}}
 }
diff --git a/llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll 
b/llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll
new file mode 100644
index 0000000000000..c424c0db6525f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll
@@ -0,0 +1,12 @@
+; RUN: llc -mtriple=aarch64 -mattr=+pcdphint < %s | FileCheck %s
+
+declare void @llvm.aarch64.stshh(i64)
+
+define void @test_stshh_atomic_store(ptr %p, i32 %v) {
+; CHECK-LABEL: test_stshh_atomic_store
+; CHECK: stshh
+; CHECK: str
+  call void @llvm.aarch64.stshh(i64 0)
+  store atomic i32 %v, ptr %p monotonic, align 4
+  ret void
+}

>From 277853758e33cfbf29065511e0a2f2e728f74955 Mon Sep 17 00:00:00 2001
From: Jonathan Thackray <[email protected]>
Date: Mon, 16 Feb 2026 09:47:22 +0000
Subject: [PATCH 5/6] fixup! Fix Kerry's CR comments and add negative test for
 "must be an integer type"

---
 .../clang/Basic/DiagnosticSemaKinds.td        |  3 +++
 clang/lib/CodeGen/TargetBuiltins/ARM.cpp      | 10 +++------
 clang/lib/Headers/arm_acle.h                  |  2 +-
 clang/lib/Sema/SemaARM.cpp                    |  6 ++---
 .../test/Sema/AArch64/pcdphint-atomic-store.c |  5 +++++
 .../CodeGen/AArch64/pcdphint-atomic-store.ll  | 22 ++++++++++++++-----
 6 files changed, 31 insertions(+), 17 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 806b275f6df5f..9f5d755034b09 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9556,6 +9556,9 @@ def err_arm_atomic_store_with_stshh_bad_type : Error<
 def err_arm_atomic_store_with_stshh_bad_value_type : Error<
   "value argument to '__arm_atomic_store_with_stshh' must be an integer of the "
   "same size as the pointed-to type; expected %0 bits, got %1 bits">;
+def err_arm_atomic_store_with_stshh_bad_value_must_be_integer : Error<
+  "value argument to '__arm_atomic_store_with_stshh' must be an integer type "
+  "(%0 invalid)">;
 def err_arm_atomic_store_with_stshh_bad_order : Error<
   "memory order argument to '__arm_atomic_store_with_stshh' must be one of "
   "__ATOMIC_RELAXED, __ATOMIC_RELEASE, or __ATOMIC_SEQ_CST">;
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp 
b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index f4dfc6b6921b3..d40a994eb7866 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -5280,15 +5280,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned 
BuiltinID,
 
   if (BuiltinID == clang::AArch64::BI__builtin_arm_atomic_store_with_stshh) {
     const Expr *Arg0 = E->getArg(0);
-    const Expr *Arg1 = E->getArg(1);
-    const Expr *Arg2 = E->getArg(2);
-    const Expr *Arg3 = E->getArg(3);
-
     Value *StoreAddr = EmitScalarExpr(Arg0);
-    Value *StoreValue = EmitScalarExpr(Arg1);
+    Value *StoreValue = EmitScalarExpr(E->getArg(1));
 
-    llvm::APSInt OrderVal = Arg2->EvaluateKnownConstInt(getContext());
-    llvm::APSInt RetentionPolicy = Arg3->EvaluateKnownConstInt(getContext());
+    llvm::APSInt OrderVal = E->getArg(2)->EvaluateKnownConstInt(getContext());
+    llvm::APSInt RetentionPolicy = E->getArg(3)->EvaluateKnownConstInt(getContext());
 
     llvm::AtomicOrdering Ordering;
     switch (OrderVal.getZExtValue()) {
diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index 19a534d320790..fc1af8f1d5a12 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -843,7 +843,7 @@ __rndrrs(uint64_t *__p) {
 /* Atomic store with PCDPHINT */
 #if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
 #define __arm_atomic_store_with_stshh(ptr, data, memory_order, ret)           \
-  __builtin_arm_atomic_store_with_stshh((ptr), (data), (memory_order), (ret))
+  __builtin_arm_atomic_store_with_stshh(ptr, data, memory_order, ret)
 #endif
 
 /* 11.2 Guarded Control Stack intrinsics */
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index fd135c15e5c0c..ca844d647b52a 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -1177,9 +1177,9 @@ static bool CheckAArch64AtomicStoreWithStshhCall(SemaARM 
&S,
   // Check if value is an integer type.
   Expr *ValArg = ValRes.get();
   if (!ValArg->getType()->isIntegerType()) {
-    SemaRef.Diag(Loc, diag::err_arm_atomic_store_with_stshh_bad_value_type)
-        << Bits << Context.getTypeSize(ValArg->getType())
-        << ValArg->getSourceRange();
+    SemaRef.Diag(Loc,
+                 diag::err_arm_atomic_store_with_stshh_bad_value_must_be_integer)
+        << ValArg->getType() << ValArg->getSourceRange();
     return true;
   }
 
diff --git a/clang/test/Sema/AArch64/pcdphint-atomic-store.c 
b/clang/test/Sema/AArch64/pcdphint-atomic-store.c
index 96efaff847b0c..9ca2c0e8f9172 100644
--- a/clang/test/Sema/AArch64/pcdphint-atomic-store.c
+++ b/clang/test/Sema/AArch64/pcdphint-atomic-store.c
@@ -35,3 +35,8 @@ void test_value_size_mismatch(int *p, short v) {
   __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
   // expected-error@-1 {{value argument to '__arm_atomic_store_with_stshh' must be an integer of the same size as the pointed-to type; expected 32 bits, got 16 bits}}
 }
+
+void test_non_integer_value(int *p, float v) {
+  __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
+  // expected-error@-1 {{value argument to '__arm_atomic_store_with_stshh' must be an integer type ('float' invalid)}}
+}
diff --git a/llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll 
b/llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll
index c424c0db6525f..f6e6b1838fa5d 100644
--- a/llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll
+++ b/llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll
@@ -1,12 +1,22 @@
-; RUN: llc -mtriple=aarch64 -mattr=+pcdphint < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 6
+; RUN: llc -mtriple=aarch64 -mattr=+v9.6a < %s | FileCheck %s
 
 declare void @llvm.aarch64.stshh(i64)
 
-define void @test_stshh_atomic_store(ptr %p, i32 %v) {
-; CHECK-LABEL: test_stshh_atomic_store
-; CHECK: stshh
-; CHECK: str
+define void @test_keep() {
+; CHECK-LABEL: test_keep:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.stshh(i64 0)
-  store atomic i32 %v, ptr %p monotonic, align 4
+  ret void
+}
+
+define void @test_strm() {
+; CHECK-LABEL: test_strm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.stshh(i64 1)
   ret void
 }

>From f7ae5e6b5a0faaeced4d4a0a00d091f4697c75ce Mon Sep 17 00:00:00 2001
From: Jonathan Thackray <[email protected]>
Date: Mon, 16 Feb 2026 22:40:36 +0000
Subject: [PATCH 6/6] fixup! Ensure stshh always immediately precedes a store
 instruction

---
 clang/lib/CodeGen/TargetBuiltins/ARM.cpp      | 52 ++++++------
 clang/lib/Sema/SemaARM.cpp                    |  4 +-
 .../CodeGen/AArch64/pcdphint-atomic-store.c   | 63 +++++++++++---
 llvm/include/llvm/IR/IntrinsicsAArch64.td     |  8 +-
 .../AArch64/AArch64ExpandPseudoInsts.cpp      | 62 ++++++++++++++
 .../Target/AArch64/AArch64ISelLowering.cpp    | 82 +++++++++++++++++++
 .../lib/Target/AArch64/AArch64InstrFormats.td | 11 ++-
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   | 15 ++++
 .../CodeGen/AArch64/pcdphint-atomic-store.ll  |  6 +-
 9 files changed, 251 insertions(+), 52 deletions(-)

diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp 
b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index d40a994eb7866..13180d8931fe0 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -5282,46 +5282,40 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned 
BuiltinID,
     const Expr *Arg0 = E->getArg(0);
     Value *StoreAddr = EmitScalarExpr(Arg0);
     Value *StoreValue = EmitScalarExpr(E->getArg(1));
+    Value *Order = EmitScalarExpr(E->getArg(2));
+    Value *Policy = EmitScalarExpr(E->getArg(3));
 
-    llvm::APSInt OrderVal = E->getArg(2)->EvaluateKnownConstInt(getContext());
-    llvm::APSInt RetentionPolicy = E->getArg(3)->EvaluateKnownConstInt(getContext());
+    auto *OrderC = dyn_cast<llvm::ConstantInt>(Order);
+    auto *PolicyC = dyn_cast<llvm::ConstantInt>(Policy);
 
-    llvm::AtomicOrdering Ordering;
-    switch (OrderVal.getZExtValue()) {
+    assert(OrderC && PolicyC &&
+           "order/policy must be constant for __arm_atomic_store_with_stshh");
+
+    // Validate the ordering argument; Sema only accepts these values, so any other order is unreachable here.
+    switch (OrderC->getZExtValue()) {
     case 0: // __ATOMIC_RELAXED
-      Ordering = llvm::AtomicOrdering::Monotonic;
-      break;
     case 3: // __ATOMIC_RELEASE
-      Ordering = llvm::AtomicOrdering::Release;
-      break;
     case 5: // __ATOMIC_SEQ_CST
-      Ordering = llvm::AtomicOrdering::SequentiallyConsistent;
       break;
     default:
       llvm_unreachable(
           "unexpected memory order for __arm_atomic_store_with_stshh");
     }
 
-    QualType ValQT = Arg0->IgnoreParenImpCasts()
-                         ->getType()
-                         ->castAs<PointerType>()
-                         ->getPointeeType();
-    llvm::Type *ValTy = ConvertType(ValQT);
-
-    CharUnits ValAlign = getContext().getTypeAlignInChars(ValQT);
-    Address Addr = Address(StoreAddr, ValTy, ValAlign);
-    LValue LVal = MakeAddrLValue(Addr, ValQT);
-
-    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_stshh);
-    llvm::Value *Arg =
-        llvm::ConstantInt::get(Int64Ty, RetentionPolicy.getZExtValue());
-    // Execute hint before store to provide cache prefetch guidance.
-    CallInst *HintCall = Builder.CreateCall(F, Arg);
-
-    EmitAtomicStore(RValue::get(StoreValue), LVal, Ordering,
-                    /* isVolatile= */ LVal.isVolatile(),
-                    /* isInit= */ false);
-    return HintCall;
+    llvm::Value *OrderArg =
+        llvm::ConstantInt::get(Int32Ty, OrderC->getZExtValue());
+    llvm::Value *PolicyArg =
+        llvm::ConstantInt::get(Int32Ty, PolicyC->getZExtValue());
+
+    llvm::Type *PtrTy = StoreAddr->getType();
+    llvm::Type *ValTy = StoreValue->getType();
+
+    Function *F =
+        CGM.getIntrinsic(Intrinsic::aarch64_stshh_atomic_store, {PtrTy, ValTy});
+
+    // Emit a single intrinsic so backend can expand to STSHH followed by
+    // atomic store, to guarantee STSHH immediately precedes store insn.
+    return Builder.CreateCall(F, {StoreAddr, StoreValue, OrderArg, PolicyArg});
   }
 
   if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index ca844d647b52a..90285ccda49f4 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -1177,8 +1177,8 @@ static bool CheckAArch64AtomicStoreWithStshhCall(SemaARM 
&S,
   // Check if value is an integer type.
   Expr *ValArg = ValRes.get();
   if (!ValArg->getType()->isIntegerType()) {
-    SemaRef.Diag(Loc,
-                 diag::err_arm_atomic_store_with_stshh_bad_value_must_be_integer)
+    SemaRef.Diag(
+        Loc, diag::err_arm_atomic_store_with_stshh_bad_value_must_be_integer)
         << ValArg->getType() << ValArg->getSourceRange();
     return true;
   }
diff --git a/clang/test/CodeGen/AArch64/pcdphint-atomic-store.c 
b/clang/test/CodeGen/AArch64/pcdphint-atomic-store.c
index fceb739782641..e87ef3253a6cc 100644
--- a/clang/test/CodeGen/AArch64/pcdphint-atomic-store.c
+++ b/clang/test/CodeGen/AArch64/pcdphint-atomic-store.c
@@ -1,31 +1,68 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -o - %s | FileCheck %s
 
 #include <arm_acle.h>
 
+// CHECK-LABEL: define dso_local void @test_u8(
+// CHECK-SAME: ptr noundef [[P:%.*]], i8 noundef [[V:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[P_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[V_ADDR:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    store ptr [[P]], ptr [[P_ADDR]], align 8
+// CHECK-NEXT:    store i8 [[V]], ptr [[V_ADDR]], align 1
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[P_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[V_ADDR]], align 1
+// CHECK-NEXT:    call void @llvm.aarch64.stshh.atomic.store.p0.i8(ptr [[TMP0]], i8 [[TMP1]], i32 0, i32 0)
+// CHECK-NEXT:    ret void
+//
 void test_u8(unsigned char *p, unsigned char v) {
   __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
 }
-// CHECK-LABEL: @test_u8
-// CHECK: call void @llvm.aarch64.stshh(i64 0)
-// CHECK-NEXT: store atomic i8 %{{.*}}, ptr %{{.*}} monotonic
 
+// CHECK-LABEL: define dso_local void @test_u16(
+// CHECK-SAME: ptr noundef [[P:%.*]], i16 noundef [[V:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[P_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[V_ADDR:%.*]] = alloca i16, align 2
+// CHECK-NEXT:    store ptr [[P]], ptr [[P_ADDR]], align 8
+// CHECK-NEXT:    store i16 [[V]], ptr [[V_ADDR]], align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[P_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr [[V_ADDR]], align 2
+// CHECK-NEXT:    call void @llvm.aarch64.stshh.atomic.store.p0.i16(ptr [[TMP0]], i16 [[TMP1]], i32 3, i32 1)
+// CHECK-NEXT:    ret void
+//
 void test_u16(unsigned short *p, unsigned short v) {
   __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELEASE, 1);
 }
-// CHECK-LABEL: @test_u16
-// CHECK: call void @llvm.aarch64.stshh(i64 1)
-// CHECK-NEXT: store atomic i16 %{{.*}}, ptr %{{.*}} release
 
+// CHECK-LABEL: define dso_local void @test_u32(
+// CHECK-SAME: ptr noundef [[P:%.*]], i32 noundef [[V:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[P_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[V_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store ptr [[P]], ptr [[P_ADDR]], align 8
+// CHECK-NEXT:    store i32 [[V]], ptr [[V_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[P_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[V_ADDR]], align 4
+// CHECK-NEXT:    call void @llvm.aarch64.stshh.atomic.store.p0.i32(ptr [[TMP0]], i32 [[TMP1]], i32 5, i32 0)
+// CHECK-NEXT:    ret void
+//
 void test_u32(unsigned int *p, unsigned int v) {
   __arm_atomic_store_with_stshh(p, v, __ATOMIC_SEQ_CST, 0);
 }
-// CHECK-LABEL: @test_u32
-// CHECK: call void @llvm.aarch64.stshh(i64 0)
-// CHECK-NEXT: store atomic i32 %{{.*}}, ptr %{{.*}} seq_cst
 
-void test_u64(unsigned long long *p, unsigned long long v) {
+// CHECK-LABEL: define dso_local void @test_u64(
+// CHECK-SAME: ptr noundef [[P:%.*]], i64 noundef [[V:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[P_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[V_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    store ptr [[P]], ptr [[P_ADDR]], align 8
+// CHECK-NEXT:    store i64 [[V]], ptr [[V_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[P_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[V_ADDR]], align 8
+// CHECK-NEXT:    call void @llvm.aarch64.stshh.atomic.store.p0.i64(ptr [[TMP0]], i64 [[TMP1]], i32 0, i32 1)
+// CHECK-NEXT:    ret void
+//
+void test_u64(unsigned long *p, unsigned long v) {
   __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 1);
 }
-// CHECK-LABEL: @test_u64
-// CHECK: call void @llvm.aarch64.stshh(i64 1)
-// CHECK-NEXT: store atomic i64 %{{.*}}, ptr %{{.*}} monotonic
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td 
b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 19ba3a5a740c5..52531eebef42a 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -63,7 +63,13 @@ def int_aarch64_frint64x
 
 def int_aarch64_hint : DefaultAttrsIntrinsic<[], [llvm_i32_ty]>;
 def int_aarch64_stshh
-    : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrHasSideEffects]>;
+    : DefaultAttrsIntrinsic<[], [llvm_i32_ty],
+                            [IntrHasSideEffects, ImmArg<ArgIndex<0>>]>;
+def int_aarch64_stshh_atomic_store
+    : Intrinsic<[],
+                [llvm_anyptr_ty, llvm_anyint_ty, llvm_i32_ty, llvm_i32_ty],
+                [IntrHasSideEffects, ImmArg<ArgIndex<2>>,
+                 ImmArg<ArgIndex<3>>]>;
 
 def int_aarch64_break : Intrinsic<[], [llvm_i32_ty],
     [IntrNoMem, IntrHasSideEffects, IntrNoReturn, IntrCold, 
ImmArg<ArgIndex<0>>]>;
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp 
b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 27d5940c808d2..3cbcd80c4c627 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -26,6 +26,7 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/DebugLoc.h"
@@ -92,6 +93,8 @@ class AArch64ExpandPseudo : public MachineFunctionPass {
   bool expandCALL_BTI(MachineBasicBlock &MBB, MachineBasicBlock::iterator 
MBBI);
   bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI);
+  bool expandSTSHHAtomicStore(MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator MBBI);
   struct ConditionalBlocks {
     MachineBasicBlock &CondBB;
     MachineBasicBlock &EndBB;
@@ -1001,6 +1004,60 @@ bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
   return true;
 }
 
+bool AArch64ExpandPseudo::expandSTSHHAtomicStore(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL(MI.getDebugLoc());
+
+  unsigned Order = MI.getOperand(2).getImm();
+  uint64_t Policy = MI.getOperand(3).getImm();
+
+  bool IsRelaxed = Order == 0;
+  unsigned StoreOpc = 0;
+
+  // __ATOMIC_RELAXED uses STR. __ATOMIC_{RELEASE/SEQ_CST} use STLR
+  switch (MI.getOpcode()) {
+  case AArch64::STSHH_ATOMIC_STORE_B:
+    StoreOpc = IsRelaxed ? AArch64::STRBBui : AArch64::STLRB;
+    break;
+  case AArch64::STSHH_ATOMIC_STORE_H:
+    StoreOpc = IsRelaxed ? AArch64::STRHHui : AArch64::STLRH;
+    break;
+  case AArch64::STSHH_ATOMIC_STORE_W:
+    StoreOpc = IsRelaxed ? AArch64::STRWui : AArch64::STLRW;
+    break;
+  case AArch64::STSHH_ATOMIC_STORE_X:
+    StoreOpc = IsRelaxed ? AArch64::STRXui : AArch64::STLRX;
+    break;
+  default:
+    llvm_unreachable("Unexpected STSHH atomic store pseudo");
+  }
+
+  // Emit the hint with the retention policy immediate.
+  MachineInstr *Hint = BuildMI(MBB, MBBI, DL, TII->get(AArch64::STSHH))
+                           .addImm(Policy)
+                           .getInstr();
+
+  // Emit the associated store instruction.
+  MachineInstrBuilder Store = BuildMI(MBB, MBBI, DL, TII->get(StoreOpc))
+                                  .add(MI.getOperand(0))
+                                  .add(MI.getOperand(1));
+
+  // Relaxed uses base+imm addressing with a zero offset.
+  if (IsRelaxed)
+    Store.addImm(0);
+
+  // Preserve memory operands and any implicit uses/defs.
+  Store->setMemRefs(*MBB.getParent(), MI.memoperands());
+  transferImpOps(MI, Store, Store);
+
+  // Bundle the hint and store so they remain adjacent.
+  finalizeBundle(MBB, Hint->getIterator(), std::next(Store->getIterator()));
+
+  MI.eraseFromParent();
+  return true;
+}
+
 AArch64ExpandPseudo::ConditionalBlocks
 AArch64ExpandPseudo::expandConditionalPseudo(MachineBasicBlock &MBB,
                                              MachineBasicBlock::iterator MBBI,
@@ -1696,6 +1753,11 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock 
&MBB,
      return expandCALL_BTI(MBB, MBBI);
    case AArch64::StoreSwiftAsyncContext:
      return expandStoreSwiftAsyncContext(MBB, MBBI);
+   case AArch64::STSHH_ATOMIC_STORE_B:
+   case AArch64::STSHH_ATOMIC_STORE_H:
+   case AArch64::STSHH_ATOMIC_STORE_W:
+   case AArch64::STSHH_ATOMIC_STORE_X:
+     return expandSTSHHAtomicStore(MBB, MBBI);
    case AArch64::RestoreZAPseudo:
    case AArch64::CommitZASavePseudo:
    case AArch64::MSRpstatePseudo: {
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp 
b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 713b40d97c9fd..eb0b8b908e5e5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6356,6 +6356,88 @@ SDValue 
AArch64TargetLowering::LowerINTRINSIC_VOID(SDValue Op,
                        Op.getOperand(0),                        // Chain
                        DAG.getTargetConstant(24, DL, MVT::i32), // Rt
                        Op.getOperand(2));                       // Addr
+  case Intrinsic::aarch64_stshh: {
+    SDValue Chain = Op.getOperand(0);
+    auto *PolicyC = cast<ConstantSDNode>(Op.getOperand(2));
+    SDValue Policy =
+        DAG.getTargetConstant(PolicyC->getZExtValue(), DL, MVT::i32);
+    SDValue Ops[] = {Policy, Chain};
+    MachineSDNode *N = DAG.getMachineNode(AArch64::STSHH, DL, MVT::Other, Ops);
+    return SDValue(N, 0);
+  }
+  case Intrinsic::aarch64_stshh_atomic_store: {
+    SDValue Chain = Op.getOperand(0);
+    SDValue Ptr = Op.getOperand(2);
+    SDValue Val = Op.getOperand(3);
+    auto *OrderC = cast<ConstantSDNode>(Op.getOperand(4));
+    auto *PolicyC = cast<ConstantSDNode>(Op.getOperand(5));
+    uint64_t OrderVal = OrderC->getZExtValue();
+
+    unsigned SizeBits = Val.getValueType().getSizeInBits();
+    if (SizeBits < 8)
+      SizeBits = 8;
+    unsigned PseudoOpc = 0;
+    // Select pseudo opcode based on value size.
+    switch (SizeBits) {
+    case 8:
+      PseudoOpc = AArch64::STSHH_ATOMIC_STORE_B;
+      break;
+    case 16:
+      PseudoOpc = AArch64::STSHH_ATOMIC_STORE_H;
+      break;
+    case 32:
+      PseudoOpc = AArch64::STSHH_ATOMIC_STORE_W;
+      break;
+    case 64:
+      PseudoOpc = AArch64::STSHH_ATOMIC_STORE_X;
+      break;
+    default:
+      llvm_unreachable("Unexpected STSHH atomic store size");
+    }
+
+    // Extend or truncate value to expected store width
+    if (SizeBits <= 32)
+      Val = DAG.getAnyExtOrTrunc(Val, DL, MVT::i32);
+    else
+      Val = DAG.getAnyExtOrTrunc(Val, DL, MVT::i64);
+
+    SDValue Order = DAG.getTargetConstant(OrderVal, DL, MVT::i32);
+    SDValue Policy =
+        DAG.getTargetConstant(PolicyC->getZExtValue(), DL, MVT::i32);
+
+    // Build pseudo which expands to STSHH + atomic store.
+    SDValue Ops[] = {Val, Ptr, Order, Policy, Chain};
+    MachineSDNode *N = DAG.getMachineNode(PseudoOpc, DL, MVT::Other, Ops);
+
+    // Select correct memory ordering for the store
+    AtomicOrdering Ordering;
+    switch (OrderVal) {
+    case 0: // __ATOMIC_RELAXED
+      Ordering = AtomicOrdering::Monotonic;
+      break;
+    case 3: // __ATOMIC_RELEASE
+      Ordering = AtomicOrdering::Release;
+      break;
+    case 5: // __ATOMIC_SEQ_CST
+      Ordering = AtomicOrdering::SequentiallyConsistent;
+      break;
+    default:
+      llvm_unreachable("Unexpected memory order for STSHH atomic store");
+    }
+
+    LLVMContext &Ctx = *DAG.getContext();
+    EVT MemVT = EVT::getIntegerVT(Ctx, SizeBits);
+    Type *MemTy = MemVT.getTypeForEVT(Ctx);
+    Align Alignment = DAG.getDataLayout().getABITypeAlign(MemTy);
+    uint64_t Size = MemVT.getStoreSize();
+
+    MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+        MachinePointerInfo(), MachineMemOperand::MOStore, Size, Alignment,
+        AAMDNodes(), nullptr, SyncScope::System, Ordering);
+
+    DAG.setNodeMemRefs(N, {MMO});
+    return SDValue(N, 0);
+  }
   case Intrinsic::aarch64_sme_str:
   case Intrinsic::aarch64_sme_ldr: {
     return LowerSMELdrStr(Op, DAG, IntNo == Intrinsic::aarch64_sme_ldr);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td 
b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 1390600488bf2..5b2071b2c0b11 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1159,6 +1159,11 @@ def imm0_7 : Operand<i64>, ImmLeaf<i64, [{
   let ParserMatchClass = Imm0_7Operand;
 }
 
+// imm0_7_i32 predicate - True if the immediate is in the range [0,7]
+def imm0_7_i32 : Operand<i32>, ImmLeaf<i32, [{
+  return ((uint32_t)Imm) < 8;
+}]>;
+
 // imm0_3 predicate - True if the immediate is in the range [0,3]
 def imm0_3 : Operand<i64>, ImmLeaf<i64, [{
   return ((uint64_t)Imm) < 4;
@@ -1840,17 +1845,17 @@ def PHintInstOperand : AsmOperandClass {
     let ParserMethod = "tryParsePHintInstOperand";
 }
 
-def phint_op : Operand<i64> {
+def phint_op : Operand<i32> {
     let ParserMatchClass = PHintInstOperand;
     let PrintMethod = "printPHintOp";
     let OperandType = "OPERAND_IMMEDIATE";
-    let MIOperandInfo = (ops i64imm);
+    let MIOperandInfo = (ops i32imm);
     let DecoderMethod = "DecodeUImm<3>";
 }
 
 class STSHHI
     : SimpleSystemI<0, (ins phint_op:$policy), "stshh", "\t$policy",
-                    [(int_aarch64_stshh (i64 imm0_7:$policy))]>,
+                    [(int_aarch64_stshh (i32 imm0_7_i32:$policy))]>,
       Sched<[WriteHint]> {
   bits<3> policy;
   // NOTE: ideally, this would have mayLoad = 0, mayStore = 0, but we cannot
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td 
b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 2a6788b6742d0..d657be9fc0380 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1577,6 +1577,21 @@ def : InstAlias<"nop", (NOP)>;
 
 def STSHH: STSHHI;
 
+let hasSideEffects = 1, mayStore = 1, isPseudo = 1, isCodeGenOnly = 1 in {
+def STSHH_ATOMIC_STORE_B
+    : Pseudo<(outs), (ins GPR32:$val, GPR64sp:$addr, i32imm:$order,
+                          i64imm:$policy), []>, Sched<[]>;
+def STSHH_ATOMIC_STORE_H
+    : Pseudo<(outs), (ins GPR32:$val, GPR64sp:$addr, i32imm:$order,
+                          i64imm:$policy), []>, Sched<[]>;
+def STSHH_ATOMIC_STORE_W
+    : Pseudo<(outs), (ins GPR32:$val, GPR64sp:$addr, i32imm:$order,
+                          i64imm:$policy), []>, Sched<[]>;
+def STSHH_ATOMIC_STORE_X
+    : Pseudo<(outs), (ins GPR64:$val, GPR64sp:$addr, i32imm:$order,
+                          i64imm:$policy), []>, Sched<[]>;
+}
+
 // In order to be able to write readable assembly, LLVM should accept assembly
 // inputs that use Branch Target Identification mnemonics, even with BTI 
disabled.
 // However, in order to be compatible with other assemblers (e.g. GAS), LLVM
diff --git a/llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll 
b/llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll
index f6e6b1838fa5d..06affdf5ff650 100644
--- a/llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll
+++ b/llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll
@@ -1,14 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
 ; RUN: llc -mtriple=aarch64 -mattr=+v9.6a < %s | FileCheck %s
 
-declare void @llvm.aarch64.stshh(i64)
-
 define void @test_keep() {
 ; CHECK-LABEL: test_keep:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stshh keep
 ; CHECK-NEXT:    ret
-  call void @llvm.aarch64.stshh(i64 0)
+  call void @llvm.aarch64.stshh(i32 0)
   ret void
 }
 
@@ -17,6 +15,6 @@ define void @test_strm() {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stshh strm
 ; CHECK-NEXT:    ret
-  call void @llvm.aarch64.stshh(i64 1)
+  call void @llvm.aarch64.stshh(i32 1)
   ret void
 }

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to