https://github.com/jthackray updated 
https://github.com/llvm/llvm-project/pull/181386

>From e9a8d60f9eab5dc518f530e27fdedde37c450c71 Mon Sep 17 00:00:00 2001
From: Jonathan Thackray <[email protected]>
Date: Fri, 13 Feb 2026 14:42:05 +0000
Subject: [PATCH 01/11] [AArch64][clang][llvm] Add ACLE `stshh` atomic store
 builtin

Add `__arm_atomic_store_with_stshh` implementation as defined
in the ACLE. Validate that the arguments passed are correct, and
lower it to the stshh intrinsic plus an atomic store with the
allowed orderings.

Gate this on FEAT_PCDPHINT so that availability matches
hardware support for the `STSHH` instruction. Use an i64
immediate and side-effect modeling to satisfy tablegen and decoding.
---
 clang/include/clang/Basic/BuiltinsAArch64.def |   3 +
 .../clang/Basic/DiagnosticSemaKinds.td        |   9 ++
 .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp  |   7 +
 clang/lib/CodeGen/TargetBuiltins/ARM.cpp      |  48 ++++++
 clang/lib/Headers/arm_acle.h                  |   6 +
 clang/lib/Sema/SemaARM.cpp                    | 140 ++++++++++++++++++
 .../CodeGen/AArch64/pcdphint-atomic-store.c   |  31 ++++
 .../test/Sema/AArch64/pcdphint-atomic-store.c |  29 ++++
 llvm/include/llvm/IR/IntrinsicsAArch64.td     |   2 +
 .../lib/Target/AArch64/AArch64InstrFormats.td |  12 +-
 .../Disassembler/AArch64Disassembler.cpp      |  13 ++
 11 files changed, 298 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/CodeGen/AArch64/pcdphint-atomic-store.c
 create mode 100644 clang/test/Sema/AArch64/pcdphint-atomic-store.c

diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def 
b/clang/include/clang/Basic/BuiltinsAArch64.def
index 5d7e956b73b87..5d747f4d9c4b2 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -135,6 +135,9 @@ TARGET_BUILTIN(__builtin_arm_st64b, "vv*WUiC*", "n", "ls64")
 TARGET_BUILTIN(__builtin_arm_st64bv, "WUiv*WUiC*", "n", "ls64")
 TARGET_BUILTIN(__builtin_arm_st64bv0, "WUiv*WUiC*", "n", "ls64")
 
+// Atomic store with PCDPHINT
+TARGET_BUILTIN(__builtin_arm_atomic_store_with_stshh, "v.", "t", "pcdphint")
+
 // Armv9.3-A Guarded Control Stack
 TARGET_BUILTIN(__builtin_arm_gcspopm, "WUiWUi", "n", "gcs")
 TARGET_BUILTIN(__builtin_arm_gcsss, "v*v*", "n", "gcs")
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 68016ec4d58a3..a2a9da2baaf00 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9550,6 +9550,15 @@ def err_atomic_builtin_must_be_pointer_intfltptr : Error<
 def err_atomic_builtin_pointer_size : Error<
   "address argument to atomic builtin must be a pointer to 1,2,4,8 or 16 byte "
   "type (%0 invalid)">;
+def err_arm_atomic_store_with_stshh_bad_type : Error<
+  "address argument to '__arm_atomic_store_with_stshh' must be a pointer to an 
"
+  "8,16,32, or 64-bit integer type (%0 invalid)">;
+def err_arm_atomic_store_with_stshh_bad_value_type : Error<
+  "value argument to '__arm_atomic_store_with_stshh' must be an integer of the 
"
+  "same size as the pointed-to type (%0 invalid)">;
+def err_arm_atomic_store_with_stshh_bad_order : Error<
+  "memory order argument to '__arm_atomic_store_with_stshh' must be one of "
+  "__ATOMIC_RELAXED, __ATOMIC_RELEASE, or __ATOMIC_SEQ_CST">;
 def err_atomic_exclusive_builtin_pointer_size : Error<
   "address argument to load or store exclusive builtin must be a pointer to "
   // Because the range of legal sizes for load/store exclusive varies with the
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index 51619bef0b2b9..bb409408f0849 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -1090,6 +1090,13 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned 
builtinID, const CallExpr *expr,
     return mlir::Value{};
   }
 
+  if (builtinID == clang::AArch64::BI__builtin_arm_atomic_store_with_stshh) {
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented AArch64 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return mlir::Value{};
+  }
+
   if (builtinID == clang::AArch64::BI__builtin_arm_rndr ||
       builtinID == clang::AArch64::BI__builtin_arm_rndrrs) {
     cgm.errorNYI(expr->getSourceRange(),
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp 
b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index a04f5e32c18ad..16c0ddc1e59b7 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -5274,6 +5274,54 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned 
BuiltinID,
     return Builder.CreateCall(F, Args);
   }
 
+  if (BuiltinID == clang::AArch64::BI__builtin_arm_atomic_store_with_stshh) {
+    const Expr *Arg0 = E->getArg(0);
+    const Expr *Arg1 = E->getArg(1);
+    const Expr *Arg2 = E->getArg(2);
+    const Expr *Arg3 = E->getArg(3);
+
+    Value *StoreAddr = EmitScalarExpr(Arg0);
+    Value *StoreValue = EmitScalarExpr(Arg1);
+
+    llvm::APSInt OrderVal = Arg2->EvaluateKnownConstInt(getContext());
+    llvm::APSInt RetVal = Arg3->EvaluateKnownConstInt(getContext());
+
+    llvm::AtomicOrdering Ordering;
+    switch (OrderVal.getZExtValue()) {
+    case 0: // __ATOMIC_RELAXED
+      Ordering = llvm::AtomicOrdering::Monotonic;
+      break;
+    case 3: // __ATOMIC_RELEASE
+      Ordering = llvm::AtomicOrdering::Release;
+      break;
+    case 5: // __ATOMIC_SEQ_CST
+      Ordering = llvm::AtomicOrdering::SequentiallyConsistent;
+      break;
+    default:
+      llvm_unreachable(
+          "unexpected memory order for __arm_atomic_store_with_stshh");
+    }
+
+    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_stshh);
+    llvm::Value *Arg = llvm::ConstantInt::get(Int64Ty, RetVal.getZExtValue());
+    CallInst *HintCall = Builder.CreateCall(F, Arg);
+
+    QualType ValQT = Arg0->IgnoreParenImpCasts()
+                         ->getType()
+                         ->castAs<PointerType>()
+                         ->getPointeeType();
+    llvm::Type *ValTy = ConvertType(ValQT);
+
+    CharUnits ValAlign = getContext().getTypeAlignInChars(ValQT);
+    Address Addr = Address(StoreAddr, ValTy, ValAlign);
+    LValue LVal = MakeAddrLValue(Addr, ValQT);
+
+    EmitAtomicStore(RValue::get(StoreValue), LVal, Ordering,
+                    /* isVolatile= */ false,
+                    /* isInit= */ false);
+    return HintCall;
+  }
+
   if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
       BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
 
diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index 9a6b6a837fa5a..ec06072bcc4bf 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -840,6 +840,12 @@ __rndrrs(uint64_t *__p) {
 }
 #endif
 
+/* Atomic store with PCDPHINT */
+#if defined(__ARM_FEATURE_PCDPHINT)
+#define __arm_atomic_store_with_stshh(ptr, data, memory_order, ret)            
\
+  __builtin_arm_atomic_store_with_stshh((ptr), (data), (memory_order), (ret))
+#endif
+
 /* 11.2 Guarded Control Stack intrinsics */
 #if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
 static __inline__ void * __attribute__((__always_inline__, __nodebug__))
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index 33edc455366a7..64486fe04717c 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -1107,6 +1107,143 @@ bool SemaARM::CheckARMBuiltinFunctionCall(const 
TargetInfo &TI,
   }
 }
 
+static bool CheckAArch64AtomicStoreWithStshhCall(SemaARM &S,
+                                                 CallExpr *TheCall) {
+  Sema &SemaRef = S.SemaRef;
+  ASTContext &Context = S.getASTContext();
+  DeclRefExpr *DRE =
+      cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts());
+  SourceLocation Loc = DRE->getBeginLoc();
+
+  // Ensure we have the proper number of arguments.
+  if (SemaRef.checkArgCount(TheCall, 4))
+    return true;
+
+  ExprResult PtrRes =
+      SemaRef.DefaultFunctionArrayLvalueConversion(TheCall->getArg(0));
+
+  // Bail if conversion failed.
+  if (PtrRes.isInvalid())
+    return true;
+
+  TheCall->setArg(0, PtrRes.get());
+  Expr *PointerArg = PtrRes.get();
+
+  // Check arg 0 is a pointer type, err out if not
+  const PointerType *PointerTy = PointerArg->getType()->getAs<PointerType>();
+  if (!PointerTy) {
+    SemaRef.Diag(Loc, diag::err_atomic_builtin_must_be_pointer)
+        << PointerArg->getType() << 0 << PointerArg->getSourceRange();
+    return true;
+  }
+
+  // Reject const-qualified pointee types, with an error
+  QualType ValType = PointerTy->getPointeeType();
+  if (ValType.isConstQualified()) {
+    SemaRef.Diag(Loc, diag::err_atomic_builtin_cannot_be_const)
+        << PointerArg->getType() << PointerArg->getSourceRange();
+    return true;
+  }
+
+  // Only integer element types are supported.
+  ValType = ValType.getUnqualifiedType();
+  if (!ValType->isIntegerType()) {
+    SemaRef.Diag(Loc, diag::err_arm_atomic_store_with_stshh_bad_type)
+        << PointerArg->getType() << PointerArg->getSourceRange();
+    return true;
+  }
+
+  // Only 8/16/32/64-bit integers are supported.
+  unsigned Bits = Context.getTypeSize(ValType);
+  switch (Bits) {
+  case 8:
+  case 16:
+  case 32:
+  case 64:
+    break;
+  default:
+    SemaRef.Diag(Loc, diag::err_arm_atomic_store_with_stshh_bad_type)
+        << PointerArg->getType() << PointerArg->getSourceRange();
+    return true;
+  }
+
+  ExprResult ValRes =
+      SemaRef.DefaultFunctionArrayLvalueConversion(TheCall->getArg(1));
+
+  // Bail if conversion failed.
+  if (ValRes.isInvalid())
+    return true;
+
+  // Check if value is an integer type.
+  Expr *ValArg = ValRes.get();
+  if (!ValArg->getType()->isIntegerType()) {
+    SemaRef.Diag(Loc, diag::err_arm_atomic_store_with_stshh_bad_value_type)
+        << ValArg->getType() << ValArg->getSourceRange();
+    return true;
+  }
+
+  // Value width must match the pointee width.
+  if (Context.getTypeSize(ValArg->getType()) != Bits) {
+    SemaRef.Diag(Loc, diag::err_arm_atomic_store_with_stshh_bad_value_type)
+        << ValArg->getType() << ValArg->getSourceRange();
+    return true;
+  }
+
+  // Prepare a cast if the value type differs
+  ExprResult ValArgRes;
+  CastKind CK =
+      ValArg->getType().getCanonicalType() == ValType.getCanonicalType()
+          ? CK_NoOp
+          : CK_IntegralCast;
+
+  // Apply cast to the pointee type.
+  ValArgRes = SemaRef.ImpCastExprToType(ValArg, ValType, CK);
+
+  // Bail if cast failed.
+  if (ValArgRes.isInvalid())
+    return true;
+
+  TheCall->setArg(1, ValArgRes.get());
+  Expr *OrderArg = TheCall->getArg(2);
+
+  // Defer validation for dependent memory_order arguments.
+  if (OrderArg->isValueDependent())
+    return false;
+
+  // Require an order value.
+  std::optional<llvm::APSInt> OrderValOpt =
+      OrderArg->getIntegerConstantExpr(Context);
+  if (!OrderValOpt) {
+    SemaRef.Diag(Loc, diag::err_arm_atomic_store_with_stshh_bad_order)
+        << OrderArg->getSourceRange();
+    return true;
+  }
+
+  // Validate order; not used here; used later in codegen.
+  llvm::APSInt OrderVal = *OrderValOpt;
+  int64_t Order = OrderVal.getSExtValue();
+  // __ATOMIC_RELAXED=0, __ATOMIC_RELEASE=3, __ATOMIC_SEQ_CST=5.
+  constexpr int64_t AtomicRelaxed = 0;
+  constexpr int64_t AtomicRelease = 3;
+  constexpr int64_t AtomicSeqCst = 5;
+  switch (Order) {
+  case AtomicRelaxed:
+  case AtomicRelease:
+  case AtomicSeqCst:
+    break;
+  default:
+    SemaRef.Diag(Loc, diag::err_arm_atomic_store_with_stshh_bad_order)
+        << OrderArg->getSourceRange();
+    return true;
+  }
+
+  // Arg 3 (retention policy) must be between KEEP(0) and STRM(1).
+  if (SemaRef.BuiltinConstantArgRange(TheCall, 3, 0, 1))
+    return true;
+
+  return false;
+}
+
 bool SemaARM::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI,
                                               unsigned BuiltinID,
                                               CallExpr *TheCall) {
@@ -1117,6 +1254,9 @@ bool SemaARM::CheckAArch64BuiltinFunctionCall(const 
TargetInfo &TI,
     return CheckARMBuiltinExclusiveCall(TI, BuiltinID, TheCall);
   }
 
+  if (BuiltinID == AArch64::BI__builtin_arm_atomic_store_with_stshh)
+    return CheckAArch64AtomicStoreWithStshhCall(*this, TheCall);
+
   if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
     return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 1) ||
            SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 3) ||
diff --git a/clang/test/CodeGen/AArch64/pcdphint-atomic-store.c 
b/clang/test/CodeGen/AArch64/pcdphint-atomic-store.c
new file mode 100644
index 0000000000000..79510be522b6a
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/pcdphint-atomic-store.c
@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +pcdphint 
-D__ARM_FEATURE_PCDPHINT -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_acle.h>
+
+void test_u8(unsigned char *p, unsigned char v) {
+  __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
+}
+// CHECK-LABEL: @test_u8
+// CHECK: call void @llvm.aarch64.stshh(i64 0)
+// CHECK-NEXT: store atomic i8 %{{.*}}, ptr %{{.*}} monotonic
+
+void test_u16(unsigned short *p, unsigned short v) {
+  __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELEASE, 1);
+}
+// CHECK-LABEL: @test_u16
+// CHECK: call void @llvm.aarch64.stshh(i64 1)
+// CHECK-NEXT: store atomic i16 %{{.*}}, ptr %{{.*}} release
+
+void test_u32(unsigned int *p, unsigned int v) {
+  __arm_atomic_store_with_stshh(p, v, __ATOMIC_SEQ_CST, 0);
+}
+// CHECK-LABEL: @test_u32
+// CHECK: call void @llvm.aarch64.stshh(i64 0)
+// CHECK-NEXT: store atomic i32 %{{.*}}, ptr %{{.*}} seq_cst
+
+void test_u64(unsigned long long *p, unsigned long long v) {
+  __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 1);
+}
+// CHECK-LABEL: @test_u64
+// CHECK: call void @llvm.aarch64.stshh(i64 1)
+// CHECK-NEXT: store atomic i64 %{{.*}}, ptr %{{.*}} monotonic
diff --git a/clang/test/Sema/AArch64/pcdphint-atomic-store.c 
b/clang/test/Sema/AArch64/pcdphint-atomic-store.c
new file mode 100644
index 0000000000000..091f1c25c2880
--- /dev/null
+++ b/clang/test/Sema/AArch64/pcdphint-atomic-store.c
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +pcdphint \
+// RUN:   -D__ARM_FEATURE_PCDPHINT -fsyntax-only -verify %s
+
+#include <arm_acle.h>
+
+void test_const_pointer(const unsigned int *p, unsigned int v) {
+  __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
+  // expected-error@-1 {{address argument to atomic builtin cannot be 
const-qualified}}
+}
+
+void test_non_integer_pointer(float *p, float v) {
+  __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
+  // expected-error@-1 {{address argument to '__arm_atomic_store_with_stshh' 
must be a pointer to an 8,16,32, or 64-bit integer type}}
+}
+
+void test_invalid_bit_width(__int128 *p, __int128 v) {
+  __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
+  // expected-error@-1 {{address argument to '__arm_atomic_store_with_stshh' 
must be a pointer to an 8,16,32, or 64-bit integer type}}
+}
+
+void test_invalid_memory_order(unsigned int *p, unsigned int v) {
+  __arm_atomic_store_with_stshh(p, v, __ATOMIC_ACQUIRE, 0);
+  // expected-error@-1 {{memory order argument to 
'__arm_atomic_store_with_stshh' must be one of __ATOMIC_RELAXED, 
__ATOMIC_RELEASE, or __ATOMIC_SEQ_CST}}
+}
+
+void test_invalid_retention_policy(unsigned int *p, unsigned int v) {
+  __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 2);
+  // expected-error@-1 {{argument value 2 is outside the valid range [0, 1]}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td 
b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 7f4b7383415c1..19ba3a5a740c5 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -62,6 +62,8 @@ def int_aarch64_frint64x
 // HINT
 
 def int_aarch64_hint : DefaultAttrsIntrinsic<[], [llvm_i32_ty]>;
+def int_aarch64_stshh
+    : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrHasSideEffects]>;
 
 def int_aarch64_break : Intrinsic<[], [llvm_i32_ty],
     [IntrNoMem, IntrHasSideEffects, IntrNoReturn, IntrCold, 
ImmArg<ArgIndex<0>>]>;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td 
b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 19332507efe1d..a5fee66287443 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1864,16 +1864,24 @@ def PHintInstOperand : AsmOperandClass {
     let ParserMethod = "tryParsePHintInstOperand";
 }
 
-def phint_op : Operand<i32> {
+def phint_op : Operand<i64> {
     let ParserMatchClass = PHintInstOperand;
    let PrintMethod = "printPHintOp";
    let OperandType = "OPERAND_IMMEDIATE";
+   let MIOperandInfo = (ops i64imm);
+  let DecoderMethod = "DecodeUImm<3>";
 }
 
 class STSHHI
-    : SimpleSystemI<0, (ins phint_op:$policy), "stshh", "\t$policy", []>,
+    : SimpleSystemI<0, (ins phint_op:$policy), "stshh", "\t$policy",
+                    [(int_aarch64_stshh (i64 imm0_7:$policy))]>,
       Sched<[WriteHint]> {
   bits<3> policy;
+  // NOTE: ideally, this would have mayLoad = 0, mayStore = 0, but we cannot
+  // model patterns with sufficiently fine granularity.
+  let mayLoad = 1;
+  let mayStore = 1;
+  let hasSideEffects = 1;
   let Inst{20-12} = 0b000011001;
   let Inst{11-8} = 0b0110;
   let Inst{7-5} = policy;
diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp 
b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index 4eb762a00d477..8fa1913ce24e5 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -38,6 +38,9 @@ using DecodeStatus = MCDisassembler::DecodeStatus;
 template <int Bits>
 static DecodeStatus DecodeSImm(MCInst &Inst, uint64_t Imm, uint64_t Address,
                                const MCDisassembler *Decoder);
+template <int Bits>
+static DecodeStatus DecodeUImm(MCInst &Inst, uint64_t Imm, uint64_t Address,
+                               const MCDisassembler *Decoder);
 
 #define Success MCDisassembler::Success
 #define Fail MCDisassembler::Fail
@@ -1442,6 +1445,16 @@ static DecodeStatus DecodeSImm(MCInst &Inst, uint64_t 
Imm, uint64_t Address,
   return Success;
 }
 
+template <int Bits>
+static DecodeStatus DecodeUImm(MCInst &Inst, uint64_t Imm, uint64_t Address,
+                               const MCDisassembler *Decoder) {
+  if (Imm & ~((1ULL << Bits) - 1))
+    return Fail;
+
+  Inst.addOperand(MCOperand::createImm(Imm));
+  return Success;
+}
+
 // Decode 8-bit signed/unsigned immediate for a given element width.
 template <int ElementWidth>
 static DecodeStatus DecodeImm8OptLsl(MCInst &Inst, unsigned Imm, uint64_t Addr,

>From 207e214b498d7a0a4eb917b9f3d6e78e871bf150 Mon Sep 17 00:00:00 2001
From: Jonathan Thackray <[email protected]>
Date: Fri, 13 Feb 2026 17:19:25 +0000
Subject: [PATCH 02/11] fixup!

A few small tidyups
---
 clang/lib/CodeGen/TargetBuiltins/ARM.cpp        | 13 +++++++------
 clang/test/Sema/AArch64/pcdphint-atomic-store.c |  4 ++++
 llvm/lib/Target/AArch64/AArch64InstrFormats.td  |  8 ++++----
 3 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp 
b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 16c0ddc1e59b7..b7d7de7cac351 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -5284,7 +5284,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned 
BuiltinID,
     Value *StoreValue = EmitScalarExpr(Arg1);
 
     llvm::APSInt OrderVal = Arg2->EvaluateKnownConstInt(getContext());
-    llvm::APSInt RetVal = Arg3->EvaluateKnownConstInt(getContext());
+    llvm::APSInt RetentionPolicy = Arg3->EvaluateKnownConstInt(getContext());
 
     llvm::AtomicOrdering Ordering;
     switch (OrderVal.getZExtValue()) {
@@ -5302,10 +5302,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned 
BuiltinID,
           "unexpected memory order for __arm_atomic_store_with_stshh");
     }
 
-    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_stshh);
-    llvm::Value *Arg = llvm::ConstantInt::get(Int64Ty, RetVal.getZExtValue());
-    CallInst *HintCall = Builder.CreateCall(F, Arg);
-
     QualType ValQT = Arg0->IgnoreParenImpCasts()
                          ->getType()
                          ->castAs<PointerType>()
@@ -5316,10 +5312,15 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned 
BuiltinID,
     Address Addr = Address(StoreAddr, ValTy, ValAlign);
     LValue LVal = MakeAddrLValue(Addr, ValQT);
 
+    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_stshh);
+    llvm::Value *Arg =
+        llvm::ConstantInt::get(Int64Ty, RetentionPolicy.getZExtValue());
+    Builder.CreateCall(F, Arg);
+
     EmitAtomicStore(RValue::get(StoreValue), LVal, Ordering,
                     /* isVolatile= */ false,
                     /* isInit= */ false);
-    return HintCall;
+    return nullptr;
   }
 
   if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
diff --git a/clang/test/Sema/AArch64/pcdphint-atomic-store.c 
b/clang/test/Sema/AArch64/pcdphint-atomic-store.c
index 091f1c25c2880..d9784656d486d 100644
--- a/clang/test/Sema/AArch64/pcdphint-atomic-store.c
+++ b/clang/test/Sema/AArch64/pcdphint-atomic-store.c
@@ -27,3 +27,7 @@ void test_invalid_retention_policy(unsigned int *p, unsigned 
int v) {
   __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 2);
   // expected-error@-1 {{argument value 2 is outside the valid range [0, 1]}}
 }
+
+void test_signed_ok(int *p, int v) {
+  __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
+}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td 
b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index a5fee66287443..2c5f745847f78 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1866,10 +1866,10 @@ def PHintInstOperand : AsmOperandClass {
 
 def phint_op : Operand<i64> {
     let ParserMatchClass = PHintInstOperand;
-   let PrintMethod = "printPHintOp";
-   let OperandType = "OPERAND_IMMEDIATE";
-   let MIOperandInfo = (ops i64imm);
-  let DecoderMethod = "DecodeUImm<3>";
+    let PrintMethod = "printPHintOp";
+    let OperandType = "OPERAND_IMMEDIATE";
+    let MIOperandInfo = (ops i64imm);
+    let DecoderMethod = "DecodeUImm<3>";
 }
 
 class STSHHI

>From eea3d5ed8d18cb9b6d96b2974a16c89433604d8f Mon Sep 17 00:00:00 2001
From: Jonathan Thackray <[email protected]>
Date: Fri, 13 Feb 2026 17:26:06 +0000
Subject: [PATCH 03/11] fixup!

Tidy up a few more small issues, and remove the feature gating so the
builtin and the `__arm_atomic_store_with_stshh` macro are always available.
---
 clang/include/clang/Basic/BuiltinsAArch64.def      | 2 +-
 clang/lib/CodeGen/TargetBuiltins/ARM.cpp           | 4 ++--
 clang/lib/Headers/arm_acle.h                       | 2 --
 clang/lib/Sema/SemaARM.cpp                         | 2 +-
 clang/test/CodeGen/AArch64/pcdphint-atomic-store.c | 2 +-
 clang/test/Sema/AArch64/pcdphint-atomic-store.c    | 8 ++++++--
 6 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def 
b/clang/include/clang/Basic/BuiltinsAArch64.def
index 5d747f4d9c4b2..5722b045f1ed1 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -136,7 +136,7 @@ TARGET_BUILTIN(__builtin_arm_st64bv, "WUiv*WUiC*", "n", 
"ls64")
 TARGET_BUILTIN(__builtin_arm_st64bv0, "WUiv*WUiC*", "n", "ls64")
 
 // Atomic store with PCDPHINT
-TARGET_BUILTIN(__builtin_arm_atomic_store_with_stshh, "v.", "t", "pcdphint")
+TARGET_BUILTIN(__builtin_arm_atomic_store_with_stshh, "v.", "t", "")
 
 // Armv9.3-A Guarded Control Stack
 TARGET_BUILTIN(__builtin_arm_gcspopm, "WUiWUi", "n", "gcs")
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp 
b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index b7d7de7cac351..416c797f3fa48 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -5315,12 +5315,12 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned 
BuiltinID,
     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_stshh);
     llvm::Value *Arg =
         llvm::ConstantInt::get(Int64Ty, RetentionPolicy.getZExtValue());
-    Builder.CreateCall(F, Arg);
+    CallInst *HintCall = Builder.CreateCall(F, Arg);
 
     EmitAtomicStore(RValue::get(StoreValue), LVal, Ordering,
                     /* isVolatile= */ false,
                     /* isInit= */ false);
-    return nullptr;
+    return HintCall;
   }
 
   if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index ec06072bcc4bf..88ffd82912df1 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -841,10 +841,8 @@ __rndrrs(uint64_t *__p) {
 #endif
 
 /* Atomic store with PCDPHINT */
-#if defined(__ARM_FEATURE_PCDPHINT)
 #define __arm_atomic_store_with_stshh(ptr, data, memory_order, ret)            
\
   __builtin_arm_atomic_store_with_stshh((ptr), (data), (memory_order), (ret))
-#endif
 
 /* 11.2 Guarded Control Stack intrinsics */
 #if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index 64486fe04717c..b09b5c0cf2f66 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -1219,7 +1219,7 @@ static bool CheckAArch64AtomicStoreWithStshhCall(SemaARM 
&S,
     return true;
   }
 
-  // Validate order; not used here; used later in codegen.
+  // Validate order here; the value is mapped to LLVM ordering in codegen.
   llvm::APSInt OrderVal = *OrderValOpt;
   int64_t Order = OrderVal.getSExtValue();
   // __ATOMIC_RELAXED=0, __ATOMIC_RELEASE=3, __ATOMIC_SEQ_CST=5.
diff --git a/clang/test/CodeGen/AArch64/pcdphint-atomic-store.c 
b/clang/test/CodeGen/AArch64/pcdphint-atomic-store.c
index 79510be522b6a..fceb739782641 100644
--- a/clang/test/CodeGen/AArch64/pcdphint-atomic-store.c
+++ b/clang/test/CodeGen/AArch64/pcdphint-atomic-store.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +pcdphint 
-D__ARM_FEATURE_PCDPHINT -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -o - %s | 
FileCheck %s
 
 #include <arm_acle.h>
 
diff --git a/clang/test/Sema/AArch64/pcdphint-atomic-store.c 
b/clang/test/Sema/AArch64/pcdphint-atomic-store.c
index d9784656d486d..bd69ca859f15e 100644
--- a/clang/test/Sema/AArch64/pcdphint-atomic-store.c
+++ b/clang/test/Sema/AArch64/pcdphint-atomic-store.c
@@ -1,5 +1,4 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +pcdphint \
-// RUN:   -D__ARM_FEATURE_PCDPHINT -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -fsyntax-only -verify %s
 
 #include <arm_acle.h>
 
@@ -31,3 +30,8 @@ void test_invalid_retention_policy(unsigned int *p, unsigned 
int v) {
 void test_signed_ok(int *p, int v) {
   __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
 }
+
+void test_value_size_mismatch(int *p, short v) {
+  __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
+  // expected-error@-1 {{value argument to '__arm_atomic_store_with_stshh' 
must be an integer of the same size as the pointed-to type}}
+}

>From c8fbd205e1b0a7f7d1727316c3b948bf9ca59b6e Mon Sep 17 00:00:00 2001
From: Jonathan Thackray <[email protected]>
Date: Fri, 13 Feb 2026 21:38:30 +0000
Subject: [PATCH 04/11] fixup! Improve error diagnostics, and other cleanups

---
 clang/include/clang/Basic/DiagnosticSemaKinds.td   |  2 +-
 clang/lib/CodeGen/TargetBuiltins/ARM.cpp           |  3 ++-
 clang/lib/Headers/arm_acle.h                       |  2 ++
 clang/lib/Sema/SemaARM.cpp                         |  6 ++++--
 clang/test/Sema/AArch64/pcdphint-atomic-store.c    |  2 +-
 llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll | 12 ++++++++++++
 6 files changed, 22 insertions(+), 5 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index a2a9da2baaf00..806b275f6df5f 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9555,7 +9555,7 @@ def err_arm_atomic_store_with_stshh_bad_type : Error<
   "8,16,32, or 64-bit integer type (%0 invalid)">;
 def err_arm_atomic_store_with_stshh_bad_value_type : Error<
   "value argument to '__arm_atomic_store_with_stshh' must be an integer of the 
"
-  "same size as the pointed-to type (%0 invalid)">;
+  "same size as the pointed-to type; expected %0 bits, got %1 bits">;
 def err_arm_atomic_store_with_stshh_bad_order : Error<
   "memory order argument to '__arm_atomic_store_with_stshh' must be one of "
   "__ATOMIC_RELAXED, __ATOMIC_RELEASE, or __ATOMIC_SEQ_CST">;
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp 
b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 416c797f3fa48..a2d7f4471622d 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -5315,10 +5315,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned 
BuiltinID,
     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_stshh);
     llvm::Value *Arg =
         llvm::ConstantInt::get(Int64Ty, RetentionPolicy.getZExtValue());
+    // Execute hint before store to provide cache prefetch guidance.
     CallInst *HintCall = Builder.CreateCall(F, Arg);
 
     EmitAtomicStore(RValue::get(StoreValue), LVal, Ordering,
-                    /* isVolatile= */ false,
+                    /* isVolatile= */ LVal.isVolatile(),
                     /* isInit= */ false);
     return HintCall;
   }
diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index 88ffd82912df1..19a534d320790 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -841,8 +841,10 @@ __rndrrs(uint64_t *__p) {
 #endif
 
 /* Atomic store with PCDPHINT */
+#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
 #define __arm_atomic_store_with_stshh(ptr, data, memory_order, ret)            
\
   __builtin_arm_atomic_store_with_stshh((ptr), (data), (memory_order), (ret))
+#endif
 
 /* 11.2 Guarded Control Stack intrinsics */
 #if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index b09b5c0cf2f66..fd135c15e5c0c 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -1178,14 +1178,16 @@ static bool 
CheckAArch64AtomicStoreWithStshhCall(SemaARM &S,
   Expr *ValArg = ValRes.get();
   if (!ValArg->getType()->isIntegerType()) {
     SemaRef.Diag(Loc, diag::err_arm_atomic_store_with_stshh_bad_value_type)
-        << ValArg->getType() << ValArg->getSourceRange();
+        << Bits << Context.getTypeSize(ValArg->getType())
+        << ValArg->getSourceRange();
     return true;
   }
 
   // Value width must match the pointee width.
   if (Context.getTypeSize(ValArg->getType()) != Bits) {
     SemaRef.Diag(Loc, diag::err_arm_atomic_store_with_stshh_bad_value_type)
-        << ValArg->getType() << ValArg->getSourceRange();
+        << Bits << Context.getTypeSize(ValArg->getType())
+        << ValArg->getSourceRange();
     return true;
   }
 
diff --git a/clang/test/Sema/AArch64/pcdphint-atomic-store.c 
b/clang/test/Sema/AArch64/pcdphint-atomic-store.c
index bd69ca859f15e..96efaff847b0c 100644
--- a/clang/test/Sema/AArch64/pcdphint-atomic-store.c
+++ b/clang/test/Sema/AArch64/pcdphint-atomic-store.c
@@ -33,5 +33,5 @@ void test_signed_ok(int *p, int v) {
 
 void test_value_size_mismatch(int *p, short v) {
   __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
-  // expected-error@-1 {{value argument to '__arm_atomic_store_with_stshh' 
must be an integer of the same size as the pointed-to type}}
+  // expected-error@-1 {{value argument to '__arm_atomic_store_with_stshh' 
must be an integer of the same size as the pointed-to type; expected 32 bits, 
got 16 bits}}
 }
diff --git a/llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll 
b/llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll
new file mode 100644
index 0000000000000..c424c0db6525f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll
@@ -0,0 +1,12 @@
+; RUN: llc -mtriple=aarch64 -mattr=+pcdphint < %s | FileCheck %s
+
+declare void @llvm.aarch64.stshh(i64)
+
+define void @test_stshh_atomic_store(ptr %p, i32 %v) {
+; CHECK-LABEL: test_stshh_atomic_store
+; CHECK: stshh
+; CHECK: str
+  call void @llvm.aarch64.stshh(i64 0)
+  store atomic i32 %v, ptr %p monotonic, align 4
+  ret void
+}

>From e041f65333ad1ba4620fa536bff0821e7f0d00ab Mon Sep 17 00:00:00 2001
From: Jonathan Thackray <[email protected]>
Date: Mon, 16 Feb 2026 09:47:22 +0000
Subject: [PATCH 05/11] fixup! Fix Kerry's CR comments and add negative test
 for "must be an integer type"

---
 .../clang/Basic/DiagnosticSemaKinds.td        |  3 +++
 clang/lib/CodeGen/TargetBuiltins/ARM.cpp      | 10 +++------
 clang/lib/Headers/arm_acle.h                  |  2 +-
 clang/lib/Sema/SemaARM.cpp                    |  6 ++---
 .../test/Sema/AArch64/pcdphint-atomic-store.c |  5 +++++
 .../CodeGen/AArch64/pcdphint-atomic-store.ll  | 22 ++++++++++++++-----
 6 files changed, 31 insertions(+), 17 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 806b275f6df5f..9f5d755034b09 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9556,6 +9556,9 @@ def err_arm_atomic_store_with_stshh_bad_type : Error<
 def err_arm_atomic_store_with_stshh_bad_value_type : Error<
   "value argument to '__arm_atomic_store_with_stshh' must be an integer of the 
"
   "same size as the pointed-to type; expected %0 bits, got %1 bits">;
+def err_arm_atomic_store_with_stshh_bad_value_must_be_integer : Error<
+  "value argument to '__arm_atomic_store_with_stshh' must be an integer type "
+  "(%0 invalid)">;
 def err_arm_atomic_store_with_stshh_bad_order : Error<
   "memory order argument to '__arm_atomic_store_with_stshh' must be one of "
   "__ATOMIC_RELAXED, __ATOMIC_RELEASE, or __ATOMIC_SEQ_CST">;
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp 
b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index a2d7f4471622d..0078aba9c2026 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -5276,15 +5276,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned 
BuiltinID,
 
   if (BuiltinID == clang::AArch64::BI__builtin_arm_atomic_store_with_stshh) {
     const Expr *Arg0 = E->getArg(0);
-    const Expr *Arg1 = E->getArg(1);
-    const Expr *Arg2 = E->getArg(2);
-    const Expr *Arg3 = E->getArg(3);
-
     Value *StoreAddr = EmitScalarExpr(Arg0);
-    Value *StoreValue = EmitScalarExpr(Arg1);
+    Value *StoreValue = EmitScalarExpr(E->getArg(1));
 
-    llvm::APSInt OrderVal = Arg2->EvaluateKnownConstInt(getContext());
-    llvm::APSInt RetentionPolicy = Arg3->EvaluateKnownConstInt(getContext());
+    llvm::APSInt OrderVal = E->getArg(2)->EvaluateKnownConstInt(getContext());
+    llvm::APSInt RetentionPolicy = 
E->getArg(3)->EvaluateKnownConstInt(getContext());
 
     llvm::AtomicOrdering Ordering;
     switch (OrderVal.getZExtValue()) {
diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index 19a534d320790..fc1af8f1d5a12 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -843,7 +843,7 @@ __rndrrs(uint64_t *__p) {
 /* Atomic store with PCDPHINT */
 #if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
 #define __arm_atomic_store_with_stshh(ptr, data, memory_order, ret)            
\
-  __builtin_arm_atomic_store_with_stshh((ptr), (data), (memory_order), (ret))
+  __builtin_arm_atomic_store_with_stshh(ptr, data, memory_order, ret)
 #endif
 
 /* 11.2 Guarded Control Stack intrinsics */
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index fd135c15e5c0c..ca844d647b52a 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -1177,9 +1177,9 @@ static bool CheckAArch64AtomicStoreWithStshhCall(SemaARM 
&S,
   // Check if value is an integer type.
   Expr *ValArg = ValRes.get();
   if (!ValArg->getType()->isIntegerType()) {
-    SemaRef.Diag(Loc, diag::err_arm_atomic_store_with_stshh_bad_value_type)
-        << Bits << Context.getTypeSize(ValArg->getType())
-        << ValArg->getSourceRange();
+    SemaRef.Diag(Loc,
+                 
diag::err_arm_atomic_store_with_stshh_bad_value_must_be_integer)
+        << ValArg->getType() << ValArg->getSourceRange();
     return true;
   }
 
diff --git a/clang/test/Sema/AArch64/pcdphint-atomic-store.c 
b/clang/test/Sema/AArch64/pcdphint-atomic-store.c
index 96efaff847b0c..9ca2c0e8f9172 100644
--- a/clang/test/Sema/AArch64/pcdphint-atomic-store.c
+++ b/clang/test/Sema/AArch64/pcdphint-atomic-store.c
@@ -35,3 +35,8 @@ void test_value_size_mismatch(int *p, short v) {
   __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
   // expected-error@-1 {{value argument to '__arm_atomic_store_with_stshh' 
must be an integer of the same size as the pointed-to type; expected 32 bits, 
got 16 bits}}
 }
+
+void test_non_integer_value(int *p, float v) {
+  __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
+  // expected-error@-1 {{value argument to '__arm_atomic_store_with_stshh' 
must be an integer type ('float' invalid)}}
+}
diff --git a/llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll 
b/llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll
index c424c0db6525f..f6e6b1838fa5d 100644
--- a/llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll
+++ b/llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll
@@ -1,12 +1,22 @@
-; RUN: llc -mtriple=aarch64 -mattr=+pcdphint < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 6
+; RUN: llc -mtriple=aarch64 -mattr=+v9.6a < %s | FileCheck %s
 
 declare void @llvm.aarch64.stshh(i64)
 
-define void @test_stshh_atomic_store(ptr %p, i32 %v) {
-; CHECK-LABEL: test_stshh_atomic_store
-; CHECK: stshh
-; CHECK: str
+define void @test_keep() {
+; CHECK-LABEL: test_keep:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.stshh(i64 0)
-  store atomic i32 %v, ptr %p monotonic, align 4
+  ret void
+}
+
+define void @test_strm() {
+; CHECK-LABEL: test_strm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.stshh(i64 1)
   ret void
 }

>From 4e687170aa178ee97d37b3c137ae5fb7dcb94024 Mon Sep 17 00:00:00 2001
From: Jonathan Thackray <[email protected]>
Date: Mon, 16 Feb 2026 22:40:36 +0000
Subject: [PATCH 06/11] fixup! Ensure stshh always immediately precedes a store
 instruction

---
 clang/lib/CodeGen/TargetBuiltins/ARM.cpp      | 52 ++++++------
 clang/lib/Sema/SemaARM.cpp                    |  4 +-
 .../CodeGen/AArch64/pcdphint-atomic-store.c   | 63 +++++++++++---
 llvm/include/llvm/IR/IntrinsicsAArch64.td     |  8 +-
 .../AArch64/AArch64ExpandPseudoInsts.cpp      | 62 ++++++++++++++
 .../Target/AArch64/AArch64ISelLowering.cpp    | 82 +++++++++++++++++++
 .../lib/Target/AArch64/AArch64InstrFormats.td | 11 ++-
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   | 15 ++++
 .../CodeGen/AArch64/pcdphint-atomic-store.ll  |  6 +-
 9 files changed, 251 insertions(+), 52 deletions(-)

diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp 
b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 0078aba9c2026..c3fad2ddd7aee 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -5278,46 +5278,40 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned 
BuiltinID,
     const Expr *Arg0 = E->getArg(0);
     Value *StoreAddr = EmitScalarExpr(Arg0);
     Value *StoreValue = EmitScalarExpr(E->getArg(1));
+    Value *Order = EmitScalarExpr(E->getArg(2));
+    Value *Policy = EmitScalarExpr(E->getArg(3));
 
-    llvm::APSInt OrderVal = E->getArg(2)->EvaluateKnownConstInt(getContext());
-    llvm::APSInt RetentionPolicy = 
E->getArg(3)->EvaluateKnownConstInt(getContext());
+    auto *OrderC = dyn_cast<llvm::ConstantInt>(Order);
+    auto *PolicyC = dyn_cast<llvm::ConstantInt>(Policy);
 
-    llvm::AtomicOrdering Ordering;
-    switch (OrderVal.getZExtValue()) {
+    assert(OrderC && PolicyC &&
+           "order/policy must be constant for __arm_atomic_store_with_stshh");
+
+    // Ordering was already validated in Sema; any other value is unreachable.
+    switch (OrderC->getZExtValue()) {
     case 0: // __ATOMIC_RELAXED
-      Ordering = llvm::AtomicOrdering::Monotonic;
-      break;
     case 3: // __ATOMIC_RELEASE
-      Ordering = llvm::AtomicOrdering::Release;
-      break;
     case 5: // __ATOMIC_SEQ_CST
-      Ordering = llvm::AtomicOrdering::SequentiallyConsistent;
       break;
     default:
       llvm_unreachable(
           "unexpected memory order for __arm_atomic_store_with_stshh");
     }
 
-    QualType ValQT = Arg0->IgnoreParenImpCasts()
-                         ->getType()
-                         ->castAs<PointerType>()
-                         ->getPointeeType();
-    llvm::Type *ValTy = ConvertType(ValQT);
-
-    CharUnits ValAlign = getContext().getTypeAlignInChars(ValQT);
-    Address Addr = Address(StoreAddr, ValTy, ValAlign);
-    LValue LVal = MakeAddrLValue(Addr, ValQT);
-
-    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_stshh);
-    llvm::Value *Arg =
-        llvm::ConstantInt::get(Int64Ty, RetentionPolicy.getZExtValue());
-    // Execute hint before store to provide cache prefetch guidance.
-    CallInst *HintCall = Builder.CreateCall(F, Arg);
-
-    EmitAtomicStore(RValue::get(StoreValue), LVal, Ordering,
-                    /* isVolatile= */ LVal.isVolatile(),
-                    /* isInit= */ false);
-    return HintCall;
+    llvm::Value *OrderArg =
+        llvm::ConstantInt::get(Int32Ty, OrderC->getZExtValue());
+    llvm::Value *PolicyArg =
+        llvm::ConstantInt::get(Int32Ty, PolicyC->getZExtValue());
+
+    llvm::Type *PtrTy = StoreAddr->getType();
+    llvm::Type *ValTy = StoreValue->getType();
+
+    Function *F =
+        CGM.getIntrinsic(Intrinsic::aarch64_stshh_atomic_store, {PtrTy, 
ValTy});
+
+    // Emit a single intrinsic so backend can expand to STSHH followed by
+    // atomic store, to guarantee STSHH immediately precedes store insn.
+    return Builder.CreateCall(F, {StoreAddr, StoreValue, OrderArg, PolicyArg});
   }
 
   if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index ca844d647b52a..90285ccda49f4 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -1177,8 +1177,8 @@ static bool CheckAArch64AtomicStoreWithStshhCall(SemaARM 
&S,
   // Check if value is an integer type.
   Expr *ValArg = ValRes.get();
   if (!ValArg->getType()->isIntegerType()) {
-    SemaRef.Diag(Loc,
-                 
diag::err_arm_atomic_store_with_stshh_bad_value_must_be_integer)
+    SemaRef.Diag(
+        Loc, diag::err_arm_atomic_store_with_stshh_bad_value_must_be_integer)
         << ValArg->getType() << ValArg->getSourceRange();
     return true;
   }
diff --git a/clang/test/CodeGen/AArch64/pcdphint-atomic-store.c 
b/clang/test/CodeGen/AArch64/pcdphint-atomic-store.c
index fceb739782641..e87ef3253a6cc 100644
--- a/clang/test/CodeGen/AArch64/pcdphint-atomic-store.c
+++ b/clang/test/CodeGen/AArch64/pcdphint-atomic-store.c
@@ -1,31 +1,68 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 6
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -o - %s | 
FileCheck %s
 
 #include <arm_acle.h>
 
+// CHECK-LABEL: define dso_local void @test_u8(
+// CHECK-SAME: ptr noundef [[P:%.*]], i8 noundef [[V:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[P_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[V_ADDR:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    store ptr [[P]], ptr [[P_ADDR]], align 8
+// CHECK-NEXT:    store i8 [[V]], ptr [[V_ADDR]], align 1
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[P_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[V_ADDR]], align 1
+// CHECK-NEXT:    call void @llvm.aarch64.stshh.atomic.store.p0.i8(ptr 
[[TMP0]], i8 [[TMP1]], i32 0, i32 0)
+// CHECK-NEXT:    ret void
+//
 void test_u8(unsigned char *p, unsigned char v) {
   __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
 }
-// CHECK-LABEL: @test_u8
-// CHECK: call void @llvm.aarch64.stshh(i64 0)
-// CHECK-NEXT: store atomic i8 %{{.*}}, ptr %{{.*}} monotonic
 
+// CHECK-LABEL: define dso_local void @test_u16(
+// CHECK-SAME: ptr noundef [[P:%.*]], i16 noundef [[V:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[P_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[V_ADDR:%.*]] = alloca i16, align 2
+// CHECK-NEXT:    store ptr [[P]], ptr [[P_ADDR]], align 8
+// CHECK-NEXT:    store i16 [[V]], ptr [[V_ADDR]], align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[P_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr [[V_ADDR]], align 2
+// CHECK-NEXT:    call void @llvm.aarch64.stshh.atomic.store.p0.i16(ptr 
[[TMP0]], i16 [[TMP1]], i32 3, i32 1)
+// CHECK-NEXT:    ret void
+//
 void test_u16(unsigned short *p, unsigned short v) {
   __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELEASE, 1);
 }
-// CHECK-LABEL: @test_u16
-// CHECK: call void @llvm.aarch64.stshh(i64 1)
-// CHECK-NEXT: store atomic i16 %{{.*}}, ptr %{{.*}} release
 
+// CHECK-LABEL: define dso_local void @test_u32(
+// CHECK-SAME: ptr noundef [[P:%.*]], i32 noundef [[V:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[P_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[V_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store ptr [[P]], ptr [[P_ADDR]], align 8
+// CHECK-NEXT:    store i32 [[V]], ptr [[V_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[P_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[V_ADDR]], align 4
+// CHECK-NEXT:    call void @llvm.aarch64.stshh.atomic.store.p0.i32(ptr 
[[TMP0]], i32 [[TMP1]], i32 5, i32 0)
+// CHECK-NEXT:    ret void
+//
 void test_u32(unsigned int *p, unsigned int v) {
   __arm_atomic_store_with_stshh(p, v, __ATOMIC_SEQ_CST, 0);
 }
-// CHECK-LABEL: @test_u32
-// CHECK: call void @llvm.aarch64.stshh(i64 0)
-// CHECK-NEXT: store atomic i32 %{{.*}}, ptr %{{.*}} seq_cst
 
-void test_u64(unsigned long long *p, unsigned long long v) {
+// CHECK-LABEL: define dso_local void @test_u64(
+// CHECK-SAME: ptr noundef [[P:%.*]], i64 noundef [[V:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[P_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[V_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    store ptr [[P]], ptr [[P_ADDR]], align 8
+// CHECK-NEXT:    store i64 [[V]], ptr [[V_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[P_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[V_ADDR]], align 8
+// CHECK-NEXT:    call void @llvm.aarch64.stshh.atomic.store.p0.i64(ptr 
[[TMP0]], i64 [[TMP1]], i32 0, i32 1)
+// CHECK-NEXT:    ret void
+//
+void test_u64(unsigned long *p, unsigned long v) {
   __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 1);
 }
-// CHECK-LABEL: @test_u64
-// CHECK: call void @llvm.aarch64.stshh(i64 1)
-// CHECK-NEXT: store atomic i64 %{{.*}}, ptr %{{.*}} monotonic
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td 
b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 19ba3a5a740c5..52531eebef42a 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -63,7 +63,13 @@ def int_aarch64_frint64x
 
 def int_aarch64_hint : DefaultAttrsIntrinsic<[], [llvm_i32_ty]>;
 def int_aarch64_stshh
-    : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrHasSideEffects]>;
+    : DefaultAttrsIntrinsic<[], [llvm_i32_ty],
+                            [IntrHasSideEffects, ImmArg<ArgIndex<0>>]>;
+def int_aarch64_stshh_atomic_store
+    : Intrinsic<[],
+                [llvm_anyptr_ty, llvm_anyint_ty, llvm_i32_ty, llvm_i32_ty],
+                [IntrHasSideEffects, ImmArg<ArgIndex<2>>,
+                 ImmArg<ArgIndex<3>>]>;
 
 def int_aarch64_break : Intrinsic<[], [llvm_i32_ty],
     [IntrNoMem, IntrHasSideEffects, IntrNoReturn, IntrCold, 
ImmArg<ArgIndex<0>>]>;
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp 
b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 27d5940c808d2..3cbcd80c4c627 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -26,6 +26,7 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/DebugLoc.h"
@@ -92,6 +93,8 @@ class AArch64ExpandPseudo : public MachineFunctionPass {
   bool expandCALL_BTI(MachineBasicBlock &MBB, MachineBasicBlock::iterator 
MBBI);
   bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI);
+  bool expandSTSHHAtomicStore(MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator MBBI);
   struct ConditionalBlocks {
     MachineBasicBlock &CondBB;
     MachineBasicBlock &EndBB;
@@ -1001,6 +1004,60 @@ bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
   return true;
 }
 
+bool AArch64ExpandPseudo::expandSTSHHAtomicStore(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL(MI.getDebugLoc());
+
+  unsigned Order = MI.getOperand(2).getImm();
+  uint64_t Policy = MI.getOperand(3).getImm();
+
+  bool IsRelaxed = Order == 0;
+  unsigned StoreOpc = 0;
+
+  // __ATOMIC_RELAXED uses STR. __ATOMIC_{RELEASE/SEQ_CST} use STLR
+  switch (MI.getOpcode()) {
+  case AArch64::STSHH_ATOMIC_STORE_B:
+    StoreOpc = IsRelaxed ? AArch64::STRBBui : AArch64::STLRB;
+    break;
+  case AArch64::STSHH_ATOMIC_STORE_H:
+    StoreOpc = IsRelaxed ? AArch64::STRHHui : AArch64::STLRH;
+    break;
+  case AArch64::STSHH_ATOMIC_STORE_W:
+    StoreOpc = IsRelaxed ? AArch64::STRWui : AArch64::STLRW;
+    break;
+  case AArch64::STSHH_ATOMIC_STORE_X:
+    StoreOpc = IsRelaxed ? AArch64::STRXui : AArch64::STLRX;
+    break;
+  default:
+    llvm_unreachable("Unexpected STSHH atomic store pseudo");
+  }
+
+  // Emit the hint with the retention policy immediate.
+  MachineInstr *Hint = BuildMI(MBB, MBBI, DL, TII->get(AArch64::STSHH))
+                           .addImm(Policy)
+                           .getInstr();
+
+  // Emit the associated store instruction.
+  MachineInstrBuilder Store = BuildMI(MBB, MBBI, DL, TII->get(StoreOpc))
+                                  .add(MI.getOperand(0))
+                                  .add(MI.getOperand(1));
+
+  // Relaxed uses base+imm addressing with a zero offset.
+  if (IsRelaxed)
+    Store.addImm(0);
+
+  // Preserve memory operands and any implicit uses/defs.
+  Store->setMemRefs(*MBB.getParent(), MI.memoperands());
+  transferImpOps(MI, Store, Store);
+
+  // Bundle the hint and store so they remain adjacent.
+  finalizeBundle(MBB, Hint->getIterator(), std::next(Store->getIterator()));
+
+  MI.eraseFromParent();
+  return true;
+}
+
 AArch64ExpandPseudo::ConditionalBlocks
 AArch64ExpandPseudo::expandConditionalPseudo(MachineBasicBlock &MBB,
                                              MachineBasicBlock::iterator MBBI,
@@ -1696,6 +1753,11 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock 
&MBB,
      return expandCALL_BTI(MBB, MBBI);
    case AArch64::StoreSwiftAsyncContext:
      return expandStoreSwiftAsyncContext(MBB, MBBI);
+   case AArch64::STSHH_ATOMIC_STORE_B:
+   case AArch64::STSHH_ATOMIC_STORE_H:
+   case AArch64::STSHH_ATOMIC_STORE_W:
+   case AArch64::STSHH_ATOMIC_STORE_X:
+     return expandSTSHHAtomicStore(MBB, MBBI);
    case AArch64::RestoreZAPseudo:
    case AArch64::CommitZASavePseudo:
    case AArch64::MSRpstatePseudo: {
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp 
b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 9d68d9b34e962..32ece2713e3c4 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6356,6 +6356,88 @@ SDValue 
AArch64TargetLowering::LowerINTRINSIC_VOID(SDValue Op,
                        Op.getOperand(0),                        // Chain
                        DAG.getTargetConstant(24, DL, MVT::i32), // Rt
                        Op.getOperand(2));                       // Addr
+  case Intrinsic::aarch64_stshh: {
+    SDValue Chain = Op.getOperand(0);
+    auto *PolicyC = cast<ConstantSDNode>(Op.getOperand(2));
+    SDValue Policy =
+        DAG.getTargetConstant(PolicyC->getZExtValue(), DL, MVT::i32);
+    SDValue Ops[] = {Policy, Chain};
+    MachineSDNode *N = DAG.getMachineNode(AArch64::STSHH, DL, MVT::Other, Ops);
+    return SDValue(N, 0);
+  }
+  case Intrinsic::aarch64_stshh_atomic_store: {
+    SDValue Chain = Op.getOperand(0);
+    SDValue Ptr = Op.getOperand(2);
+    SDValue Val = Op.getOperand(3);
+    auto *OrderC = cast<ConstantSDNode>(Op.getOperand(4));
+    auto *PolicyC = cast<ConstantSDNode>(Op.getOperand(5));
+    uint64_t OrderVal = OrderC->getZExtValue();
+
+    unsigned SizeBits = Val.getValueType().getSizeInBits();
+    if (SizeBits < 8)
+      SizeBits = 8;
+    unsigned PseudoOpc = 0;
+    // Select pseudo opcode based on value size.
+    switch (SizeBits) {
+    case 8:
+      PseudoOpc = AArch64::STSHH_ATOMIC_STORE_B;
+      break;
+    case 16:
+      PseudoOpc = AArch64::STSHH_ATOMIC_STORE_H;
+      break;
+    case 32:
+      PseudoOpc = AArch64::STSHH_ATOMIC_STORE_W;
+      break;
+    case 64:
+      PseudoOpc = AArch64::STSHH_ATOMIC_STORE_X;
+      break;
+    default:
+      llvm_unreachable("Unexpected STSHH atomic store size");
+    }
+
+    // Extend or truncate value to expected store width
+    if (SizeBits <= 32)
+      Val = DAG.getAnyExtOrTrunc(Val, DL, MVT::i32);
+    else
+      Val = DAG.getAnyExtOrTrunc(Val, DL, MVT::i64);
+
+    SDValue Order = DAG.getTargetConstant(OrderVal, DL, MVT::i32);
+    SDValue Policy =
+        DAG.getTargetConstant(PolicyC->getZExtValue(), DL, MVT::i32);
+
+    // Build pseudo which expands to STSHH + atomic store.
+    SDValue Ops[] = {Val, Ptr, Order, Policy, Chain};
+    MachineSDNode *N = DAG.getMachineNode(PseudoOpc, DL, MVT::Other, Ops);
+
+    // Select correct memory ordering for the store
+    AtomicOrdering Ordering;
+    switch (OrderVal) {
+    case 0: // __ATOMIC_RELAXED
+      Ordering = AtomicOrdering::Monotonic;
+      break;
+    case 3: // __ATOMIC_RELEASE
+      Ordering = AtomicOrdering::Release;
+      break;
+    case 5: // __ATOMIC_SEQ_CST
+      Ordering = AtomicOrdering::SequentiallyConsistent;
+      break;
+    default:
+      llvm_unreachable("Unexpected memory order for STSHH atomic store");
+    }
+
+    LLVMContext &Ctx = *DAG.getContext();
+    EVT MemVT = EVT::getIntegerVT(Ctx, SizeBits);
+    Type *MemTy = MemVT.getTypeForEVT(Ctx);
+    Align Alignment = DAG.getDataLayout().getABITypeAlign(MemTy);
+    uint64_t Size = MemVT.getStoreSize();
+
+    MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+        MachinePointerInfo(), MachineMemOperand::MOStore, Size, Alignment,
+        AAMDNodes(), nullptr, SyncScope::System, Ordering);
+
+    DAG.setNodeMemRefs(N, {MMO});
+    return SDValue(N, 0);
+  }
   case Intrinsic::aarch64_sme_str:
   case Intrinsic::aarch64_sme_ldr: {
     return LowerSMELdrStr(Op, DAG, IntNo == Intrinsic::aarch64_sme_ldr);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td 
b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 2c5f745847f78..2bb7f6d8a27c6 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1183,6 +1183,11 @@ def imm0_7 : Operand<i64>, ImmLeaf<i64, [{
   let ParserMatchClass = Imm0_7Operand;
 }
 
+// imm0_7_i32 predicate - True if the immediate is in the range [0,7]
+def imm0_7_i32 : Operand<i32>, ImmLeaf<i32, [{
+  return ((uint32_t)Imm) < 8;
+}]>;
+
 // imm0_3 predicate - True if the immediate is in the range [0,3]
 def imm0_3 : Operand<i64>, ImmLeaf<i64, [{
   return ((uint64_t)Imm) < 4;
@@ -1864,17 +1869,17 @@ def PHintInstOperand : AsmOperandClass {
     let ParserMethod = "tryParsePHintInstOperand";
 }
 
-def phint_op : Operand<i64> {
+def phint_op : Operand<i32> {
     let ParserMatchClass = PHintInstOperand;
     let PrintMethod = "printPHintOp";
     let OperandType = "OPERAND_IMMEDIATE";
-    let MIOperandInfo = (ops i64imm);
+    let MIOperandInfo = (ops i32imm);
     let DecoderMethod = "DecodeUImm<3>";
 }
 
 class STSHHI
     : SimpleSystemI<0, (ins phint_op:$policy), "stshh", "\t$policy",
-                    [(int_aarch64_stshh (i64 imm0_7:$policy))]>,
+                    [(int_aarch64_stshh (i32 imm0_7_i32:$policy))]>,
       Sched<[WriteHint]> {
   bits<3> policy;
   // NOTE: ideally, this would have mayLoad = 0, mayStore = 0, but we cannot
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td 
b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 8329f00b13f03..4726673cc2988 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1577,6 +1577,21 @@ def : InstAlias<"nop", (NOP)>;
 
 def STSHH: STSHHI;
 
+let hasSideEffects = 1, mayStore = 1, isPseudo = 1, isCodeGenOnly = 1 in {
+def STSHH_ATOMIC_STORE_B
+    : Pseudo<(outs), (ins GPR32:$val, GPR64sp:$addr, i32imm:$order,
+                          i64imm:$policy), []>, Sched<[]>;
+def STSHH_ATOMIC_STORE_H
+    : Pseudo<(outs), (ins GPR32:$val, GPR64sp:$addr, i32imm:$order,
+                          i64imm:$policy), []>, Sched<[]>;
+def STSHH_ATOMIC_STORE_W
+    : Pseudo<(outs), (ins GPR32:$val, GPR64sp:$addr, i32imm:$order,
+                          i64imm:$policy), []>, Sched<[]>;
+def STSHH_ATOMIC_STORE_X
+    : Pseudo<(outs), (ins GPR64:$val, GPR64sp:$addr, i32imm:$order,
+                          i64imm:$policy), []>, Sched<[]>;
+}
+
 // In order to be able to write readable assembly, LLVM should accept assembly
 // inputs that use Branch Target Identification mnemonics, even with BTI 
disabled.
 // However, in order to be compatible with other assemblers (e.g. GAS), LLVM
diff --git a/llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll 
b/llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll
index f6e6b1838fa5d..06affdf5ff650 100644
--- a/llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll
+++ b/llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll
@@ -1,14 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 6
 ; RUN: llc -mtriple=aarch64 -mattr=+v9.6a < %s | FileCheck %s
 
-declare void @llvm.aarch64.stshh(i64)
-
 define void @test_keep() {
 ; CHECK-LABEL: test_keep:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stshh keep
 ; CHECK-NEXT:    ret
-  call void @llvm.aarch64.stshh(i64 0)
+  call void @llvm.aarch64.stshh(i32 0)
   ret void
 }
 
@@ -17,6 +15,6 @@ define void @test_strm() {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stshh strm
 ; CHECK-NEXT:    ret
-  call void @llvm.aarch64.stshh(i64 1)
+  call void @llvm.aarch64.stshh(i32 1)
   ret void
 }

>From 101d95e711a1830fb9d4718df812661c7976bf1e Mon Sep 17 00:00:00 2001
From: Jonathan Thackray <[email protected]>
Date: Mon, 23 Feb 2026 21:14:32 +0000
Subject: [PATCH 07/11] fixup! remove mayLoad/mayStore as suggested by Kerry

---
 llvm/include/llvm/IR/IntrinsicsAArch64.td      | 2 +-
 llvm/lib/Target/AArch64/AArch64InstrFormats.td | 5 -----
 2 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td 
b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 52531eebef42a..f6abbb81b5071 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -64,7 +64,7 @@ def int_aarch64_frint64x
 def int_aarch64_hint : DefaultAttrsIntrinsic<[], [llvm_i32_ty]>;
 def int_aarch64_stshh
     : DefaultAttrsIntrinsic<[], [llvm_i32_ty],
-                            [IntrHasSideEffects, ImmArg<ArgIndex<0>>]>;
+                            [IntrNoMem, IntrHasSideEffects, 
ImmArg<ArgIndex<0>>]>;
 def int_aarch64_stshh_atomic_store
     : Intrinsic<[],
                 [llvm_anyptr_ty, llvm_anyint_ty, llvm_i32_ty, llvm_i32_ty],
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td 
b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 2bb7f6d8a27c6..749fd029180ae 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1882,11 +1882,6 @@ class STSHHI
                     [(int_aarch64_stshh (i32 imm0_7_i32:$policy))]>,
       Sched<[WriteHint]> {
   bits<3> policy;
-  // NOTE: ideally, this would have mayLoad = 0, mayStore = 0, but we cannot
-  // model patterns with sufficiently fine granularity.
-  let mayLoad = 1;
-  let mayStore = 1;
-  let hasSideEffects = 1;
   let Inst{20-12} = 0b000011001;
   let Inst{11-8} = 0b0110;
   let Inst{7-5} = policy;

>From 2a2f43368d52b6d37e1adfc263f25b1eed4e49b7 Mon Sep 17 00:00:00 2001
From: Jonathan Thackray <[email protected]>
Date: Tue, 24 Feb 2026 12:05:09 +0000
Subject: [PATCH 08/11] fixup! Fix issues Kerry raised in PR

---
 .../clang/Basic/DiagnosticSemaKinds.td        |  6 +---
 clang/lib/CodeGen/TargetBuiltins/ARM.cpp      | 17 +++-------
 clang/lib/Sema/SemaARM.cpp                    | 33 ++++++-------------
 .../test/Sema/AArch64/pcdphint-atomic-store.c | 25 ++++++++------
 4 files changed, 31 insertions(+), 50 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 9f5d755034b09..8f4cce3fb331d 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9554,11 +9554,7 @@ def err_arm_atomic_store_with_stshh_bad_type : Error<
   "address argument to '__arm_atomic_store_with_stshh' must be a pointer to an 
"
   "8,16,32, or 64-bit integer type (%0 invalid)">;
 def err_arm_atomic_store_with_stshh_bad_value_type : Error<
-  "value argument to '__arm_atomic_store_with_stshh' must be an integer of the 
"
-  "same size as the pointed-to type; expected %0 bits, got %1 bits">;
-def err_arm_atomic_store_with_stshh_bad_value_must_be_integer : Error<
-  "value argument to '__arm_atomic_store_with_stshh' must be an integer type "
-  "(%0 invalid)">;
+  "value argument to '__arm_atomic_store_with_stshh' must be %0; got %1">;
 def err_arm_atomic_store_with_stshh_bad_order : Error<
   "memory order argument to '__arm_atomic_store_with_stshh' must be one of "
   "__ATOMIC_RELAXED, __ATOMIC_RELEASE, or __ATOMIC_SEQ_CST">;
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp 
b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index c3fad2ddd7aee..5b28cd0774ca2 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -5275,8 +5275,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned 
BuiltinID,
   }
 
   if (BuiltinID == clang::AArch64::BI__builtin_arm_atomic_store_with_stshh) {
-    const Expr *Arg0 = E->getArg(0);
-    Value *StoreAddr = EmitScalarExpr(Arg0);
+    Value *StoreAddr = EmitScalarExpr(E->getArg(0));
     Value *StoreValue = EmitScalarExpr(E->getArg(1));
     Value *Order = EmitScalarExpr(E->getArg(2));
     Value *Policy = EmitScalarExpr(E->getArg(3));
@@ -5298,20 +5297,14 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned 
BuiltinID,
           "unexpected memory order for __arm_atomic_store_with_stshh");
     }
 
-    llvm::Value *OrderArg =
-        llvm::ConstantInt::get(Int32Ty, OrderC->getZExtValue());
-    llvm::Value *PolicyArg =
-        llvm::ConstantInt::get(Int32Ty, PolicyC->getZExtValue());
-
-    llvm::Type *PtrTy = StoreAddr->getType();
-    llvm::Type *ValTy = StoreValue->getType();
-
     Function *F =
-        CGM.getIntrinsic(Intrinsic::aarch64_stshh_atomic_store, {PtrTy, 
ValTy});
+        CGM.getIntrinsic(Intrinsic::aarch64_stshh_atomic_store,
+              {StoreAddr->getType(), StoreValue->getType()});
+
 
     // Emit a single intrinsic so backend can expand to STSHH followed by
     // atomic store, to guarantee STSHH immediately precedes store insn.
-    return Builder.CreateCall(F, {StoreAddr, StoreValue, OrderArg, PolicyArg});
+    return Builder.CreateCall(F, {StoreAddr, StoreValue, OrderC, PolicyC});
   }
 
   if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index 90285ccda49f4..561a8eebe4055 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -1174,20 +1174,13 @@ static bool 
CheckAArch64AtomicStoreWithStshhCall(SemaARM &S,
   if (ValRes.isInvalid())
     return true;
 
-  // Check if value is an integer type.
   Expr *ValArg = ValRes.get();
-  if (!ValArg->getType()->isIntegerType()) {
-    SemaRef.Diag(
-        Loc, diag::err_arm_atomic_store_with_stshh_bad_value_must_be_integer)
-        << ValArg->getType() << ValArg->getSourceRange();
-    return true;
-  }
+  QualType ValArgType = ValArg->getType().getUnqualifiedType();
 
-  // Value width must match the pointee width.
-  if (Context.getTypeSize(ValArg->getType()) != Bits) {
+  // Check that the value argument's type exactly matches the pointee type.
+  if (!Context.hasSameType(ValArgType, ValType)) {
     SemaRef.Diag(Loc, diag::err_arm_atomic_store_with_stshh_bad_value_type)
-        << Bits << Context.getTypeSize(ValArg->getType())
-        << ValArg->getSourceRange();
+        << ValType << ValArg->getType() << ValArg->getSourceRange();
     return true;
   }
 
@@ -1221,19 +1214,13 @@ static bool 
CheckAArch64AtomicStoreWithStshhCall(SemaARM &S,
     return true;
   }
 
-  // Validate order here; the value is mapped to LLVM ordering in codegen.
-  llvm::APSInt OrderVal = *OrderValOpt;
-  int64_t Order = OrderVal.getSExtValue();
+  llvm::APSInt OrderVal;
+  if (SemaRef.BuiltinConstantArg(TheCall, 2, OrderVal))
+    return true;
+
   // __ATOMIC_RELAXED=0, __ATOMIC_RELEASE=3, __ATOMIC_SEQ_CST=5.
-  constexpr int64_t AtomicRelaxed = 0;
-  constexpr int64_t AtomicRelease = 3;
-  constexpr int64_t AtomicSeqCst = 5;
-  switch (Order) {
-  case AtomicRelaxed:
-  case AtomicRelease:
-  case AtomicSeqCst:
-    break;
-  default:
+  int64_t Order = OrderVal.getSExtValue();
+  if (Order != 0 && Order != 3 && Order != 5) {
     SemaRef.Diag(Loc, diag::err_arm_atomic_store_with_stshh_bad_order)
         << OrderArg->getSourceRange();
     return true;
diff --git a/clang/test/Sema/AArch64/pcdphint-atomic-store.c 
b/clang/test/Sema/AArch64/pcdphint-atomic-store.c
index 9ca2c0e8f9172..fee7bc9e008b1 100644
--- a/clang/test/Sema/AArch64/pcdphint-atomic-store.c
+++ b/clang/test/Sema/AArch64/pcdphint-atomic-store.c
@@ -2,6 +2,15 @@
 
 #include <arm_acle.h>
 
+void test_signed_ok(int *p, int v) {
+  __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
+}
+
+void test_invalid_retention_policy(unsigned int *p, unsigned int v) {
+  __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 2);
+  // expected-error@-1 {{argument value 2 is outside the valid range [0, 1]}}
+}
+
 void test_const_pointer(const unsigned int *p, unsigned int v) {
   __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
   // expected-error@-1 {{address argument to atomic builtin cannot be 
const-qualified}}
@@ -22,21 +31,17 @@ void test_invalid_memory_order(unsigned int *p, unsigned 
int v) {
   // expected-error@-1 {{memory order argument to 
'__arm_atomic_store_with_stshh' must be one of __ATOMIC_RELAXED, 
__ATOMIC_RELEASE, or __ATOMIC_SEQ_CST}}
 }
 
-void test_invalid_retention_policy(unsigned int *p, unsigned int v) {
-  __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 2);
-  // expected-error@-1 {{argument value 2 is outside the valid range [0, 1]}}
-}
-
-void test_signed_ok(int *p, int v) {
+void test_value_size_mismatch(int *p, short v) {
   __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
+  // expected-error@-1 {{value argument to '__arm_atomic_store_with_stshh' 
must be 'int'; got 'short'}}
 }
 
-void test_value_size_mismatch(int *p, short v) {
+void test_non_integer_value(int *p, float v) {
   __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
-  // expected-error@-1 {{value argument to '__arm_atomic_store_with_stshh' 
must be an integer of the same size as the pointed-to type; expected 32 bits, 
got 16 bits}}
+  // expected-error@-1 {{value argument to '__arm_atomic_store_with_stshh' 
must be 'int'; got 'float'}}
 }
 
-void test_non_integer_value(int *p, float v) {
+void test_value_i128_mismatch(int *p, __int128 v) {
   __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
-  // expected-error@-1 {{value argument to '__arm_atomic_store_with_stshh' 
must be an integer type ('float' invalid)}}
+  // expected-error@-1 {{value argument to '__arm_atomic_store_with_stshh' 
must be 'int'; got '__int128'}}
 }

>From d81c6b5abd1343ac6f85d4ff36a9d1a5056b905a Mon Sep 17 00:00:00 2001
From: Jonathan Thackray <[email protected]>
Date: Tue, 24 Feb 2026 14:02:09 +0000
Subject: [PATCH 09/11] fixup! Fix more PR comments

---
 clang/lib/CodeGen/TargetBuiltins/ARM.cpp      |  3 +-
 clang/test/CodeGen/arm_acle.c                 | 10 +++++++
 clang/test/CodeGen/builtins-arm64.c           |  5 ++++
 .../test/Sema/AArch64/pcdphint-atomic-store.c | 28 +++++++++++++------
 llvm/include/llvm/IR/IntrinsicsAArch64.td     |  4 ---
 .../Target/AArch64/AArch64ISelLowering.cpp    |  9 ------
 .../lib/Target/AArch64/AArch64InstrFormats.td |  3 +-
 .../CodeGen/AArch64/pcdphint-atomic-store.ll  | 14 ++++++----
 8 files changed, 44 insertions(+), 32 deletions(-)

diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp 
b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 5b28cd0774ca2..0d4a64bf83fcc 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -5299,8 +5299,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned 
BuiltinID,
 
     Function *F =
         CGM.getIntrinsic(Intrinsic::aarch64_stshh_atomic_store,
-              {StoreAddr->getType(), StoreValue->getType()});
-
+                         {StoreAddr->getType(), StoreValue->getType()});
 
     // Emit a single intrinsic so backend can expand to STSHH followed by
     // atomic store, to guarantee STSHH immediately precedes store insn.
diff --git a/clang/test/CodeGen/arm_acle.c b/clang/test/CodeGen/arm_acle.c
index b053778581134..d902eb08a9cc7 100644
--- a/clang/test/CodeGen/arm_acle.c
+++ b/clang/test/CodeGen/arm_acle.c
@@ -1822,4 +1822,14 @@ int test_rndrrs(uint64_t *__addr) {
 }
 #endif
 
+#if defined(__ARM_64BIT_STATE)
 
+// AArch64-LABEL: @test_stshh_atomic_store(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    call void @llvm.aarch64.stshh.atomic.store.p0.i32(ptr %p, 
i32 %v, i32 0, i32 0)
+// AArch64-NEXT:    ret void
+//
+void test_stshh_atomic_store(int *p, int v) {
+  __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
+}
+#endif
diff --git a/clang/test/CodeGen/builtins-arm64.c 
b/clang/test/CodeGen/builtins-arm64.c
index 3d054c79f1777..6a1cb845e41fa 100644
--- a/clang/test/CodeGen/builtins-arm64.c
+++ b/clang/test/CodeGen/builtins-arm64.c
@@ -39,6 +39,11 @@ void hints(void) {
   __builtin_arm_sevl();   //CHECK: call {{.*}} @llvm.aarch64.hint(i32 5)
 }
 
+void stshh_atomic_store(int *p, int v) {
+  __builtin_arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
+  // CHECK: call void @llvm.aarch64.stshh.atomic.store.p0.i32(ptr {{.*}}, i32 
{{.*}}, i32 0, i32 0)
+}
+
 void barriers(void) {
   __builtin_arm_dmb(1);  //CHECK: call {{.*}} @llvm.aarch64.dmb(i32 1)
   __builtin_arm_dsb(2);  //CHECK: call {{.*}} @llvm.aarch64.dsb(i32 2)
diff --git a/clang/test/Sema/AArch64/pcdphint-atomic-store.c 
b/clang/test/Sema/AArch64/pcdphint-atomic-store.c
index fee7bc9e008b1..e3314b84c7a1c 100644
--- a/clang/test/Sema/AArch64/pcdphint-atomic-store.c
+++ b/clang/test/Sema/AArch64/pcdphint-atomic-store.c
@@ -3,45 +3,55 @@
 #include <arm_acle.h>
 
 void test_signed_ok(int *p, int v) {
-  __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
+  __builtin_arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
 }
 
 void test_invalid_retention_policy(unsigned int *p, unsigned int v) {
-  __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 2);
+  __builtin_arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 2);
   // expected-error@-1 {{argument value 2 is outside the valid range [0, 1]}}
 }
 
 void test_const_pointer(const unsigned int *p, unsigned int v) {
-  __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
+  __builtin_arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
   // expected-error@-1 {{address argument to atomic builtin cannot be 
const-qualified}}
 }
 
 void test_non_integer_pointer(float *p, float v) {
-  __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
+  __builtin_arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
   // expected-error@-1 {{address argument to '__arm_atomic_store_with_stshh' 
must be a pointer to an 8,16,32, or 64-bit integer type}}
 }
 
 void test_invalid_bit_width(__int128 *p, __int128 v) {
-  __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
+  __builtin_arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
   // expected-error@-1 {{address argument to '__arm_atomic_store_with_stshh' 
must be a pointer to an 8,16,32, or 64-bit integer type}}
 }
 
 void test_invalid_memory_order(unsigned int *p, unsigned int v) {
-  __arm_atomic_store_with_stshh(p, v, __ATOMIC_ACQUIRE, 0);
+  __builtin_arm_atomic_store_with_stshh(p, v, __ATOMIC_ACQUIRE, 0);
   // expected-error@-1 {{memory order argument to 
'__arm_atomic_store_with_stshh' must be one of __ATOMIC_RELAXED, 
__ATOMIC_RELEASE, or __ATOMIC_SEQ_CST}}
 }
 
 void test_value_size_mismatch(int *p, short v) {
-  __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
+  __builtin_arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
   // expected-error@-1 {{value argument to '__arm_atomic_store_with_stshh' 
must be 'int'; got 'short'}}
 }
 
 void test_non_integer_value(int *p, float v) {
-  __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
+  __builtin_arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
   // expected-error@-1 {{value argument to '__arm_atomic_store_with_stshh' 
must be 'int'; got 'float'}}
 }
 
+void test_too_few_args(int *p, int v) {
+  __builtin_arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED);
+  // expected-error@-1 {{too few arguments to function call, expected 4, have 
3}}
+}
+
+void test_too_many_args(int *p, int v) {
+  __builtin_arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0, 1);
+  // expected-error@-1 {{too many arguments to function call, expected 4, have 
5}}
+}
+
 void test_value_i128_mismatch(int *p, __int128 v) {
-  __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
+  __builtin_arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
   // expected-error@-1 {{value argument to '__arm_atomic_store_with_stshh' 
must be 'int'; got '__int128'}}
 }
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td 
b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index f6abbb81b5071..a8ec48da7a8b4 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -62,9 +62,6 @@ def int_aarch64_frint64x
 // HINT
 
 def int_aarch64_hint : DefaultAttrsIntrinsic<[], [llvm_i32_ty]>;
-def int_aarch64_stshh
-    : DefaultAttrsIntrinsic<[], [llvm_i32_ty],
-                            [IntrNoMem, IntrHasSideEffects, 
ImmArg<ArgIndex<0>>]>;
 def int_aarch64_stshh_atomic_store
     : Intrinsic<[],
                 [llvm_anyptr_ty, llvm_anyint_ty, llvm_i32_ty, llvm_i32_ty],
@@ -4295,4 +4292,3 @@ let TargetPrefix = "aarch64" in {
   def int_aarch64_sve_pmlal_pair_x2 : DefaultAttrsIntrinsic<[llvm_nxv2i64_ty, 
llvm_nxv2i64_ty],
       [llvm_nxv2i64_ty, llvm_nxv2i64_ty, llvm_nxv2i64_ty, llvm_nxv2i64_ty], 
[IntrNoMem]>;
 }
-
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp 
b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 32ece2713e3c4..1014bb5025c71 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6356,15 +6356,6 @@ SDValue 
AArch64TargetLowering::LowerINTRINSIC_VOID(SDValue Op,
                        Op.getOperand(0),                        // Chain
                        DAG.getTargetConstant(24, DL, MVT::i32), // Rt
                        Op.getOperand(2));                       // Addr
-  case Intrinsic::aarch64_stshh: {
-    SDValue Chain = Op.getOperand(0);
-    auto *PolicyC = cast<ConstantSDNode>(Op.getOperand(2));
-    SDValue Policy =
-        DAG.getTargetConstant(PolicyC->getZExtValue(), DL, MVT::i32);
-    SDValue Ops[] = {Policy, Chain};
-    MachineSDNode *N = DAG.getMachineNode(AArch64::STSHH, DL, MVT::Other, Ops);
-    return SDValue(N, 0);
-  }
   case Intrinsic::aarch64_stshh_atomic_store: {
     SDValue Chain = Op.getOperand(0);
     SDValue Ptr = Op.getOperand(2);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td 
b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 749fd029180ae..86af20f0a52e3 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1878,8 +1878,7 @@ def phint_op : Operand<i32> {
 }
 
 class STSHHI
-    : SimpleSystemI<0, (ins phint_op:$policy), "stshh", "\t$policy",
-                    [(int_aarch64_stshh (i32 imm0_7_i32:$policy))]>,
+    : SimpleSystemI<0, (ins phint_op:$policy), "stshh", "\t$policy">,
       Sched<[WriteHint]> {
   bits<3> policy;
   let Inst{20-12} = 0b000011001;
diff --git a/llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll 
b/llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll
index 06affdf5ff650..60bbbbbcd7058 100644
--- a/llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll
+++ b/llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll
@@ -1,20 +1,22 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 6
 ; RUN: llc -mtriple=aarch64 -mattr=+v9.6a < %s | FileCheck %s
 
-define void @test_keep() {
-; CHECK-LABEL: test_keep:
+define void @test_keep_relaxed(ptr %p, i32 %v) {
+; CHECK-LABEL: test_keep_relaxed:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    str w1, [x0]
 ; CHECK-NEXT:    ret
-  call void @llvm.aarch64.stshh(i32 0)
+  call void @llvm.aarch64.stshh.atomic.store.p0.i32(ptr %p, i32 %v, i32 0, i32 
0)
   ret void
 }
 
-define void @test_strm() {
-; CHECK-LABEL: test_strm:
+define void @test_strm_release(ptr %p, i32 %v) {
+; CHECK-LABEL: test_strm_release:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    stlr w1, [x0]
 ; CHECK-NEXT:    ret
-  call void @llvm.aarch64.stshh(i32 1)
+  call void @llvm.aarch64.stshh.atomic.store.p0.i32(ptr %p, i32 %v, i32 3, i32 
1)
   ret void
 }

>From 4720046e6302a768b401f64df3dda20c2b390e5a Mon Sep 17 00:00:00 2001
From: Jonathan Thackray <[email protected]>
Date: Tue, 24 Feb 2026 20:46:29 +0000
Subject: [PATCH 10/11] fixup! Address more helpful review comments from Kerry

---
 clang/lib/Sema/SemaARM.cpp                    |   5 +-
 .../lib/Target/AArch64/AArch64InstrFormats.td |   5 -
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |   8 +-
 .../CodeGen/AArch64/pcdphint-atomic-store.ll  | 160 ++++++++++++++++++
 4 files changed, 165 insertions(+), 13 deletions(-)

diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index 561a8eebe4055..1d22abff33d48 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -1227,10 +1227,7 @@ static bool CheckAArch64AtomicStoreWithStshhCall(SemaARM 
&S,
   }
 
   // Arg 3 (retention policy) must be between KEEP(0) and STRM(1).
-  if (SemaRef.BuiltinConstantArgRange(TheCall, 3, 0, 1))
-    return true;
-
-  return false;
+  return SemaRef.BuiltinConstantArgRange(TheCall, 3, 0, 1);
 }
 
 bool SemaARM::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td 
b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 86af20f0a52e3..8f14ddd372844 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1183,11 +1183,6 @@ def imm0_7 : Operand<i64>, ImmLeaf<i64, [{
   let ParserMatchClass = Imm0_7Operand;
 }
 
-// imm0_7_i32 predicate - True if the immediate is in the range [0,7]
-def imm0_7_i32 : Operand<i32>, ImmLeaf<i32, [{
-  return ((uint32_t)Imm) < 8;
-}]>;
-
 // imm0_3 predicate - True if the immediate is in the range [0,3]
 def imm0_3 : Operand<i64>, ImmLeaf<i64, [{
   return ((uint64_t)Imm) < 4;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td 
b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 4726673cc2988..8f3e83d65474c 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1580,16 +1580,16 @@ def STSHH: STSHHI;
 let hasSideEffects = 1, mayStore = 1, isPseudo = 1, isCodeGenOnly = 1 in {
 def STSHH_ATOMIC_STORE_B
     : Pseudo<(outs), (ins GPR32:$val, GPR64sp:$addr, i32imm:$order,
-                          i64imm:$policy), []>, Sched<[]>;
+                          i32imm:$policy), []>, Sched<[]>;
 def STSHH_ATOMIC_STORE_H
     : Pseudo<(outs), (ins GPR32:$val, GPR64sp:$addr, i32imm:$order,
-                          i64imm:$policy), []>, Sched<[]>;
+                          i32imm:$policy), []>, Sched<[]>;
 def STSHH_ATOMIC_STORE_W
     : Pseudo<(outs), (ins GPR32:$val, GPR64sp:$addr, i32imm:$order,
-                          i64imm:$policy), []>, Sched<[]>;
+                          i32imm:$policy), []>, Sched<[]>;
 def STSHH_ATOMIC_STORE_X
     : Pseudo<(outs), (ins GPR64:$val, GPR64sp:$addr, i32imm:$order,
-                          i64imm:$policy), []>, Sched<[]>;
+                          i32imm:$policy), []>, Sched<[]>;
 }
 
 // In order to be able to write readable assembly, LLVM should accept assembly
diff --git a/llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll 
b/llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll
index 60bbbbbcd7058..67b89112f91a7 100644
--- a/llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll
+++ b/llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll
@@ -20,3 +20,163 @@ define void @test_strm_release(ptr %p, i32 %v) {
   call void @llvm.aarch64.stshh.atomic.store.p0.i32(ptr %p, i32 %v, i32 3, i32 
1)
   ret void
 }
+
+define void @test_keep_i8(ptr %p, i8 %v) {
+; CHECK-LABEL: test_keep_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    strb w1, [x0]
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.stshh.atomic.store.p0.i8(ptr %p, i8 %v, i32 0, i32 0)
+  ret void
+}
+
+define void @test_keep_i16(ptr %p, i16 %v) {
+; CHECK-LABEL: test_keep_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    strh w1, [x0]
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.stshh.atomic.store.p0.i16(ptr %p, i16 %v, i32 0, i32 
0)
+  ret void
+}
+
+define void @test_keep_i32(ptr %p, i32 %v) {
+; CHECK-LABEL: test_keep_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    str w1, [x0]
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.stshh.atomic.store.p0.i32(ptr %p, i32 %v, i32 0, i32 
0)
+  ret void
+}
+
+define void @test_keep_i64(ptr %p, i64 %v) {
+; CHECK-LABEL: test_keep_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    str x1, [x0]
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.stshh.atomic.store.p0.i64(ptr %p, i64 %v, i32 0, i32 
0)
+  ret void
+}
+
+define void @test_strm_i8(ptr %p, i8 %v) {
+; CHECK-LABEL: test_strm_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    strb w1, [x0]
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.stshh.atomic.store.p0.i8(ptr %p, i8 %v, i32 0, i32 1)
+  ret void
+}
+
+define void @test_strm_i16(ptr %p, i16 %v) {
+; CHECK-LABEL: test_strm_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    strh w1, [x0]
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.stshh.atomic.store.p0.i16(ptr %p, i16 %v, i32 0, i32 
1)
+  ret void
+}
+
+define void @test_strm_i32(ptr %p, i32 %v) {
+; CHECK-LABEL: test_strm_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    str w1, [x0]
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.stshh.atomic.store.p0.i32(ptr %p, i32 %v, i32 0, i32 
1)
+  ret void
+}
+
+define void @test_strm_i64(ptr %p, i64 %v) {
+; CHECK-LABEL: test_strm_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    str x1, [x0]
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.stshh.atomic.store.p0.i64(ptr %p, i64 %v, i32 0, i32 
1)
+  ret void
+}
+
+define void @test_strm_release_i8(ptr %p, i8 %v) {
+; CHECK-LABEL: test_strm_release_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    stlrb w1, [x0]
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.stshh.atomic.store.p0.i8(ptr %p, i8 %v, i32 3, i32 1)
+  ret void
+}
+
+define void @test_strm_release_i16(ptr %p, i16 %v) {
+; CHECK-LABEL: test_strm_release_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    stlrh w1, [x0]
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.stshh.atomic.store.p0.i16(ptr %p, i16 %v, i32 3, i32 
1)
+  ret void
+}
+
+define void @test_strm_release_i32(ptr %p, i32 %v) {
+; CHECK-LABEL: test_strm_release_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    stlr w1, [x0]
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.stshh.atomic.store.p0.i32(ptr %p, i32 %v, i32 3, i32 
1)
+  ret void
+}
+
+define void @test_strm_release_i64(ptr %p, i64 %v) {
+; CHECK-LABEL: test_strm_release_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    stlr x1, [x0]
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.stshh.atomic.store.p0.i64(ptr %p, i64 %v, i32 3, i32 
1)
+  ret void
+}
+
+define void @test_strm_seqcst_i8(ptr %p, i8 %v) {
+; CHECK-LABEL: test_strm_seqcst_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    stlrb w1, [x0]
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.stshh.atomic.store.p0.i8(ptr %p, i8 %v, i32 5, i32 1)
+  ret void
+}
+
+define void @test_strm_seqcst_i16(ptr %p, i16 %v) {
+; CHECK-LABEL: test_strm_seqcst_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    stlrh w1, [x0]
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.stshh.atomic.store.p0.i16(ptr %p, i16 %v, i32 5, i32 
1)
+  ret void
+}
+
+define void @test_strm_seqcst_i32(ptr %p, i32 %v) {
+; CHECK-LABEL: test_strm_seqcst_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    stlr w1, [x0]
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.stshh.atomic.store.p0.i32(ptr %p, i32 %v, i32 5, i32 
1)
+  ret void
+}
+
+define void @test_strm_seqcst_i64(ptr %p, i64 %v) {
+; CHECK-LABEL: test_strm_seqcst_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    stlr x1, [x0]
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.stshh.atomic.store.p0.i64(ptr %p, i64 %v, i32 5, i32 
1)
+  ret void
+}

>From 50b13c3df0df0afbb277b83754515d168b8dfb97 Mon Sep 17 00:00:00 2001
From: Jonathan Thackray <[email protected]>
Date: Tue, 24 Feb 2026 23:57:51 +0000
Subject: [PATCH 11/11] fixup! Fix tests

---
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp 
b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 1014bb5025c71..342b4b54c89fe 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2118,6 +2118,8 @@ AArch64TargetLowering::AArch64TargetLowering(const 
TargetMachine &TM,
     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
   }
 
+  setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
+  setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
 
   if (Subtarget->hasSVE()) {

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to