serge-sans-paille updated this revision to Diff 224254.
serge-sans-paille added a comment.

Added documentation + release note entry


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D68720/new/

https://reviews.llvm.org/D68720

Files:
  clang/docs/ReleaseNotes.rst
  clang/include/clang/Basic/CodeGenOptions.def
  clang/include/clang/Basic/DiagnosticFrontendKinds.td
  clang/include/clang/Basic/TargetInfo.h
  clang/include/clang/Driver/CC1Options.td
  clang/include/clang/Driver/Options.td
  clang/lib/Basic/Targets/X86.h
  clang/lib/CodeGen/CGStmt.cpp
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Frontend/CompilerInvocation.cpp
  clang/test/CodeGen/stack-clash-protection.c
  clang/test/Driver/stack-clash-protection.c
  llvm/docs/ReleaseNotes.rst
  llvm/lib/Target/X86/X86FrameLowering.cpp
  llvm/lib/Target/X86/X86FrameLowering.h
  llvm/lib/Target/X86/X86ISelLowering.cpp
  llvm/lib/Target/X86/X86ISelLowering.h
  llvm/lib/Target/X86/X86InstrCompiler.td
  llvm/lib/Target/X86/X86InstrInfo.td
  llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
  llvm/test/CodeGen/X86/stack-clash-medium-natural-probes.ll
  llvm/test/CodeGen/X86/stack-clash-medium.ll
  llvm/test/CodeGen/X86/stack-clash-small.ll

Index: llvm/test/CodeGen/X86/stack-clash-small.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/stack-clash-small.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s | FileCheck %s
+
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: foo:
+define i32 @foo() local_unnamed_addr #0 {
+; only one stack growth
+; CHECK: subq
+; CHECK-NOT: subq
+  %a = alloca i32, i64 100, align 16
+  %b = getelementptr inbounds i32, i32* %a, i64 98
+  store volatile i32 1, i32* %b
+  %c = load volatile i32, i32* %a
+  ret i32 %c
+}
+
+attributes #0 =  {"probe-stack"="inline-asm"}
Index: llvm/test/CodeGen/X86/stack-clash-medium.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/stack-clash-medium.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s | FileCheck %s
+
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: foo:
+define i32 @foo() local_unnamed_addr #0 {
+; two stack growth, with a natural probe inbetween and an extra probe afterward
+; CHECK: subq
+; CHECK: movl
+; CHECK: subq
+; CHECK: movq $0
+  %a = alloca i32, i64 2000, align 16
+  %b = getelementptr inbounds i32, i32* %a, i64 1198
+  store volatile i32 1, i32* %b
+  %c = load volatile i32, i32* %a
+  ret i32 %c
+}
+
+attributes #0 =  {"probe-stack"="inline-asm"}
Index: llvm/test/CodeGen/X86/stack-clash-medium-natural-probes.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/stack-clash-medium-natural-probes.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s | FileCheck %s
+
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: foo:
+define i32 @foo() local_unnamed_addr #0 {
+; two stack growth, with an extra probe inbetween and a natural probe afterward
+; CHECK: subq
+; CHECK-NOT: movq $0
+; CHECK: mov
+; CHECK: subq
+; CHECK-NOT: movq $0
+; CHECK: mov
+  %a = alloca i32, i64 2000, align 16
+  %b0 = getelementptr inbounds i32, i32* %a, i64 98
+  %b1 = getelementptr inbounds i32, i32* %a, i64 1198
+  store volatile i32 1, i32* %b0
+  store volatile i32 1, i32* %b1
+  %c = load volatile i32, i32* %a
+  ret i32 %c
+}
+
+attributes #0 =  {"probe-stack"="inline-asm"}
Index: llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s | FileCheck %s
+
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: foo:
+define i32 @foo(i32 %n) local_unnamed_addr #0 {
+; CHECK: subq   $4096, %rsp
+; CHECK: movq   $0, (%rsp)
+  %a = alloca i32, i32 %n, align 16
+  %b = getelementptr inbounds i32, i32* %a, i64 1198
+  store volatile i32 1, i32* %b
+  %c = load volatile i32, i32* %a
+  ret i32 %c
+}
+
+attributes #0 =  {"probe-stack"="inline-asm"}
Index: llvm/lib/Target/X86/X86InstrInfo.td
===================================================================
--- llvm/lib/Target/X86/X86InstrInfo.td
+++ llvm/lib/Target/X86/X86InstrInfo.td
@@ -121,6 +121,8 @@
 
 def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
 
+def SDT_X86PROBED_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
+
 def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
 
 def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
@@ -292,6 +294,9 @@
 def X86SegAlloca : SDNode<"X86ISD::SEG_ALLOCA", SDT_X86SEG_ALLOCA,
                           [SDNPHasChain]>;
 
+def X86ProbedAlloca : SDNode<"X86ISD::PROBED_ALLOCA", SDT_X86PROBED_ALLOCA,
+                          [SDNPHasChain]>;
+
 def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL,
                         [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
 
Index: llvm/lib/Target/X86/X86InstrCompiler.td
===================================================================
--- llvm/lib/Target/X86/X86InstrCompiler.td
+++ llvm/lib/Target/X86/X86InstrCompiler.td
@@ -111,6 +111,23 @@
                       [(set GR64:$dst,
                          (X86SegAlloca GR64:$size))]>,
                     Requires<[In64BitMode]>;
+
+// To protect against stack clash, dynamic allocation should perform a memory
+// probe at each page.
+
+let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
+def PROBED_ALLOCA_32 : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$size),
+                      "# variable sized alloca with probing",
+                      [(set GR32:$dst,
+                         (X86ProbedAlloca GR32:$size))]>,
+                    Requires<[NotLP64]>;
+
+let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in
+def PROBED_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size),
+                      "# variable sized alloca with probing",
+                      [(set GR64:$dst,
+                         (X86ProbedAlloca GR64:$size))]>,
+                    Requires<[In64BitMode]>;
 }
 
 // Dynamic stack allocation yields a _chkstk or _alloca call for all Windows
Index: llvm/lib/Target/X86/X86ISelLowering.h
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.h
+++ llvm/lib/Target/X86/X86ISelLowering.h
@@ -537,6 +537,10 @@
       // falls back to heap allocation if not.
       SEG_ALLOCA,
 
+      // For allocating stack space when using stack clash protector.
+      // Allocation is performed by block, and each block is probed.
+      PROBED_ALLOCA,
+
       // Memory barriers.
       MEMBARRIER,
       MFENCE,
@@ -1439,6 +1443,9 @@
     MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
                                             MachineBasicBlock *BB) const;
 
+    MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
+                                               MachineBasicBlock *BB) const;
+
     MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;
 
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -22144,7 +22144,8 @@
                                            SelectionDAG &DAG) const {
   MachineFunction &MF = DAG.getMachineFunction();
   bool SplitStack = MF.shouldSplitStack();
-  bool EmitStackProbe = !getStackProbeSymbolName(MF).empty();
+  const StringRef StackProbeSymbolName = getStackProbeSymbolName(MF);
+  bool EmitStackProbe = !StackProbeSymbolName.empty();
   bool Lower = (Subtarget.isOSWindows() && !Subtarget.isTargetMachO()) ||
                SplitStack || EmitStackProbe;
   SDLoc dl(Op);
@@ -22164,7 +22165,19 @@
   MVT SPTy = getPointerTy(DAG.getDataLayout());
 
   SDValue Result;
-  if (!Lower) {
+  const bool InlineStackClashProtector =
+      StackProbeSymbolName == "inline-asm" &&
+      !(Subtarget.isOSWindows() && !Subtarget.isTargetMachO());
+  if (InlineStackClashProtector) {
+    MachineRegisterInfo &MRI = MF.getRegInfo();
+
+    const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
+    Register Vreg = MRI.createVirtualRegister(AddrRegClass);
+    Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
+    Result = DAG.getNode(X86ISD::PROBED_ALLOCA, dl, SPTy, Chain,
+                         DAG.getRegister(Vreg, SPTy));
+
+  } else if (!Lower) {
     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
     unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
     assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
@@ -28702,6 +28715,8 @@
   case X86ISD::MEMBARRIER:         return "X86ISD::MEMBARRIER";
   case X86ISD::MFENCE:             return "X86ISD::MFENCE";
   case X86ISD::SEG_ALLOCA:         return "X86ISD::SEG_ALLOCA";
+  case X86ISD::PROBED_ALLOCA:
+    return "X86ISD::PROBED_ALLOCA";
   case X86ISD::SAHF:               return "X86ISD::SAHF";
   case X86ISD::RDRAND:             return "X86ISD::RDRAND";
   case X86ISD::RDSEED:             return "X86ISD::RDSEED";
@@ -29959,6 +29974,95 @@
 }
 
 MachineBasicBlock *
+X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
+                                           MachineBasicBlock *BB) const {
+  MachineFunction *MF = BB->getParent();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+  DebugLoc DL = MI.getDebugLoc();
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+
+  const bool Is64Bit = Subtarget.is64Bit();
+  const bool IsLP64 = Subtarget.isTarget64BitLP64();
+
+  const unsigned ProbeSize = getStackProbeSize(*MF);
+
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  MachineBasicBlock *testMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *tailMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *blockMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+
+  MachineFunction::iterator MBBIter = ++BB->getIterator();
+  MF->insert(MBBIter, testMBB);
+  MF->insert(MBBIter, blockMBB);
+  MF->insert(MBBIter, tailMBB);
+
+  unsigned sizeVReg = MI.getOperand(1).getReg();
+
+  const TargetRegisterClass *SizeRegClass = MRI.getRegClass(sizeVReg);
+
+  unsigned tmpSizeVReg = MRI.createVirtualRegister(SizeRegClass);
+  unsigned tmpSizeVReg2 = MRI.createVirtualRegister(SizeRegClass);
+
+  unsigned physSPReg =
+      IsLP64 || Subtarget.isTargetNaCl64() ? X86::RSP : X86::ESP;
+
+  // test rsp size
+  BuildMI(testMBB, DL, TII->get(X86::PHI), tmpSizeVReg)
+      .addReg(sizeVReg)
+      .addMBB(BB)
+      .addReg(tmpSizeVReg2)
+      .addMBB(blockMBB);
+
+  BuildMI(testMBB, DL, TII->get(IsLP64 ? X86::CMP64ri32 : X86::CMP32ri))
+      .addReg(tmpSizeVReg)
+      .addImm(ProbeSize);
+
+  BuildMI(testMBB, DL, TII->get(X86::JCC_1))
+      .addMBB(tailMBB)
+      .addImm(X86::COND_L);
+  testMBB->addSuccessor(blockMBB);
+  testMBB->addSuccessor(tailMBB);
+
+  // allocate a block and touch it
+
+  BuildMI(blockMBB, DL, TII->get(IsLP64 ? X86::SUB64ri32 : X86::SUB32ri),
+          tmpSizeVReg2)
+      .addReg(tmpSizeVReg)
+      .addImm(ProbeSize);
+
+  BuildMI(blockMBB, DL, TII->get(IsLP64 ? X86::SUB64ri32 : X86::SUB32ri),
+          physSPReg)
+      .addReg(physSPReg)
+      .addImm(ProbeSize);
+
+  const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
+  addRegOffset(BuildMI(blockMBB, DL, TII->get(MovMIOpc)), physSPReg, false, 0)
+      .addImm(0);
+
+  BuildMI(blockMBB, DL, TII->get(X86::JMP_1)).addMBB(testMBB);
+  blockMBB->addSuccessor(testMBB);
+
+  // allocate the tail and continue
+  BuildMI(tailMBB, DL, TII->get(IsLP64 ? X86::SUB64rr : X86::SUB32rr),
+          physSPReg)
+      .addReg(physSPReg)
+      .addReg(tmpSizeVReg);
+  BuildMI(tailMBB, DL, TII->get(TargetOpcode::COPY), MI.getOperand(0).getReg())
+      .addReg(physSPReg);
+
+  tailMBB->splice(tailMBB->end(), BB,
+                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
+  tailMBB->transferSuccessorsAndUpdatePHIs(BB);
+  BB->addSuccessor(testMBB);
+
+  // Delete the original pseudo instruction.
+  MI.eraseFromParent();
+
+  // And we're done.
+  return tailMBB;
+}
+
+MachineBasicBlock *
 X86TargetLowering::EmitLoweredSegAlloca(MachineInstr &MI,
                                         MachineBasicBlock *BB) const {
   MachineFunction *MF = BB->getParent();
@@ -31133,6 +31237,9 @@
   case X86::SEG_ALLOCA_32:
   case X86::SEG_ALLOCA_64:
     return EmitLoweredSegAlloca(MI, BB);
+  case X86::PROBED_ALLOCA_32:
+  case X86::PROBED_ALLOCA_64:
+    return EmitLoweredProbedAlloca(MI, BB);
   case X86::TLSCall_32:
   case X86::TLSCall_64:
     return EmitLoweredTLSCall(MI, BB);
Index: llvm/lib/Target/X86/X86FrameLowering.h
===================================================================
--- llvm/lib/Target/X86/X86FrameLowering.h
+++ llvm/lib/Target/X86/X86FrameLowering.h
@@ -128,6 +128,10 @@
   void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
                     const DebugLoc &DL, int64_t NumBytes, bool InEpilogue) const;
 
+  MachineBasicBlock::iterator
+  findFreeStackProbe(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+                     int64_t MinOffset, int64_t MaxOffset) const;
+
   /// Check that LEA can be used on SP in an epilogue sequence for \p MF.
   bool canUseLEAForSPInEpilogue(const MachineFunction &MF) const;
 
Index: llvm/lib/Target/X86/X86FrameLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86FrameLowering.cpp
+++ llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -343,6 +343,11 @@
   }
 }
 
+const DenseMap<unsigned, unsigned> OpcodeToSPOperandIndex = {
+    {X86::MOV8mi, 3}, {X86::MOV16mi, 3}, {X86::MOV32mi, 3}, {X86::MOV64mi32, 3},
+    {X86::MOV8mr, 3}, {X86::MOV16mr, 3}, {X86::MOV32mr, 3}, {X86::MOV64mr, 3},
+};
+
 MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
     const DebugLoc &DL, int64_t Offset, bool InEpilogue) const {
@@ -381,16 +386,98 @@
   } else {
     bool IsSub = Offset < 0;
     uint64_t AbsOffset = IsSub ? -Offset : Offset;
-    unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr, AbsOffset)
-                         : getADDriOpcode(Uses64BitFramePtr, AbsOffset);
-    MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
-             .addReg(StackPtr)
-             .addImm(AbsOffset);
-    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+    const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr, AbsOffset)
+                               : getADDriOpcode(Uses64BitFramePtr, AbsOffset);
+    const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
+    uint64_t CurrentAbsOffset = 0;
+    MachineFunction &MF = *MBB.getParent();
+    const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+    const X86TargetLowering &TLI = *STI.getTargetLowering();
+    const StringRef StackProbeSymbolName = TLI.getStackProbeSymbolName(MF);
+    const bool InlineStackClashProtector =
+        StackProbeSymbolName == "inline-asm" &&
+        !(STI.isOSWindows() && !STI.isTargetMachO());
+    if (InlineStackClashProtector && !InEpilogue) {
+      const uint64_t PageSize = TLI.getStackProbeSize(MF);
+      while (CurrentAbsOffset < AbsOffset) {
+        uint64_t ChunkSize = std::min(AbsOffset - CurrentAbsOffset, PageSize);
+        MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
+                 .addReg(StackPtr)
+                 .addImm(ChunkSize);
+        CurrentAbsOffset += ChunkSize;
+        MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+
+        // We need to touch the stack allocated by MI. Instead of generating
+        // a probe, we try to reuse existing stack operation available in the
+        // EntryBlock, starting from MI. findFreeStackProbe gives us such a
+        // probe, if any and continuing allocation from that probe doesn't
+        // conflict with instructions between MI and the free stack probe.
+
+        auto FreeProbeIterator =
+            findFreeStackProbe(MBB, MBBI, AbsOffset - CurrentAbsOffset,
+                               AbsOffset - CurrentAbsOffset + PageSize);
+        if (FreeProbeIterator != MBB.end()) {
+          MachineInstr &FreeProbe = *FreeProbeIterator;
+          unsigned OffsetIndex =
+              OpcodeToSPOperandIndex.find(FreeProbe.getOpcode())->second;
+          FreeProbe.getOperand(OffsetIndex)
+              .setImm(FreeProbe.getOperand(OffsetIndex).getImm() -
+                      (AbsOffset - CurrentAbsOffset));
+          MBBI = std::next(FreeProbeIterator);
+        } else {
+          addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc)), StackPtr,
+                       false, 0)
+              .addImm(0);
+        }
+      }
+    } else {
+      MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
+               .addReg(StackPtr)
+               .addImm(AbsOffset);
+      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+    }
   }
   return MI;
 }
 
+/// findFreeStackProbe - traverse instruction from @p MBBI to the end of @p MBB,
+/// stoping when either of the following happens first:
+///
+/// - a MachineInstruction writing to the stack at an offset statically known
+///   to be in [MinOffset, MaxOffset] is found, and return an iterator to that
+///   instruction.
+/// - a MachineInstruction reading or writing to the stack at an offset not
+///   statically known is found, and return MBBI.end().
+/// - none of the above happen, and return MBBI.end().
+MachineBasicBlock::iterator X86FrameLowering::findFreeStackProbe(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, int64_t MinOffset,
+    int64_t MaxOffset) const {
+  for (; MBBI != MBB.end(); ++MBBI) {
+    MachineInstr &MI = *MBBI;
+    auto StackOpCode = OpcodeToSPOperandIndex.find(MI.getOpcode());
+    if (StackOpCode != OpcodeToSPOperandIndex.end()) {
+      auto &Dst = MI.getOperand(0);
+      if (!Dst.isFI())
+        continue;
+      auto &MOOffset = MI.getOperand(StackOpCode->second);
+      if (MOOffset.isImm()) {
+        int64_t Offset = MOOffset.getImm();
+        if (MinOffset <= Offset && Offset <= MaxOffset) {
+          return MBBI;
+        }
+        continue;
+      }
+      // don't know where we write, stopping
+      break;
+    }
+    if (std::any_of(MI.operands_begin(), MI.operands_end(),
+                    [](MachineOperand &MO) { return MO.isFI(); })) {
+      break; // effect on stack pointer not modelled, stopping
+    }
+  }
+  return MBB.end();
+}
+
 int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator &MBBI,
                                      bool doMergeWithPrevious) const {
@@ -1015,7 +1102,10 @@
     X86FI->setCalleeSavedFrameSize(
       X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
 
-  bool UseStackProbe = !STI.getTargetLowering()->getStackProbeSymbolName(MF).empty();
+  const StringRef StackProbeSymbolName =
+      STI.getTargetLowering()->getStackProbeSymbolName(MF);
+  bool UseStackProbe =
+      !StackProbeSymbolName.empty() && StackProbeSymbolName != "inline-asm";
   unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);
 
   // Re-align the stack on 64-bit if the x86-interrupt calling convention is
Index: llvm/docs/ReleaseNotes.rst
===================================================================
--- llvm/docs/ReleaseNotes.rst
+++ llvm/docs/ReleaseNotes.rst
@@ -101,6 +101,10 @@
   decrease in CPU frequency on these CPUs. This can be re-enabled by passing
   -mprefer-vector-width=512 to clang or passing -mattr=-prefer-256-bit to llc.
 
+* Functions with the probe-stack attribute set to "inline-asm" are now protected
+  against stack clash without the need of a third-party probing function and
+  with limited impact on performance.
+
 Changes to the AMDGPU Target
 -----------------------------
 
Index: clang/test/Driver/stack-clash-protection.c
===================================================================
--- /dev/null
+++ clang/test/Driver/stack-clash-protection.c
@@ -0,0 +1,22 @@
+// RUN: %clang -target i386-unknown-linux -fstack-clash-protection -### %s 2>&1 | FileCheck %s -check-prefix=SCP-i386
+// SCP-i386: "-stack-clash-protection"
+
+// RUN: %clang -target x86_64-scei-ps4 -fstack-clash-protection -### %s 2>&1 | FileCheck %s -check-prefix=SCP-x86
+// SCP-x86: "-stack-clash-protection"
+
+// RUN: %clang -target armv7k-apple-watchos2.0 -fstack-clash-protection -### %s 2>&1 | FileCheck %s -check-prefix=SCP-armv7
+// SCP-armv7-NOT: "-stack-clash-protection"
+
+// RUN: %clang -target x86_64-unknown-linux -fstack-clash-protection -c %s 2>&1 | FileCheck %s -check-prefix=SCP-warn
+// SCP-warn: warning: Unable to protect inline asm that clobbers stack pointer against stack clash
+
+int foo(int c) {
+  int r;
+  __asm__("sub %0, %%rsp"
+          :
+          : "rm"(c)
+          : "rsp");
+  __asm__("mov %%rsp, %0"
+          : "=rm"(r)::);
+  return r;
+}
Index: clang/test/CodeGen/stack-clash-protection.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/stack-clash-protection.c
@@ -0,0 +1,40 @@
+// RUN: %clang -S -target x86_64 -o - %s -fstack-clash-protection | FileCheck %s
+// RUN: %clang -target x86_64 -o %t.out %s -fstack-clash-protection && %t.out
+
+#include <alloca.h>
+
+// CHECK-LABEL: @main
+// static alloca instrumentation
+// CHECK: subq  $4096
+// CHECK: mov
+// CHECK: subq  $4096
+//
+// VLA instrumentation
+// CHECK: subq  $4096, %rax
+// CHECK: subq  $4096, %rsp
+//
+// dynamic alloca instrumentation (1)
+// CHECK: subq  $4096, %rax
+// CHECK: subq  $4096, %rsp
+//
+// dynamic alloca instrumentation (2)
+// CHECK: subq  $4096, %rax
+// CHECK: subq  $4096, %rsp
+//
+int main(int argc, char **argv) {
+  int volatile static_mem[8000];
+  for (size_t i = 0; i < argc * sizeof(static_mem) / sizeof(static_mem[0]); ++i)
+    static_mem[i] = argc * i;
+
+  int vla[argc];
+  memset(&vla[0], 0, argc);
+
+  // also check allocation of 0 size
+  volatile void *mem = __builtin_alloca(argc - 1);
+
+  int volatile *dyn_mem = alloca(sizeof(static_mem) * argc);
+  for (size_t i = 0; i < argc * sizeof(static_mem) / sizeof(static_mem[0]); ++i)
+    dyn_mem[i] = argc * i;
+
+  return static_mem[(7999 * argc) / 2] - dyn_mem[(7999 * argc) / 2] + vla[argc - 1];
+}
Index: clang/lib/Frontend/CompilerInvocation.cpp
===================================================================
--- clang/lib/Frontend/CompilerInvocation.cpp
+++ clang/lib/Frontend/CompilerInvocation.cpp
@@ -1213,6 +1213,8 @@
 
   Opts.NoStackArgProbe = Args.hasArg(OPT_mno_stack_arg_probe);
 
+  Opts.StackClashProtector = Args.hasArg(OPT_stack_clash_protection);
+
   if (Arg *A = Args.getLastArg(OPT_fobjc_dispatch_method_EQ)) {
     StringRef Name = A->getValue();
     unsigned Method = llvm::StringSwitch<unsigned>(Name)
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -2603,6 +2603,29 @@
   }
 }
 
+static void RenderSCPOptions(const ToolChain &TC, const ArgList &Args,
+                             ArgStringList &CmdArgs) {
+  const llvm::Triple &EffectiveTriple = TC.getEffectiveTriple();
+
+  for (const Arg *A : Args) {
+    switch (A->getOption().getID()) {
+    default:
+      continue;
+    case options::OPT_fstack_clash_protection: {
+      switch (EffectiveTriple.getArch()) {
+      default:
+        return;
+      case llvm::Triple::ArchType::x86:
+      case llvm::Triple::ArchType::x86_64:
+        break;
+      }
+      A->claim();
+      CmdArgs.push_back("-stack-clash-protection");
+    }
+    }
+  }
+}
+
 static void RenderTrivialAutoVarInitOptions(const Driver &D,
                                             const ToolChain &TC,
                                             const ArgList &Args,
@@ -4722,6 +4745,7 @@
     CmdArgs.push_back(Args.MakeArgString("-mspeculative-load-hardening"));
 
   RenderSSPOptions(TC, Args, CmdArgs, KernelOrKext);
+  RenderSCPOptions(TC, Args, CmdArgs);
   RenderTrivialAutoVarInitOptions(D, TC, Args, CmdArgs);
 
   // Translate -mstackrealign
Index: clang/lib/CodeGen/CodeGenModule.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -1486,6 +1486,9 @@
   if (CodeGenOpts.UnwindTables)
     B.addAttribute(llvm::Attribute::UWTable);
 
+  if (CodeGenOpts.StackClashProtector)
+    B.addAttribute("probe-stack", "inline-asm");
+
   if (!hasUnwindExceptions(LangOpts))
     B.addAttribute(llvm::Attribute::NoUnwind);
 
Index: clang/lib/CodeGen/CGStmt.cpp
===================================================================
--- clang/lib/CodeGen/CGStmt.cpp
+++ clang/lib/CodeGen/CGStmt.cpp
@@ -10,14 +10,16 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "CodeGenFunction.h"
 #include "CGDebugInfo.h"
+#include "CodeGenFunction.h"
 #include "CodeGenModule.h"
 #include "TargetInfo.h"
 #include "clang/AST/StmtVisitor.h"
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/PrettyStackTrace.h"
 #include "clang/Basic/TargetInfo.h"
+#include "clang/Driver/DriverDiagnostic.h"
+#include "clang/Frontend/FrontendDiagnostic.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/InlineAsm.h"
@@ -2229,8 +2231,14 @@
 
     if (Clobber == "memory")
       ReadOnly = ReadNone = false;
-    else if (Clobber != "cc")
+    else if (Clobber != "cc") {
       Clobber = getTarget().getNormalizedGCCRegisterName(Clobber);
+      if (CGM.getCodeGenOpts().StackClashProtector &&
+          Clobber == getTarget().getSPRegName()) {
+        CGM.getDiags().Report(S.getAsmLoc(),
+                              diag::warn_fe_stack_clash_protection_inline_asm);
+      }
+    }
 
     if (!Constraints.empty())
       Constraints += ',';
Index: clang/lib/Basic/Targets/X86.h
===================================================================
--- clang/lib/Basic/Targets/X86.h
+++ clang/lib/Basic/Targets/X86.h
@@ -149,6 +149,8 @@
 
   ArrayRef<TargetInfo::AddlRegName> getGCCAddlRegNames() const override;
 
+  const char *getSPRegName() const override { return "rsp"; }
+
   bool validateCpuSupports(StringRef Name) const override;
 
   bool validateCpuIs(StringRef Name) const override;
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -1706,6 +1706,8 @@
 def fsplit_stack : Flag<["-"], "fsplit-stack">, Group<f_Group>;
 def fstack_protector_all : Flag<["-"], "fstack-protector-all">, Group<f_Group>,
   HelpText<"Enable stack protectors for all functions">;
+def fstack_clash_protection : Flag<["-"], "fstack-clash-protection">, Group<f_Group>,
+  HelpText<"Enable stack clash protection">;
 def fstack_protector_strong : Flag<["-"], "fstack-protector-strong">, Group<f_Group>,
   HelpText<"Enable stack protectors for some functions vulnerable to stack smashing. "
            "Compared to -fstack-protector, this uses a stronger heuristic "
Index: clang/include/clang/Driver/CC1Options.td
===================================================================
--- clang/include/clang/Driver/CC1Options.td
+++ clang/include/clang/Driver/CC1Options.td
@@ -736,6 +736,8 @@
   HelpText<"Enable stack protectors">;
 def stack_protector_buffer_size : Separate<["-"], "stack-protector-buffer-size">,
   HelpText<"Lower bound for a buffer to be considered for stack protection">;
+def stack_clash_protection : Separate<["-"], "stack-clash-protection">,
+  HelpText<"Enable stack clash protection">;
 def fvisibility : Separate<["-"], "fvisibility">,
   HelpText<"Default type and symbol visibility">;
 def ftype_visibility : Separate<["-"], "ftype-visibility">,
Index: clang/include/clang/Basic/TargetInfo.h
===================================================================
--- clang/include/clang/Basic/TargetInfo.h
+++ clang/include/clang/Basic/TargetInfo.h
@@ -815,6 +815,8 @@
   StringRef getNormalizedGCCRegisterName(StringRef Name,
                                          bool ReturnCanonical = false) const;
 
+  virtual const char *getSPRegName() const { return nullptr; }
+
   /// Extracts a register from the passed constraint (if it is a
   /// single-register constraint) and the asm label expression related to a
   /// variable in the input or output list of an inline asm statement.
Index: clang/include/clang/Basic/DiagnosticFrontendKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticFrontendKinds.td
+++ clang/include/clang/Basic/DiagnosticFrontendKinds.td
@@ -17,6 +17,9 @@
 
 def err_fe_inline_asm : Error<"%0">, CatInlineAsm;
 def warn_fe_inline_asm : Warning<"%0">, CatInlineAsm, InGroup<BackendInlineAsm>;
+def warn_fe_stack_clash_protection_inline_asm : Warning<
+    "Unable to protect inline asm that clobbers stack pointer against stack clash">,
+    CatInlineAsm, InGroup<BackendInlineAsm>;
 def note_fe_inline_asm : Note<"%0">, CatInlineAsm;
 def note_fe_inline_asm_here : Note<"instantiated into assembly here">;
 def err_fe_cannot_link_module : Error<"cannot link module '%0': %1">,
Index: clang/include/clang/Basic/CodeGenOptions.def
===================================================================
--- clang/include/clang/Basic/CodeGenOptions.def
+++ clang/include/clang/Basic/CodeGenOptions.def
@@ -135,6 +135,7 @@
                                      ///< enabled.
 CODEGENOPT(NoWarn            , 1, 0) ///< Set when -Wa,--no-warn is enabled.
 CODEGENOPT(EnableSegmentedStacks , 1, 0) ///< Set when -fsplit-stack is enabled.
+CODEGENOPT(StackClashProtector, 1, 0) ///< Set when -fstack-clash-protection is enabled.
 CODEGENOPT(NoImplicitFloat   , 1, 0) ///< Set when -mno-implicit-float is enabled.
 CODEGENOPT(NoInfsFPMath      , 1, 0) ///< Assume FP arguments, results not +-Inf.
 CODEGENOPT(NoSignedZeros     , 1, 0) ///< Allow ignoring the signedness of FP zero
Index: clang/docs/ReleaseNotes.rst
===================================================================
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -70,7 +70,9 @@
 New Compiler Flags
 ------------------
 
-- ...
+- -fstack-clash-protection will provide a protection against the stack clash
+  attack for x86 architecture through automatic probing of each page of
+  allocated stack.
 
 Deprecated Compiler Flags
 -------------------------
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to