sconstab created this revision.
sconstab added reviewers: craig.topper, andrew.w.kaylor, zbrid, chandlerc.
Herald added subscribers: jfb, hiraditya.
sconstab added a parent revision: D76811: [X86] Refactor X86IndirectThunks.cpp
to Accommodate Mitigations other than Retpoline [2/3].
sconstab retitled this revision from "Add Indirect Thunk Support to X86 to
mitigate Load Value Injection (LVI) [3/3]" to "[X86] Add Indirect Thunk Support
to X86 to mitigate Load Value Injection (LVI) [3/3]".
This pass replaces each indirect call/jump with a direct call to a thunk that
looks like:
lfence
jmpq *%r11
This ensures that if the value in register `%r11` was loaded from memory, then
the value in `%r11` is (architecturally) correct prior to the jump.
Also adds a new target feature to X86: +lvi-cfi
("cfi" meaning control-flow integrity)
The feature can be added via clang CLI using `-mlvi-cfi`.
This is an alternate implementation to https://reviews.llvm.org/D75934 that
merges the thunk insertion functionality with the existing X86 retpoline code.
https://reviews.llvm.org/D76812
Files:
clang/include/clang/Driver/Options.td
clang/lib/Driver/ToolChains/Arch/X86.cpp
llvm/lib/Target/X86/X86.td
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86IndirectThunks.cpp
llvm/lib/Target/X86/X86Subtarget.h
llvm/test/CodeGen/X86/lvi-hardening-indirectbr.ll
Index: llvm/test/CodeGen/X86/lvi-hardening-indirectbr.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/lvi-hardening-indirectbr.ll
@@ -0,0 +1,281 @@
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown -mattr=+lvi-cfi < %s | FileCheck %s --check-prefix=X64
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown -mattr=+lvi-cfi -O0 < %s | FileCheck %s --check-prefix=X64FAST
+;
+; Note that a lot of this code was lifted from retpoline.ll.
+
+declare void @bar(i32)
+
+; Test a simple indirect call and tail call.
+define void @icall_reg(void (i32)* %fp, i32 %x) {
+entry:
+ tail call void @bar(i32 %x)
+ tail call void %fp(i32 %x)
+ tail call void @bar(i32 %x)
+ tail call void %fp(i32 %x)
+ ret void
+}
+
+; X64-LABEL: icall_reg:
+; X64-DAG: movq %rdi, %[[fp:[^ ]*]]
+; X64-DAG: movl %esi, %[[x:[^ ]*]]
+; X64: movl %esi, %edi
+; X64: callq bar
+; X64-DAG: movl %[[x]], %edi
+; X64-DAG: movq %[[fp]], %r11
+; X64: callq __llvm_lvi_thunk_r11
+; X64: movl %[[x]], %edi
+; X64: callq bar
+; X64-DAG: movl %[[x]], %edi
+; X64-DAG: movq %[[fp]], %r11
+; X64: jmp __llvm_lvi_thunk_r11 # TAILCALL
+
+; X64FAST-LABEL: icall_reg:
+; X64FAST: callq bar
+; X64FAST: callq __llvm_lvi_thunk_r11
+; X64FAST: callq bar
+; X64FAST: jmp __llvm_lvi_thunk_r11 # TAILCALL
+
+
+@global_fp = external global void (i32)*
+
+; Test an indirect call through a global variable.
+define void @icall_global_fp(i32 %x, void (i32)** %fpp) #0 {
+ %fp1 = load void (i32)*, void (i32)** @global_fp
+ call void %fp1(i32 %x)
+ %fp2 = load void (i32)*, void (i32)** @global_fp
+ tail call void %fp2(i32 %x)
+ ret void
+}
+
+; X64-LABEL: icall_global_fp:
+; X64-DAG: movl %edi, %[[x:[^ ]*]]
+; X64-DAG: movq global_fp(%rip), %r11
+; X64: callq __llvm_lvi_thunk_r11
+; X64-DAG: movl %[[x]], %edi
+; X64-DAG: movq global_fp(%rip), %r11
+; X64: jmp __llvm_lvi_thunk_r11 # TAILCALL
+
+; X64FAST-LABEL: icall_global_fp:
+; X64FAST: movq global_fp(%rip), %r11
+; X64FAST: callq __llvm_lvi_thunk_r11
+; X64FAST: movq global_fp(%rip), %r11
+; X64FAST: jmp __llvm_lvi_thunk_r11 # TAILCALL
+
+
+%struct.Foo = type { void (%struct.Foo*)** }
+
+; Test an indirect call through a vtable.
+define void @vcall(%struct.Foo* %obj) #0 {
+ %vptr_field = getelementptr %struct.Foo, %struct.Foo* %obj, i32 0, i32 0
+ %vptr = load void (%struct.Foo*)**, void (%struct.Foo*)*** %vptr_field
+ %vslot = getelementptr void(%struct.Foo*)*, void(%struct.Foo*)** %vptr, i32 1
+ %fp = load void(%struct.Foo*)*, void(%struct.Foo*)** %vslot
+ tail call void %fp(%struct.Foo* %obj)
+ tail call void %fp(%struct.Foo* %obj)
+ ret void
+}
+
+; X64-LABEL: vcall:
+; X64: movq %rdi, %[[obj:[^ ]*]]
+; X64: movq (%rdi), %[[vptr:[^ ]*]]
+; X64: movq 8(%[[vptr]]), %[[fp:[^ ]*]]
+; X64: movq %[[fp]], %r11
+; X64: callq __llvm_lvi_thunk_r11
+; X64-DAG: movq %[[obj]], %rdi
+; X64-DAG: movq %[[fp]], %r11
+; X64: jmp __llvm_lvi_thunk_r11 # TAILCALL
+
+; X64FAST-LABEL: vcall:
+; X64FAST: callq __llvm_lvi_thunk_r11
+; X64FAST: jmp __llvm_lvi_thunk_r11 # TAILCALL
+
+
+declare void @direct_callee()
+
+define void @direct_tail() #0 {
+ tail call void @direct_callee()
+ ret void
+}
+
+; X64-LABEL: direct_tail:
+; X64: jmp direct_callee # TAILCALL
+; X64FAST-LABEL: direct_tail:
+; X64FAST: jmp direct_callee # TAILCALL
+
+
+declare void @nonlazybind_callee() #1
+
+define void @nonlazybind_caller() #0 {
+ call void @nonlazybind_callee()
+ tail call void @nonlazybind_callee()
+ ret void
+}
+
+; X64-LABEL: nonlazybind_caller:
+; X64: movq nonlazybind_callee@GOTPCREL(%rip), %[[REG:.*]]
+; X64: movq %[[REG]], %r11
+; X64: callq __llvm_lvi_thunk_r11
+; X64: movq %[[REG]], %r11
+; X64: jmp __llvm_lvi_thunk_r11 # TAILCALL
+; X64FAST-LABEL: nonlazybind_caller:
+; X64FAST: movq nonlazybind_callee@GOTPCREL(%rip), %r11
+; X64FAST: callq __llvm_lvi_thunk_r11
+; X64FAST: movq nonlazybind_callee@GOTPCREL(%rip), %r11
+; X64FAST: jmp __llvm_lvi_thunk_r11 # TAILCALL
+
+
+; Check that a switch is lowered without an indirect jump (i.e. not via a jump table)
+define void @switch_jumptable(i32* %ptr, i64* %sink) #0 {
+; X64-LABEL: switch_jumptable:
+; X64-NOT: jmpq *
+entry:
+ br label %header
+
+header:
+ %i = load volatile i32, i32* %ptr
+ switch i32 %i, label %bb0 [
+ i32 1, label %bb1
+ i32 2, label %bb2
+ i32 3, label %bb3
+ i32 4, label %bb4
+ i32 5, label %bb5
+ i32 6, label %bb6
+ i32 7, label %bb7
+ i32 8, label %bb8
+ i32 9, label %bb9
+ ]
+
+bb0:
+ store volatile i64 0, i64* %sink
+ br label %header
+
+bb1:
+ store volatile i64 1, i64* %sink
+ br label %header
+
+bb2:
+ store volatile i64 2, i64* %sink
+ br label %header
+
+bb3:
+ store volatile i64 3, i64* %sink
+ br label %header
+
+bb4:
+ store volatile i64 4, i64* %sink
+ br label %header
+
+bb5:
+ store volatile i64 5, i64* %sink
+ br label %header
+
+bb6:
+ store volatile i64 6, i64* %sink
+ br label %header
+
+bb7:
+ store volatile i64 7, i64* %sink
+ br label %header
+
+bb8:
+ store volatile i64 8, i64* %sink
+ br label %header
+
+bb9:
+ store volatile i64 9, i64* %sink
+ br label %header
+}
+
+
+@indirectbr_rewrite.targets = constant [10 x i8*] [i8* blockaddress(@indirectbr_rewrite, %bb0),
+ i8* blockaddress(@indirectbr_rewrite, %bb1),
+ i8* blockaddress(@indirectbr_rewrite, %bb2),
+ i8* blockaddress(@indirectbr_rewrite, %bb3),
+ i8* blockaddress(@indirectbr_rewrite, %bb4),
+ i8* blockaddress(@indirectbr_rewrite, %bb5),
+ i8* blockaddress(@indirectbr_rewrite, %bb6),
+ i8* blockaddress(@indirectbr_rewrite, %bb7),
+ i8* blockaddress(@indirectbr_rewrite, %bb8),
+ i8* blockaddress(@indirectbr_rewrite, %bb9)]
+
+; Check that when thunks are enabled the indirectbr instruction gets
+; rewritten to use switch, and that in turn doesn't get lowered as a jump
+; table.
+define void @indirectbr_rewrite(i64* readonly %p, i64* %sink) #0 {
+; X64-LABEL: indirectbr_rewrite:
+; X64-NOT: jmpq *
+entry:
+ %i0 = load i64, i64* %p
+ %target.i0 = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_rewrite.targets, i64 0, i64 %i0
+ %target0 = load i8*, i8** %target.i0
+ indirectbr i8* %target0, [label %bb1, label %bb3]
+
+bb0:
+ store volatile i64 0, i64* %sink
+ br label %latch
+
+bb1:
+ store volatile i64 1, i64* %sink
+ br label %latch
+
+bb2:
+ store volatile i64 2, i64* %sink
+ br label %latch
+
+bb3:
+ store volatile i64 3, i64* %sink
+ br label %latch
+
+bb4:
+ store volatile i64 4, i64* %sink
+ br label %latch
+
+bb5:
+ store volatile i64 5, i64* %sink
+ br label %latch
+
+bb6:
+ store volatile i64 6, i64* %sink
+ br label %latch
+
+bb7:
+ store volatile i64 7, i64* %sink
+ br label %latch
+
+bb8:
+ store volatile i64 8, i64* %sink
+ br label %latch
+
+bb9:
+ store volatile i64 9, i64* %sink
+ br label %latch
+
+latch:
+ %i.next = load i64, i64* %p
+ %target.i.next = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_rewrite.targets, i64 0, i64 %i.next
+ %target.next = load i8*, i8** %target.i.next
+ ; Potentially hit a full 10 successors here so that even if we rewrite as
+ ; a switch it will try to be lowered with a jump table.
+ indirectbr i8* %target.next, [label %bb0,
+ label %bb1,
+ label %bb2,
+ label %bb3,
+ label %bb4,
+ label %bb5,
+ label %bb6,
+ label %bb7,
+ label %bb8,
+ label %bb9]
+}
+
+; Lastly check that the necessary thunks were emitted.
+;
+; X64-LABEL: .section .text.__llvm_lvi_thunk_r11,{{.*}},__llvm_lvi_thunk_r11,comdat
+; X64-NEXT: .hidden __llvm_lvi_thunk_r11
+; X64-NEXT: .weak __llvm_lvi_thunk_r11
+; X64: __llvm_lvi_thunk_r11:
+; X64-NEXT: # {{.*}} # %entry
+; X64-NEXT: lfence
+; X64-NEXT: jmpq *%r11
+
+attributes #1 = { nonlazybind }
Index: llvm/lib/Target/X86/X86Subtarget.h
===================================================================
--- llvm/lib/Target/X86/X86Subtarget.h
+++ llvm/lib/Target/X86/X86Subtarget.h
@@ -425,6 +425,12 @@
/// than emitting one inside the compiler.
bool UseRetpolineExternalThunk = false;
+ /// Prevent generation of indirect call/branch instructions from memory,
+ /// and force all indirect call/branch instructions from a register to be
+ /// preceded by an LFENCE. Also decompose RET instructions into a
+ /// POP+LFENCE+JMP sequence.
+ bool UseLVIControlFlowIntegrity = false;
+
/// Use software floating point for code generation.
bool UseSoftFloat = false;
@@ -715,13 +721,16 @@
// These are generic getters that OR together all of the thunk types
// supported by the subtarget. Therefore useIndirectThunk*() will return true
// if any respective thunk feature is enabled.
- bool useIndirectThunkCalls() const { return useRetpolineIndirectCalls(); }
+ bool useIndirectThunkCalls() const {
+ return useRetpolineIndirectCalls() || useLVIControlFlowIntegrity();
+ }
bool useIndirectThunkBranches() const {
- return useRetpolineIndirectBranches();
+ return useRetpolineIndirectBranches() || useLVIControlFlowIntegrity();
}
bool preferMaskRegisters() const { return PreferMaskRegisters; }
bool useGLMDivSqrtCosts() const { return UseGLMDivSqrtCosts; }
+ bool useLVIControlFlowIntegrity() const { return UseLVIControlFlowIntegrity; }
unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
unsigned getRequiredVectorWidth() const { return RequiredVectorWidth; }
Index: llvm/lib/Target/X86/X86IndirectThunks.cpp
===================================================================
--- llvm/lib/Target/X86/X86IndirectThunks.cpp
+++ llvm/lib/Target/X86/X86IndirectThunks.cpp
@@ -47,6 +47,9 @@
static const char EDXRetpolineName[] = "__llvm_retpoline_edx";
static const char EDIRetpolineName[] = "__llvm_retpoline_edi";
+static const char LVIThunkNamePrefix[] = "__llvm_lvi_thunk_";
+static const char R11LVIThunkName[] = "__llvm_lvi_thunk_r11";
+
namespace {
template <typename Derived> class ThunkInserter {
Derived &getDerived() { return *static_cast<Derived *>(this); }
@@ -77,6 +80,31 @@
void populateThunk(MachineFunction &MF);
};
+struct LVIThunkInserter : ThunkInserter<LVIThunkInserter> {
+ const char *getThunkPrefix() { return LVIThunkNamePrefix; }
+ bool mayUseThunk(const MachineFunction &MF) {
+ return MF.getSubtarget<X86Subtarget>().useLVIControlFlowIntegrity();
+ }
+ void insertThunks(MachineModuleInfo &MMI) {
+ createThunkFunction(MMI, R11LVIThunkName);
+ }
+ void populateThunk(MachineFunction &MF) {
+ // This code mitigates LVI by replacing each indirect call/jump with a direct
+ // call/jump to a thunk that looks like:
+ // ```
+ // lfence
+ // jmpq *%r11
+ // ```
+ // This ensures that if the value in register %r11 was loaded from memory, then
+ // the value in %r11 is (architecturally) correct prior to the jump.
+ const TargetInstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
+ BuildMI(&MF.front(), DebugLoc(), TII->get(X86::LFENCE));
+ BuildMI(&MF.front(), DebugLoc(), TII->get(X86::JMP64r)).addReg(X86::R11);
+ MF.front().addLiveIn(X86::R11);
+ return;
+ }
+};
+
class X86IndirectThunks : public MachineFunctionPass {
public:
static char ID;
@@ -95,7 +123,7 @@
}
private:
- std::tuple<RetpolineThunkInserter> TIs;
+ std::tuple<RetpolineThunkInserter, LVIThunkInserter> TIs;
// FIXME: When LLVM moves to C++17, these can become folds
template <typename... ThunkInserterT>
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -31839,6 +31839,11 @@
}
llvm_unreachable("unexpected reg for retpoline");
}
+
+ if (Subtarget.useLVIControlFlowIntegrity()) {
+ assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
+ return "__llvm_lvi_thunk_r11";
+ }
llvm_unreachable("getIndirectThunkSymbol() invoked without thunk feature");
}
Index: llvm/lib/Target/X86/X86.td
===================================================================
--- llvm/lib/Target/X86/X86.td
+++ llvm/lib/Target/X86/X86.td
@@ -431,6 +431,15 @@
"ourselves. Only has effect when combined with some other retpoline "
"feature", [FeatureRetpolineIndirectCalls]>;
+// Mitigate LVI attacks against indirect calls/branches and call returns
+def FeatureLVIControlFlowIntegrity
+ : SubtargetFeature<
+ "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
+ "Prevent indirect calls/branches from using a memory operand, and "
+ "precede all indirect calls/branches from a register with an "
+ "LFENCE instruction to serialize control flow. Also decompose RET "
+ "instructions into a POP+LFENCE+JMP sequence.">;
+
// Direct Move instructions.
def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
"Support movdiri instruction">;
Index: clang/lib/Driver/ToolChains/Arch/X86.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Arch/X86.cpp
+++ clang/lib/Driver/ToolChains/Arch/X86.cpp
@@ -146,6 +146,7 @@
// flags). This is a bit hacky but keeps existing usages working. We should
// consider deprecating this and instead warn if the user requests external
// retpoline thunks and *doesn't* request some form of retpolines.
+ auto SpectreOpt = clang::driver::options::ID::OPT_INVALID;
if (Args.hasArgNoClaim(options::OPT_mretpoline, options::OPT_mno_retpoline,
options::OPT_mspeculative_load_hardening,
options::OPT_mno_speculative_load_hardening)) {
@@ -153,12 +154,14 @@
false)) {
Features.push_back("+retpoline-indirect-calls");
Features.push_back("+retpoline-indirect-branches");
+ SpectreOpt = options::OPT_mretpoline;
} else if (Args.hasFlag(options::OPT_mspeculative_load_hardening,
options::OPT_mno_speculative_load_hardening,
false)) {
// On x86, speculative load hardening relies on at least using retpolines
// for indirect calls.
Features.push_back("+retpoline-indirect-calls");
+ SpectreOpt = options::OPT_mspeculative_load_hardening;
}
} else if (Args.hasFlag(options::OPT_mretpoline_external_thunk,
options::OPT_mno_retpoline_external_thunk, false)) {
@@ -166,6 +169,20 @@
// eventually switch to an error here.
Features.push_back("+retpoline-indirect-calls");
Features.push_back("+retpoline-indirect-branches");
+ SpectreOpt = options::OPT_mretpoline_external_thunk;
+ }
+
+ auto LVIOpt = clang::driver::options::ID::OPT_INVALID;
+ if (Args.hasFlag(options::OPT_mlvi_cfi, options::OPT_mno_lvi_cfi, false)) {
+ Features.push_back("+lvi-cfi");
+ LVIOpt = options::OPT_mlvi_cfi;
+ }
+
+ if (SpectreOpt != clang::driver::options::ID::OPT_INVALID &&
+ LVIOpt != clang::driver::options::ID::OPT_INVALID) {
+ D.Diag(diag::err_drv_argument_not_allowed_with)
+ << D.getOpts().getOptionName(SpectreOpt)
+ << D.getOpts().getOptionName(LVIOpt);
}
// Now add any that the user explicitly requested on the command line,
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -2295,6 +2295,10 @@
Group<m_Group>, Flags<[CoreOption,CC1Option]>;
def mno_speculative_load_hardening : Flag<["-"], "mno-speculative-load-hardening">,
Group<m_Group>, Flags<[CoreOption]>;
+def mlvi_cfi : Flag<["-"], "mlvi-cfi">, Group<m_Group>, Flags<[CoreOption,DriverOption]>,
+ HelpText<"Enable only control-flow mitigations for Load Value Injection (LVI)">;
+def mno_lvi_cfi : Flag<["-"], "mno-lvi-cfi">, Group<m_Group>, Flags<[CoreOption,DriverOption]>,
+ HelpText<"Disable control-flow mitigations for Load Value Injection (LVI)">;
def mrelax : Flag<["-"], "mrelax">, Group<m_riscv_Features_Group>,
HelpText<"Enable linker relaxation">;
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits