[PATCH] D76812: [X86] Add Indirect Thunk Support to X86 to mitigate Load Value Injection (LVI) [3/3]

2020-04-03 Thread Craig Topper via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG5b519cf1fc67: [X86] Add Indirect Thunk Support to X86 to 
mitigate Load Value Injection (LVI) (authored by sconstab, committed by 
craig.topper).
Herald added a project: clang.

Changed prior to commit:
  https://reviews.llvm.org/D76812?vs=254276=254720#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D76812/new/

https://reviews.llvm.org/D76812

Files:
  clang/docs/ClangCommandLineReference.rst
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/Arch/X86.cpp
  clang/test/Driver/x86-target-features.c
  llvm/lib/Target/X86/X86.td
  llvm/lib/Target/X86/X86ISelLowering.cpp
  llvm/lib/Target/X86/X86IndirectThunks.cpp
  llvm/lib/Target/X86/X86Subtarget.h
  llvm/test/CodeGen/X86/lvi-hardening-indirectbr.ll

Index: llvm/test/CodeGen/X86/lvi-hardening-indirectbr.ll
===
--- /dev/null
+++ llvm/test/CodeGen/X86/lvi-hardening-indirectbr.ll
@@ -0,0 +1,281 @@
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown -mattr=+lvi-cfi < %s | FileCheck %s --check-prefix=X64
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown -mattr=+lvi-cfi -O0 < %s | FileCheck %s --check-prefix=X64FAST
+;
+; Note that a lot of this code was lifted from retpoline.ll.
+
+declare void @bar(i32)
+
+; Test a simple indirect call and tail call.
+define void @icall_reg(void (i32)* %fp, i32 %x) {
+entry:
+  tail call void @bar(i32 %x)
+  tail call void %fp(i32 %x)
+  tail call void @bar(i32 %x)
+  tail call void %fp(i32 %x)
+  ret void
+}
+
+; X64-LABEL: icall_reg:
+; X64-DAG:   movq %rdi, %[[fp:[^ ]*]]
+; X64-DAG:   movl %esi, %[[x:[^ ]*]]
+; X64:   movl %esi, %edi
+; X64:   callq bar
+; X64-DAG:   movl %[[x]], %edi
+; X64-DAG:   movq %[[fp]], %r11
+; X64:   callq __llvm_lvi_thunk_r11
+; X64:   movl %[[x]], %edi
+; X64:   callq bar
+; X64-DAG:   movl %[[x]], %edi
+; X64-DAG:   movq %[[fp]], %r11
+; X64:   jmp __llvm_lvi_thunk_r11 # TAILCALL
+
+; X64FAST-LABEL: icall_reg:
+; X64FAST:   callq bar
+; X64FAST:   callq __llvm_lvi_thunk_r11
+; X64FAST:   callq bar
+; X64FAST:   jmp __llvm_lvi_thunk_r11 # TAILCALL
+
+
+@global_fp = external global void (i32)*
+
+; Test an indirect call through a global variable.
+define void @icall_global_fp(i32 %x, void (i32)** %fpp) #0 {
+  %fp1 = load void (i32)*, void (i32)** @global_fp
+  call void %fp1(i32 %x)
+  %fp2 = load void (i32)*, void (i32)** @global_fp
+  tail call void %fp2(i32 %x)
+  ret void
+}
+
+; X64-LABEL: icall_global_fp:
+; X64-DAG:   movl %edi, %[[x:[^ ]*]]
+; X64-DAG:   movq global_fp(%rip), %r11
+; X64:   callq __llvm_lvi_thunk_r11
+; X64-DAG:   movl %[[x]], %edi
+; X64-DAG:   movq global_fp(%rip), %r11
+; X64:   jmp __llvm_lvi_thunk_r11 # TAILCALL
+
+; X64FAST-LABEL: icall_global_fp:
+; X64FAST:   movq global_fp(%rip), %r11
+; X64FAST:   callq __llvm_lvi_thunk_r11
+; X64FAST:   movq global_fp(%rip), %r11
+; X64FAST:   jmp __llvm_lvi_thunk_r11 # TAILCALL
+
+
+%struct.Foo = type { void (%struct.Foo*)** }
+
+; Test an indirect call through a vtable.
+define void @vcall(%struct.Foo* %obj) #0 {
+  %vptr_field = getelementptr %struct.Foo, %struct.Foo* %obj, i32 0, i32 0
+  %vptr = load void (%struct.Foo*)**, void (%struct.Foo*)*** %vptr_field
+  %vslot = getelementptr void(%struct.Foo*)*, void(%struct.Foo*)** %vptr, i32 1
+  %fp = load void(%struct.Foo*)*, void(%struct.Foo*)** %vslot
+  tail call void %fp(%struct.Foo* %obj)
+  tail call void %fp(%struct.Foo* %obj)
+  ret void
+}
+
+; X64-LABEL: vcall:
+; X64:   movq %rdi, %[[obj:[^ ]*]]
+; X64:   movq (%rdi), %[[vptr:[^ ]*]]
+; X64:   movq 8(%[[vptr]]), %[[fp:[^ ]*]]
+; X64:   movq %[[fp]], %r11
+; X64:   callq __llvm_lvi_thunk_r11
+; X64-DAG:   movq %[[obj]], %rdi
+; X64-DAG:   movq %[[fp]], %r11
+; X64:   jmp __llvm_lvi_thunk_r11 # TAILCALL
+
+; X64FAST-LABEL: vcall:
+; X64FAST:   callq __llvm_lvi_thunk_r11
+; X64FAST:   jmp __llvm_lvi_thunk_r11 # TAILCALL
+
+
+declare void @direct_callee()
+
+define void @direct_tail() #0 {
+  tail call void @direct_callee()
+  ret void
+}
+
+; X64-LABEL: direct_tail:
+; X64:   jmp direct_callee # TAILCALL
+; X64FAST-LABEL: direct_tail:
+; X64FAST:   jmp direct_callee # TAILCALL
+
+
+declare void @nonlazybind_callee() #1
+
+define void @nonlazybind_caller() #0 {
+  call void @nonlazybind_callee()
+  tail call void @nonlazybind_callee()
+  ret void
+}
+
+; X64-LABEL: nonlazybind_caller:
+; X64:   movq nonlazybind_callee@GOTPCREL(%rip), %[[REG:.*]]
+; X64:   movq %[[REG]], %r11
+; X64:   callq __llvm_lvi_thunk_r11
+; X64:   movq %[[REG]], %r11
+; X64:   jmp __llvm_lvi_thunk_r11 # TAILCALL
+; X64FAST-LABEL: nonlazybind_caller:
+; X64FAST:   movq nonlazybind_callee@GOTPCREL(%rip), %r11
+; X64FAST:   callq __llvm_lvi_thunk_r11
+; X64FAST:   movq 

[PATCH] D76812: [X86] Add Indirect Thunk Support to X86 to mitigate Load Value Injection (LVI) [3/3]

2020-04-01 Thread Craig Topper via Phabricator via cfe-commits
craig.topper accepted this revision.
craig.topper added a comment.
This revision is now accepted and ready to land.

LGTM


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D76812/new/

https://reviews.llvm.org/D76812



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D76812: [X86] Add Indirect Thunk Support to X86 to mitigate Load Value Injection (LVI) [3/3]

2020-04-01 Thread Scott Constable via Phabricator via cfe-commits
sconstab updated this revision to Diff 254276.
sconstab added a comment.

@craig.topper I think that removing spurious MBBs is not really necessary 
because the emitted machine code doesn't contain the spurious MBBs, from what I 
have observed. I added the check anyways, if only because others may look at 
this discrepancy and have the same question.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D76812/new/

https://reviews.llvm.org/D76812

Files:
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/Arch/X86.cpp
  llvm/lib/Target/X86/X86.td
  llvm/lib/Target/X86/X86ISelLowering.cpp
  llvm/lib/Target/X86/X86IndirectThunks.cpp
  llvm/lib/Target/X86/X86Subtarget.h
  llvm/test/CodeGen/X86/lvi-hardening-indirectbr.ll

Index: llvm/test/CodeGen/X86/lvi-hardening-indirectbr.ll
===
--- /dev/null
+++ llvm/test/CodeGen/X86/lvi-hardening-indirectbr.ll
@@ -0,0 +1,281 @@
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown -mattr=+lvi-cfi < %s | FileCheck %s --check-prefix=X64
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown -mattr=+lvi-cfi -O0 < %s | FileCheck %s --check-prefix=X64FAST
+;
+; Note that a lot of this code was lifted from retpoline.ll.
+
+declare void @bar(i32)
+
+; Test a simple indirect call and tail call.
+define void @icall_reg(void (i32)* %fp, i32 %x) {
+entry:
+  tail call void @bar(i32 %x)
+  tail call void %fp(i32 %x)
+  tail call void @bar(i32 %x)
+  tail call void %fp(i32 %x)
+  ret void
+}
+
+; X64-LABEL: icall_reg:
+; X64-DAG:   movq %rdi, %[[fp:[^ ]*]]
+; X64-DAG:   movl %esi, %[[x:[^ ]*]]
+; X64:   movl %esi, %edi
+; X64:   callq bar
+; X64-DAG:   movl %[[x]], %edi
+; X64-DAG:   movq %[[fp]], %r11
+; X64:   callq __llvm_lvi_thunk_r11
+; X64:   movl %[[x]], %edi
+; X64:   callq bar
+; X64-DAG:   movl %[[x]], %edi
+; X64-DAG:   movq %[[fp]], %r11
+; X64:   jmp __llvm_lvi_thunk_r11 # TAILCALL
+
+; X64FAST-LABEL: icall_reg:
+; X64FAST:   callq bar
+; X64FAST:   callq __llvm_lvi_thunk_r11
+; X64FAST:   callq bar
+; X64FAST:   jmp __llvm_lvi_thunk_r11 # TAILCALL
+
+
+@global_fp = external global void (i32)*
+
+; Test an indirect call through a global variable.
+define void @icall_global_fp(i32 %x, void (i32)** %fpp) #0 {
+  %fp1 = load void (i32)*, void (i32)** @global_fp
+  call void %fp1(i32 %x)
+  %fp2 = load void (i32)*, void (i32)** @global_fp
+  tail call void %fp2(i32 %x)
+  ret void
+}
+
+; X64-LABEL: icall_global_fp:
+; X64-DAG:   movl %edi, %[[x:[^ ]*]]
+; X64-DAG:   movq global_fp(%rip), %r11
+; X64:   callq __llvm_lvi_thunk_r11
+; X64-DAG:   movl %[[x]], %edi
+; X64-DAG:   movq global_fp(%rip), %r11
+; X64:   jmp __llvm_lvi_thunk_r11 # TAILCALL
+
+; X64FAST-LABEL: icall_global_fp:
+; X64FAST:   movq global_fp(%rip), %r11
+; X64FAST:   callq __llvm_lvi_thunk_r11
+; X64FAST:   movq global_fp(%rip), %r11
+; X64FAST:   jmp __llvm_lvi_thunk_r11 # TAILCALL
+
+
+%struct.Foo = type { void (%struct.Foo*)** }
+
+; Test an indirect call through a vtable.
+define void @vcall(%struct.Foo* %obj) #0 {
+  %vptr_field = getelementptr %struct.Foo, %struct.Foo* %obj, i32 0, i32 0
+  %vptr = load void (%struct.Foo*)**, void (%struct.Foo*)*** %vptr_field
+  %vslot = getelementptr void(%struct.Foo*)*, void(%struct.Foo*)** %vptr, i32 1
+  %fp = load void(%struct.Foo*)*, void(%struct.Foo*)** %vslot
+  tail call void %fp(%struct.Foo* %obj)
+  tail call void %fp(%struct.Foo* %obj)
+  ret void
+}
+
+; X64-LABEL: vcall:
+; X64:   movq %rdi, %[[obj:[^ ]*]]
+; X64:   movq (%rdi), %[[vptr:[^ ]*]]
+; X64:   movq 8(%[[vptr]]), %[[fp:[^ ]*]]
+; X64:   movq %[[fp]], %r11
+; X64:   callq __llvm_lvi_thunk_r11
+; X64-DAG:   movq %[[obj]], %rdi
+; X64-DAG:   movq %[[fp]], %r11
+; X64:   jmp __llvm_lvi_thunk_r11 # TAILCALL
+
+; X64FAST-LABEL: vcall:
+; X64FAST:   callq __llvm_lvi_thunk_r11
+; X64FAST:   jmp __llvm_lvi_thunk_r11 # TAILCALL
+
+
+declare void @direct_callee()
+
+define void @direct_tail() #0 {
+  tail call void @direct_callee()
+  ret void
+}
+
+; X64-LABEL: direct_tail:
+; X64:   jmp direct_callee # TAILCALL
+; X64FAST-LABEL: direct_tail:
+; X64FAST:   jmp direct_callee # TAILCALL
+
+
+declare void @nonlazybind_callee() #1
+
+define void @nonlazybind_caller() #0 {
+  call void @nonlazybind_callee()
+  tail call void @nonlazybind_callee()
+  ret void
+}
+
+; X64-LABEL: nonlazybind_caller:
+; X64:   movq nonlazybind_callee@GOTPCREL(%rip), %[[REG:.*]]
+; X64:   movq %[[REG]], %r11
+; X64:   callq __llvm_lvi_thunk_r11
+; X64:   movq %[[REG]], %r11
+; X64:   jmp __llvm_lvi_thunk_r11 # TAILCALL
+; X64FAST-LABEL: nonlazybind_caller:
+; X64FAST:   movq nonlazybind_callee@GOTPCREL(%rip), %r11
+; X64FAST:   callq __llvm_lvi_thunk_r11
+; X64FAST:   movq nonlazybind_callee@GOTPCREL(%rip), %r11
+; X64FAST:   jmp __llvm_lvi_thunk_r11 # TAILCALL
+
+
+; Check that a switch gets lowered using a jump table

[PATCH] D76812: [X86] Add Indirect Thunk Support to X86 to mitigate Load Value Injection (LVI) [3/3]

2020-04-01 Thread Craig Topper via Phabricator via cfe-commits
craig.topper added inline comments.



Comment at: llvm/lib/Target/X86/X86IndirectThunks.cpp:97
+  void populateThunk(MachineFunction ) {
+// This code mitigates LVI by replacing each indirect call/jump with a 
direct
+// call/jump to a thunk that looks like:

Why don't you need the code from retpoline that erases extra BBs?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D76812/new/

https://reviews.llvm.org/D76812



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D76812: [X86] Add Indirect Thunk Support to X86 to mitigate Load Value Injection (LVI) [3/3]

2020-03-31 Thread Scott Constable via Phabricator via cfe-commits
sconstab updated this revision to Diff 253888.
sconstab added a comment.

Added a comment to the header of X86IndirectThunks.cpp to indicate support for 
LVI thunks.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D76812/new/

https://reviews.llvm.org/D76812

Files:
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/Arch/X86.cpp
  llvm/lib/Target/X86/X86.td
  llvm/lib/Target/X86/X86ISelLowering.cpp
  llvm/lib/Target/X86/X86IndirectThunks.cpp
  llvm/lib/Target/X86/X86Subtarget.h
  llvm/test/CodeGen/X86/lvi-hardening-indirectbr.ll

Index: llvm/test/CodeGen/X86/lvi-hardening-indirectbr.ll
===
--- /dev/null
+++ llvm/test/CodeGen/X86/lvi-hardening-indirectbr.ll
@@ -0,0 +1,281 @@
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown -mattr=+lvi-cfi < %s | FileCheck %s --check-prefix=X64
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown -mattr=+lvi-cfi -O0 < %s | FileCheck %s --check-prefix=X64FAST
+;
+; Note that a lot of this code was lifted from retpoline.ll.
+
+declare void @bar(i32)
+
+; Test a simple indirect call and tail call.
+define void @icall_reg(void (i32)* %fp, i32 %x) {
+entry:
+  tail call void @bar(i32 %x)
+  tail call void %fp(i32 %x)
+  tail call void @bar(i32 %x)
+  tail call void %fp(i32 %x)
+  ret void
+}
+
+; X64-LABEL: icall_reg:
+; X64-DAG:   movq %rdi, %[[fp:[^ ]*]]
+; X64-DAG:   movl %esi, %[[x:[^ ]*]]
+; X64:   movl %esi, %edi
+; X64:   callq bar
+; X64-DAG:   movl %[[x]], %edi
+; X64-DAG:   movq %[[fp]], %r11
+; X64:   callq __llvm_lvi_thunk_r11
+; X64:   movl %[[x]], %edi
+; X64:   callq bar
+; X64-DAG:   movl %[[x]], %edi
+; X64-DAG:   movq %[[fp]], %r11
+; X64:   jmp __llvm_lvi_thunk_r11 # TAILCALL
+
+; X64FAST-LABEL: icall_reg:
+; X64FAST:   callq bar
+; X64FAST:   callq __llvm_lvi_thunk_r11
+; X64FAST:   callq bar
+; X64FAST:   jmp __llvm_lvi_thunk_r11 # TAILCALL
+
+
+@global_fp = external global void (i32)*
+
+; Test an indirect call through a global variable.
+define void @icall_global_fp(i32 %x, void (i32)** %fpp) #0 {
+  %fp1 = load void (i32)*, void (i32)** @global_fp
+  call void %fp1(i32 %x)
+  %fp2 = load void (i32)*, void (i32)** @global_fp
+  tail call void %fp2(i32 %x)
+  ret void
+}
+
+; X64-LABEL: icall_global_fp:
+; X64-DAG:   movl %edi, %[[x:[^ ]*]]
+; X64-DAG:   movq global_fp(%rip), %r11
+; X64:   callq __llvm_lvi_thunk_r11
+; X64-DAG:   movl %[[x]], %edi
+; X64-DAG:   movq global_fp(%rip), %r11
+; X64:   jmp __llvm_lvi_thunk_r11 # TAILCALL
+
+; X64FAST-LABEL: icall_global_fp:
+; X64FAST:   movq global_fp(%rip), %r11
+; X64FAST:   callq __llvm_lvi_thunk_r11
+; X64FAST:   movq global_fp(%rip), %r11
+; X64FAST:   jmp __llvm_lvi_thunk_r11 # TAILCALL
+
+
+%struct.Foo = type { void (%struct.Foo*)** }
+
+; Test an indirect call through a vtable.
+define void @vcall(%struct.Foo* %obj) #0 {
+  %vptr_field = getelementptr %struct.Foo, %struct.Foo* %obj, i32 0, i32 0
+  %vptr = load void (%struct.Foo*)**, void (%struct.Foo*)*** %vptr_field
+  %vslot = getelementptr void(%struct.Foo*)*, void(%struct.Foo*)** %vptr, i32 1
+  %fp = load void(%struct.Foo*)*, void(%struct.Foo*)** %vslot
+  tail call void %fp(%struct.Foo* %obj)
+  tail call void %fp(%struct.Foo* %obj)
+  ret void
+}
+
+; X64-LABEL: vcall:
+; X64:   movq %rdi, %[[obj:[^ ]*]]
+; X64:   movq (%rdi), %[[vptr:[^ ]*]]
+; X64:   movq 8(%[[vptr]]), %[[fp:[^ ]*]]
+; X64:   movq %[[fp]], %r11
+; X64:   callq __llvm_lvi_thunk_r11
+; X64-DAG:   movq %[[obj]], %rdi
+; X64-DAG:   movq %[[fp]], %r11
+; X64:   jmp __llvm_lvi_thunk_r11 # TAILCALL
+
+; X64FAST-LABEL: vcall:
+; X64FAST:   callq __llvm_lvi_thunk_r11
+; X64FAST:   jmp __llvm_lvi_thunk_r11 # TAILCALL
+
+
+declare void @direct_callee()
+
+define void @direct_tail() #0 {
+  tail call void @direct_callee()
+  ret void
+}
+
+; X64-LABEL: direct_tail:
+; X64:   jmp direct_callee # TAILCALL
+; X64FAST-LABEL: direct_tail:
+; X64FAST:   jmp direct_callee # TAILCALL
+
+
+declare void @nonlazybind_callee() #1
+
+define void @nonlazybind_caller() #0 {
+  call void @nonlazybind_callee()
+  tail call void @nonlazybind_callee()
+  ret void
+}
+
+; X64-LABEL: nonlazybind_caller:
+; X64:   movq nonlazybind_callee@GOTPCREL(%rip), %[[REG:.*]]
+; X64:   movq %[[REG]], %r11
+; X64:   callq __llvm_lvi_thunk_r11
+; X64:   movq %[[REG]], %r11
+; X64:   jmp __llvm_lvi_thunk_r11 # TAILCALL
+; X64FAST-LABEL: nonlazybind_caller:
+; X64FAST:   movq nonlazybind_callee@GOTPCREL(%rip), %r11
+; X64FAST:   callq __llvm_lvi_thunk_r11
+; X64FAST:   movq nonlazybind_callee@GOTPCREL(%rip), %r11
+; X64FAST:   jmp __llvm_lvi_thunk_r11 # TAILCALL
+
+
+; Check that a switch gets lowered using a jump table
+define void @switch_jumptable(i32* %ptr, i64* %sink) #0 {
+; X64-LABEL: switch_jumptable:
+; X64_NOT:  jmpq *
+entry:
+  br label %header
+
+header:
+  %i = load volatile i32, i32* 

[PATCH] D76812: [X86] Add Indirect Thunk Support to X86 to mitigate Load Value Injection (LVI) [3/3]

2020-03-25 Thread Scott Constable via Phabricator via cfe-commits
sconstab created this revision.
sconstab added reviewers: craig.topper, andrew.w.kaylor, zbrid, chandlerc.
Herald added subscribers: jfb, hiraditya.
sconstab added a parent revision: D76811: [X86] Refactor X86IndirectThunks.cpp 
to Accomodate Mitigations other than Retpoline [2/3].
sconstab retitled this revision from "Add Indirect Thunk Support to X86 to 
mitigate Load Value Injection (LVI) [3/3]" to "[X86] Add Indirect Thunk Support 
to X86 to mitigate Load Value Injection (LVI) [3/3]".

This pass replaces each indirect call/jump with a direct call to a thunk that 
looks like:

  lfence
  jmpq *%r11

This ensures that if the value in register `%r11` was loaded from memory, then
the value in `%r11` is (architecturally) correct prior to the jump.
Also adds a new target feature to X86: +lvi-cfi
("cfi" meaning control-flow integrity)
The feature can be added via clang CLI using `-mlvi-cfi`.

This is an alternate implementation to https://reviews.llvm.org/D75934 That 
merges the thunk insertion functionality with the existing X86 retpoline code.


https://reviews.llvm.org/D76812

Files:
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/Arch/X86.cpp
  llvm/lib/Target/X86/X86.td
  llvm/lib/Target/X86/X86ISelLowering.cpp
  llvm/lib/Target/X86/X86IndirectThunks.cpp
  llvm/lib/Target/X86/X86Subtarget.h
  llvm/test/CodeGen/X86/lvi-hardening-indirectbr.ll

Index: llvm/test/CodeGen/X86/lvi-hardening-indirectbr.ll
===
--- /dev/null
+++ llvm/test/CodeGen/X86/lvi-hardening-indirectbr.ll
@@ -0,0 +1,281 @@
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown -mattr=+lvi-cfi < %s | FileCheck %s --check-prefix=X64
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown -mattr=+lvi-cfi -O0 < %s | FileCheck %s --check-prefix=X64FAST
+;
+; Note that a lot of this code was lifted from retpoline.ll.
+
+declare void @bar(i32)
+
+; Test a simple indirect call and tail call.
+define void @icall_reg(void (i32)* %fp, i32 %x) {
+entry:
+  tail call void @bar(i32 %x)
+  tail call void %fp(i32 %x)
+  tail call void @bar(i32 %x)
+  tail call void %fp(i32 %x)
+  ret void
+}
+
+; X64-LABEL: icall_reg:
+; X64-DAG:   movq %rdi, %[[fp:[^ ]*]]
+; X64-DAG:   movl %esi, %[[x:[^ ]*]]
+; X64:   movl %esi, %edi
+; X64:   callq bar
+; X64-DAG:   movl %[[x]], %edi
+; X64-DAG:   movq %[[fp]], %r11
+; X64:   callq __llvm_lvi_thunk_r11
+; X64:   movl %[[x]], %edi
+; X64:   callq bar
+; X64-DAG:   movl %[[x]], %edi
+; X64-DAG:   movq %[[fp]], %r11
+; X64:   jmp __llvm_lvi_thunk_r11 # TAILCALL
+
+; X64FAST-LABEL: icall_reg:
+; X64FAST:   callq bar
+; X64FAST:   callq __llvm_lvi_thunk_r11
+; X64FAST:   callq bar
+; X64FAST:   jmp __llvm_lvi_thunk_r11 # TAILCALL
+
+
+@global_fp = external global void (i32)*
+
+; Test an indirect call through a global variable.
+define void @icall_global_fp(i32 %x, void (i32)** %fpp) #0 {
+  %fp1 = load void (i32)*, void (i32)** @global_fp
+  call void %fp1(i32 %x)
+  %fp2 = load void (i32)*, void (i32)** @global_fp
+  tail call void %fp2(i32 %x)
+  ret void
+}
+
+; X64-LABEL: icall_global_fp:
+; X64-DAG:   movl %edi, %[[x:[^ ]*]]
+; X64-DAG:   movq global_fp(%rip), %r11
+; X64:   callq __llvm_lvi_thunk_r11
+; X64-DAG:   movl %[[x]], %edi
+; X64-DAG:   movq global_fp(%rip), %r11
+; X64:   jmp __llvm_lvi_thunk_r11 # TAILCALL
+
+; X64FAST-LABEL: icall_global_fp:
+; X64FAST:   movq global_fp(%rip), %r11
+; X64FAST:   callq __llvm_lvi_thunk_r11
+; X64FAST:   movq global_fp(%rip), %r11
+; X64FAST:   jmp __llvm_lvi_thunk_r11 # TAILCALL
+
+
+%struct.Foo = type { void (%struct.Foo*)** }
+
+; Test an indirect call through a vtable.
+define void @vcall(%struct.Foo* %obj) #0 {
+  %vptr_field = getelementptr %struct.Foo, %struct.Foo* %obj, i32 0, i32 0
+  %vptr = load void (%struct.Foo*)**, void (%struct.Foo*)*** %vptr_field
+  %vslot = getelementptr void(%struct.Foo*)*, void(%struct.Foo*)** %vptr, i32 1
+  %fp = load void(%struct.Foo*)*, void(%struct.Foo*)** %vslot
+  tail call void %fp(%struct.Foo* %obj)
+  tail call void %fp(%struct.Foo* %obj)
+  ret void
+}
+
+; X64-LABEL: vcall:
+; X64:   movq %rdi, %[[obj:[^ ]*]]
+; X64:   movq (%rdi), %[[vptr:[^ ]*]]
+; X64:   movq 8(%[[vptr]]), %[[fp:[^ ]*]]
+; X64:   movq %[[fp]], %r11
+; X64:   callq __llvm_lvi_thunk_r11
+; X64-DAG:   movq %[[obj]], %rdi
+; X64-DAG:   movq %[[fp]], %r11
+; X64:   jmp __llvm_lvi_thunk_r11 # TAILCALL
+
+; X64FAST-LABEL: vcall:
+; X64FAST:   callq __llvm_lvi_thunk_r11
+; X64FAST:   jmp __llvm_lvi_thunk_r11 # TAILCALL
+
+
+declare void @direct_callee()
+
+define void @direct_tail() #0 {
+  tail call void @direct_callee()
+  ret void
+}
+
+; X64-LABEL: direct_tail:
+; X64:   jmp direct_callee # TAILCALL
+; X64FAST-LABEL: direct_tail:
+; X64FAST:   jmp direct_callee # TAILCALL
+
+
+declare void @nonlazybind_callee() #1
+
+define void @nonlazybind_caller() #0 {
+  call void