Implement x86_64-specific KCFI backend:

- Function preamble generation with type IDs positioned at -(4+prefix_nops)
  offset from the function entry point.

- 16-byte alignment of KCFI preambles using calculated prefix NOPs,
  aligned(prefix_nops + 5, 16), so the function entry keeps its 16-byte
  (cache-line) alignment (see the layout sketch after this list).

- The type-ID hash avoids embedding an ENDBR instruction encoding in type IDs
  (0xfa1e0ff3/0xfb1e0ff3 are incremented by 1 so the bytes can never be
  executed as an ENDBR landing pad).

- On-demand scratch register allocation: %r10 is the default scratch, with
  %r11 used as needed (memory targets, or when the target is already in %r10).
  The r10/r11 clobbers are declared both on the insn patterns and in
  CALL_INSN_FUNCTION_USAGE, so they are visible to early and late passes alike
  (a sketch of the memory-target variant follows the assembly pattern below).

- Atomic, bundled KCFI check + call/branch sequences using UNSPECV_KCFI to
  keep the optimizer from separating the check from its call and to preserve
  the security properties.

- Uses the .kcfi_traps section for debugger/runtime metadata.
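
As an illustration of the first two points, here is a rough sketch of the
preamble layout for a hypothetical function foo built without a
patchable-function-entry prefix (prefix_nops = 0); the type-ID value
0x12345678 is made up:

  nop (x11)                     ; padding: aligned(0 + 5, 16) - 5 = 11 bytes
  movl $0x12345678, %eax        ; type ID; the 4-byte immediate ends at foo,
                                ;   i.e. at offset -(4 + 0) from the entry
  foo:                          ; function entry stays 16-byte aligned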

Assembly code pattern layout required by the Linux kernel:
  movl $inverse_type_id, %r10d  ; Load expected type (0 - hash)
  addl offset(%target), %r10d   ; Add stored type ID from preamble
  je .Lpass                     ; Branch if types match (sum == 0)
  .Ltrap: ud2                   ; Undefined instruction trap on mismatch
  .Lpass: call/jmp *%target     ; Execute validated indirect transfer
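
When the call target is a memory operand rather than a register, the insn
patterns first move it into %r11 and keep %r10d as the scratch register
(%r11d is used as the scratch only when the target is already in %r10).
A rough sketch of that variant, using a made-up memory operand and no
prefix NOPs:

  movq 8(%rdi), %r11            ; load the indirect call target into r11
  movl $inverse_type_id, %r10d  ; load expected type (0 - hash)
  addl -4(%r11), %r10d          ; add stored type ID from the preamble
  je .Lpass                     ; types match (sum == 0)
  .Ltrap: ud2                   ; trap on mismatch
  .Lpass: call *%r11            ; validated indirect call through r11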

The initialization of the kcfi callbacks in ix86_option_override()
seems like a hack. I couldn't find a better place to do this.

Build- and run-tested with the x86_64 Linux kernel, including its various
CPU-errata handling alternatives and FineIBT.

Signed-off-by: Kees Cook <k...@kernel.org>
---
 gcc/config/i386/i386-protos.h   |   4 +
 gcc/config/i386/i386-options.cc |   3 +
 gcc/config/i386/i386.cc         | 128 ++++++++++++++++++++++++++++
 gcc/config/i386/i386.md         | 144 ++++++++++++++++++++++++++++++++
 gcc/doc/invoke.texi             |  20 +++++
 5 files changed, 299 insertions(+)

diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 69bc0ee570dd..a5209077506c 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -36,6 +36,10 @@ extern void ix86_maybe_emit_epilogue_vzeroupper (void);
 extern void ix86_expand_epilogue (int);
 extern void ix86_expand_split_stack_prologue (void);
 
+/* KCFI support.  */
+extern void ix86_kcfi_init (void);
+extern void kcfi_emit_trap_with_section (FILE *file, rtx trap_label_rtx);
+
 extern void ix86_output_addr_vec_elt (FILE *, int);
 extern void ix86_output_addr_diff_elt (FILE *, int, int);
 
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index 09a35ef62980..f7726c3fdd8f 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -3180,6 +3180,9 @@ void
 ix86_option_override (void)
 {
   ix86_option_override_internal (true, &global_options, &global_options_set);
+
+  /* Initialize KCFI target hooks for x86-64.  */
+  ix86_kcfi_init ();
 }
 
 /* Remember the last target of ix86_set_current_function.  */
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 65e04d3760d5..1cecd6be2f57 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -98,6 +98,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "i386-builtins.h"
 #include "i386-expand.h"
 #include "i386-features.h"
+#include "kcfi.h"
 #include "function-abi.h"
 #include "rtl-error.h"
 #include "gimple-pretty-print.h"
@@ -1700,6 +1701,19 @@ ix86_function_naked (const_tree fn)
   return false;
 }
 
+/* Apply x86-64 specific masking to KCFI type ID.  */
+static uint32_t
+ix86_kcfi_mask_type_id (uint32_t type_id)
+{
+  /* Avoid embedding ENDBR instructions in KCFI type IDs.
+     ENDBR64: 0xfa1e0ff3, ENDBR32: 0xfb1e0ff3
+     If the type ID matches either instruction encoding, increment by 1.  */
+  if (type_id == 0xfa1e0ff3U || type_id == 0xfb1e0ff3U)
+    return type_id + 1;
+
+  return type_id;
+}
+
 /* Write the extra assembler code needed to declare a function properly.  */
 
 void
@@ -1711,6 +1725,9 @@ ix86_asm_output_function_label (FILE *out_file, const char *fname,
   if (cfun)
     cfun->machine->function_label_emitted = true;
 
+  /* Handle KCFI preamble for non-patchable functions.  */
+  kcfi_emit_preamble_if_needed (out_file, decl, false, 0, fname);
+
   if (is_ms_hook)
     {
       int i, filler_count = (TARGET_64BIT ? 32 : 16);
@@ -28456,6 +28473,117 @@ ix86_set_handled_components (sbitmap components)
       }
 }
 
+/* Generate KCFI checked call - replaces indirect call with bundled KCFI check + call.  */
+static rtx
+ix86_kcfi_gen_checked_call (rtx call_insn, rtx target_reg, uint32_t type_id, HOST_WIDE_INT prefix_nops)
+{
+  rtx inverse_type_id_rtx, offset_rtx, pass_label, trap_label, call_args;
+  bool is_sibcall = false;
+
+  /* Check if this is a sibling call (tail call) */
+  if (CALL_P (call_insn))
+    is_sibcall = SIBLING_CALL_P (call_insn);
+
+  /* Convert type ID to inverse for the check (0 - hash) */
+  uint32_t inverse_type_id = (uint32_t)(0 - type_id);
+  inverse_type_id_rtx = gen_int_mode (inverse_type_id, SImode);
+
+  /* Calculate variable offset: -(4 + prefix_nops) */
+  HOST_WIDE_INT offset = -(4 + prefix_nops);
+  offset_rtx = gen_int_mode (offset, DImode);
+
+  /* Generate unique labels for this check.  */
+  pass_label = gen_label_rtx ();
+  trap_label = gen_label_rtx ();
+
+  /* Extract call arguments from original call insn.  */
+  rtx pattern = PATTERN (call_insn);
+  if (GET_CODE (pattern) == CALL)
+    call_args = XEXP (pattern, 1);
+  else if (GET_CODE (pattern) == SET && GET_CODE (SET_SRC (pattern)) == CALL)
+    call_args = XEXP (SET_SRC (pattern), 1);
+  else if (GET_CODE (pattern) == PARALLEL)
+    {
+      /* Handle PARALLEL patterns (includes peephole2 optimizations and other legitimate cases) */
+      is_sibcall = true;  /* PARALLEL indicates a sibling call.  */
+      rtx first_elem = XVECEXP (pattern, 0, 0);
+      if (GET_CODE (first_elem) == CALL)
+       {
+         call_args = XEXP (first_elem, 1);
+       }
+      else if (GET_CODE (first_elem) == SET && GET_CODE (SET_SRC (first_elem)) == CALL)
+       {
+         call_args = XEXP (SET_SRC (first_elem), 1);
+       }
+      else
+       {
+         error ("KCFI: Unexpected PARALLEL pattern structure");
+         gcc_unreachable ();
+       }
+    }
+  else
+    {
+      /* This should never happen - all indirect calls should match one of the above patterns.  */
+      error ("KCFI: Unexpected call pattern structure");
+      gcc_unreachable ();
+    }
+
+  rtx bundled_call;
+  if (is_sibcall)
+    {
+      /* Use sibling call pattern for tail calls.  */
+      bundled_call = gen_kcfi_checked_sibcall (target_reg, call_args, inverse_type_id_rtx, offset_rtx, pass_label, trap_label);
+    }
+  else
+    {
+      /* Use regular call pattern.  */
+      bundled_call = gen_kcfi_checked_call (target_reg, call_args, inverse_type_id_rtx, offset_rtx, pass_label, trap_label);
+    }
+
+  return bundled_call;
+}
+
+/* Calculate x86_64-specific KCFI prefix NOPs for 16-byte alignment.  */
+static int
+ix86_kcfi_calculate_prefix_nops (HOST_WIDE_INT prefix_nops)
+{
+  /* Calculate KCFI NOPs needed: aligned(prefix_nops + 5, 16).  */
+  return (16 - ((prefix_nops + 5) % 16)) % 16;
+}
+
+/* Emit x86_64-specific type ID instruction.  */
+static void
+ix86_kcfi_emit_type_id_instruction (FILE *file, uint32_t type_id)
+{
+  /* Emit movl instruction with type ID.  */
+  fprintf (file, "\tmovl\t$0x%08x, %%eax\n", type_id);
+}
+
+/* Add x86-64 specific register clobbers for KCFI calls.  */
+static void
+ix86_kcfi_add_clobbers (rtx_insn *call_insn)
+{
+  /* Add r10/r11 clobbers so register allocator knows they'll be used.  */
+  rtx usage = CALL_INSN_FUNCTION_USAGE (call_insn);
+  clobber_reg (&usage, gen_rtx_REG (DImode, R10_REG));
+  clobber_reg (&usage, gen_rtx_REG (DImode, R11_REG));
+  CALL_INSN_FUNCTION_USAGE (call_insn) = usage;
+}
+
+/* Initialize x86-64 KCFI target hooks.  */
+void
+ix86_kcfi_init (void)
+{
+  if (TARGET_64BIT && (flag_sanitize & SANITIZE_KCFI))
+    {
+      kcfi_target.mask_type_id = ix86_kcfi_mask_type_id;
+      kcfi_target.gen_kcfi_checked_call = ix86_kcfi_gen_checked_call;
+      kcfi_target.add_kcfi_clobbers = ix86_kcfi_add_clobbers;
+      kcfi_target.calculate_prefix_nops = ix86_kcfi_calculate_prefix_nops;
+      kcfi_target.emit_type_id_instruction = ix86_kcfi_emit_type_id_instruction;
+    }
+}
+
 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS ix86_get_separate_components
 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index a50475bdaf4c..acefc2246537 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -248,6 +248,7 @@
   UNSPECV_RDGSBASE
   UNSPECV_WRFSBASE
   UNSPECV_WRGSBASE
+  UNSPECV_KCFI
   UNSPECV_FXSAVE
   UNSPECV_FXRSTOR
   UNSPECV_FXSAVE64
@@ -30582,6 +30583,149 @@
    (set_attr "type" "other")
    (set_attr "mode" "<MODE>")])
 
+;; KCFI checked call - atomic KCFI check + indirect call bundle
+;; This prevents optimizer from separating KCFI checks from their protected calls
+(define_insn "kcfi_checked_call"
+  [(call (mem:QI (match_operand:DI 0 "nonimmediate_operand" "rm"))
+         (match_operand 1))
+   (unspec_volatile [(match_operand:SI 2 "const_int_operand" "n")
+                     (match_operand:DI 3 "const_int_operand" "n")
+                     (label_ref (match_operand 4))
+                     (label_ref (match_operand 5))] UNSPECV_KCFI)
+   (clobber (reg:SI R10_REG))
+   (clobber (reg:SI R11_REG))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && !SIBLING_CALL_P (insn)"
+{
+  rtx target_reg;
+  bool need_r11 = false;
+
+  /* If target is not in a register, move it to r11.  */
+  if (!REG_P (operands[0]))
+    {
+      target_reg = gen_rtx_REG (DImode, R11_REG);
+      /* Emit the move to r11.  */
+      rtx mov_to_r11[2] = { target_reg, operands[0] };
+      output_asm_insn ("movq\t%1, %0", mov_to_r11);
+      need_r11 = true;
+    }
+  else
+    {
+      target_reg = operands[0];
+    }
+
+  /* Choose scratch register: r10 by default, r11 if r10 is the target.  */
+  bool target_is_r10 = (REG_P (target_reg) && REGNO (target_reg) == R10_REG);
+  int scratch_reg = target_is_r10 ? R11_REG : R10_REG;
+  const char *scratch_name = target_is_r10 ? "r11d" : "r10d";
+
+  /* Output complete KCFI check + call sequence atomically.  */
+  char mov_insn[64];
+  sprintf (mov_insn, "movl\t$%%c2, %%%%%s", scratch_name);
+  output_asm_insn (mov_insn, operands);
+
+  /* Create memory operand for the addl instruction.  */
+  rtx mem_op = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, target_reg, operands[3]));
+  rtx temp_operands[2] = { mem_op, gen_rtx_REG (SImode, scratch_reg) };
+  output_asm_insn ("addl\t%0, %1", temp_operands);
+
+  output_asm_insn ("je\t%l4", operands);
+
+  /* Output trap label and instruction.  */
+  output_asm_insn ("%l5:", operands);
+  output_asm_insn ("ud2", operands);
+
+  /* Use existing function with trap and entry label RTX.  */
+  kcfi_emit_trap_with_section (asm_out_file, operands[5]);
+
+  /* Output pass label.  */
+  output_asm_insn ("%l4:", operands);
+
+  /* Finally emit the protected call using the register we chose.  */
+  if (need_r11)
+    {
+      rtx r11_operand = gen_rtx_REG (DImode, R11_REG);
+      output_asm_insn ("call\t*%0", &r11_operand);
+      return "";
+    }
+  else
+    return "call\t*%0";
+}
+  [(set_attr "type" "call")
+   (set_attr "mode" "DI")])
+
+;; KCFI checked sibling call - atomic KCFI check + indirect sibling call bundle
+;; This handles tail call optimization cases
+(define_insn "kcfi_checked_sibcall"
+  [(call (mem:QI (match_operand:DI 0 "nonimmediate_operand" "rm"))
+         (match_operand 1))
+   (unspec_volatile [(match_operand:SI 2 "const_int_operand" "n")
+                     (match_operand:DI 3 "const_int_operand" "n")
+                     (label_ref (match_operand 4))
+                     (label_ref (match_operand 5))] UNSPECV_KCFI)
+   (clobber (reg:SI R10_REG))
+   (clobber (reg:SI R11_REG))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && SIBLING_CALL_P (insn)"
+{
+  rtx target_reg;
+  bool need_r11 = false;
+
+  /* If target is not in a register, move it to r11.  */
+  if (!REG_P (operands[0]))
+    {
+      target_reg = gen_rtx_REG (DImode, R11_REG);
+      /* Emit the move to r11.  */
+      rtx mov_to_r11[2] = { target_reg, operands[0] };
+      output_asm_insn ("movq\t%1, %0", mov_to_r11);
+      need_r11 = true;
+    }
+  else
+    {
+      target_reg = operands[0];
+    }
+
+  /* Choose scratch register: r10 by default, r11 if r10 is the target.  */
+  bool target_is_r10 = (REG_P (target_reg) && REGNO (target_reg) == R10_REG);
+  int scratch_reg = target_is_r10 ? R11_REG : R10_REG;
+  const char *scratch_name = target_is_r10 ? "r11d" : "r10d";
+
+  /* Output complete KCFI check + sibling call sequence atomically.  */
+  char mov_insn[64];
+  sprintf (mov_insn, "movl\t$%%c2, %%%%%s", scratch_name);
+  output_asm_insn (mov_insn, operands);
+
+  /* Create memory operand for the addl instruction.  */
+  rtx mem_op = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, target_reg, operands[3]));
+  rtx temp_operands[2] = { mem_op, gen_rtx_REG (SImode, scratch_reg) };
+  output_asm_insn ("addl\t%0, %1", temp_operands);
+
+  output_asm_insn ("je\t%l4", operands);
+
+  /* Output trap label and instruction.  */
+  output_asm_insn ("%l5:", operands);
+  output_asm_insn ("ud2", operands);
+
+  /* Use existing function with trap and entry label RTX.  */
+  kcfi_emit_trap_with_section (asm_out_file, operands[5]);
+
+  /* Output pass label.  */
+  output_asm_insn ("%l4:", operands);
+
+  /* Finally emit the protected sibling call (jmp) using the register we chose.  */
+  if (need_r11)
+    {
+      rtx r11_operand = gen_rtx_REG (DImode, R11_REG);
+      output_asm_insn ("jmp\t*%0", &r11_operand);
+      return "";
+    }
+  else
+    return "jmp\t*%0";
+}
+  [(set_attr "type" "call")
+   (set_attr "mode" "DI")])
+
+
 (include "mmx.md")
 (include "sse.md")
 (include "sync.md")
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index c66f47336826..f531a9f6ce33 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -18316,6 +18316,26 @@ and without changing the entry points of the target functions. Only
 functions that have referenced by their address receive the KCFI preamble
 instrumentation.
 
+Platform-specific implementation details:
+
+On x86_64, KCFI type identifiers are emitted as a @code{movl $ID, %eax}
+instruction before the function entry.  The implementation ensures that
+type IDs never collide with ENDBR instruction encodings.  When used with
+@option{-fpatchable-function-entry}, the type identifier is placed before
+any patchable NOPs, with appropriate alignment to maintain a 16-byte
+boundary for the function entry.  The runtime check loads the negated
+expected type ID into @code{%r10d} and uses an @code{addl} instruction
+to add the type ID stored in the target's preamble, zeroing the
+register if the types match.  A conditional jump follows to either
+continue execution or trap on mismatch.  The check sequence uses
+@code{%r10d} and @code{%r11d} as scratch registers.  Trap locations are
+recorded in a special @code{.kcfi_traps} section that maps trap sites
+to their corresponding function entry points, enabling debuggers and
+crash handlers to identify KCFI violations.  The exact instruction
+sequences for both the KCFI preamble and the check-call bundle are
+considered ABI, as the Linux kernel may optionally rewrite these areas
+at boot time to mitigate detected CPU errata.
+
 KCFI is intended primarily for kernel code and may not be suitable
 for user-space applications that rely on techniques incompatible
 with strict type checking of indirect calls.
-- 
2.34.1
