https://github.com/atrosinenko updated https://github.com/llvm/llvm-project/pull/132540
>From de28401e6c4f68117f0b71f2b08c3c065b286f62 Mon Sep 17 00:00:00 2001 From: Anatoly Trosinenko <atrosine...@accesssoftek.com> Date: Thu, 20 Mar 2025 20:15:07 +0300 Subject: [PATCH] [BOLT] Gadget scanner: Detect address materialization and arithmetics In addition to authenticated pointers, consider the contents of a register safe if it was * written by PC-relative address computation * updated by an arithmetic instruction whose input address is safe --- bolt/include/bolt/Core/MCPlusBuilder.h | 16 ++ bolt/lib/Passes/PAuthGadgetScanner.cpp | 92 +++++-- .../Target/AArch64/AArch64MCPlusBuilder.cpp | 30 +++ .../AArch64/gs-pacret-autiasp.s | 15 -- .../gs-pauth-address-materialization.s | 228 ++++++++++++++++++ .../binary-analysis/AArch64/lit.local.cfg | 3 +- 6 files changed, 345 insertions(+), 39 deletions(-) create mode 100644 bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h index 8b6dc14121480..e94f82d00349a 100644 --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -587,6 +587,22 @@ class MCPlusBuilder { return getNoRegister(); } + virtual MCPhysReg getSafelyMaterializedAddressReg(const MCInst &Inst) const { + llvm_unreachable("not implemented"); + return getNoRegister(); + } + + /// Analyzes if this instruction can safely perform address arithmetics. + /// + /// If the first element of the returned pair is no-register, this instruction + /// is considered unknown. Otherwise, (output, input) pair is returned, + /// so that output is as trusted as input is. + virtual std::pair<MCPhysReg, MCPhysReg> + analyzeSafeAddressArithmetics(const MCInst &Inst) const { + llvm_unreachable("not implemented"); + return std::make_pair(getNoRegister(), getNoRegister()); + } + virtual bool isTerminator(const MCInst &Inst) const; virtual bool isNoop(const MCInst &Inst) const { diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp index dcc7d93183900..33f925070290e 100644 --- a/bolt/lib/Passes/PAuthGadgetScanner.cpp +++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp @@ -335,6 +335,50 @@ class PacRetAnalysis }); } + BitVector getClobberedRegs(const MCInst &Point) const { + BitVector Clobbered(NumRegs, false); + // Assume a call can clobber all registers, including callee-saved + // registers. There's a good chance that callee-saved registers will be + // saved on the stack at some point during execution of the callee. + // Therefore they should also be considered as potentially modified by an + // attacker/written to. + // Also, not all functions may respect the AAPCS ABI rules about + // caller/callee-saved registers. + if (BC.MIB->isCall(Point)) + Clobbered.set(); + else + BC.MIB->getClobberedRegs(Point, Clobbered); + return Clobbered; + } + + // Returns all registers that can be treated as if they are written by an + // authentication instruction. + SmallVector<MCPhysReg> getAuthenticatedRegs(const MCInst &Point, + const State &Cur) const { + SmallVector<MCPhysReg> Regs; + const MCPhysReg NoReg = BC.MIB->getNoRegister(); + + // A signed pointer can be authenticated, or + ErrorOr<MCPhysReg> AutReg = BC.MIB->getAuthenticatedReg(Point); + if (AutReg && *AutReg != NoReg) + Regs.push_back(*AutReg); + + // ... a safe address can be materialized, or + MCPhysReg NewAddrReg = BC.MIB->getSafelyMaterializedAddressReg(Point); + if (NewAddrReg != NoReg) + Regs.push_back(NewAddrReg); + + // ... an address can be updated in a safe manner, producing the result + // which is as trusted as the input address. + MCPhysReg ArithResult, ArithSrc; + std::tie(ArithResult, ArithSrc) = + BC.MIB->analyzeSafeAddressArithmetics(Point); + if (ArithResult != NoReg && Cur.SafeToDerefRegs[ArithSrc]) + Regs.push_back(ArithResult); + + return Regs; + } + State computeNext(const MCInst &Point, const State &Cur) { PacStatePrinter P(BC); LLVM_DEBUG({ @@ -355,19 +399,20 @@ class PacRetAnalysis return State(); } + // First, compute various properties of the instruction, taking the state + // before its execution into account, if necessary. + + BitVector Clobbered = getClobberedRegs(Point); + // Compute the set of registers that can be considered as written by + // an authentication instruction. This includes operations that are + // *strictly better* than authentication, such as materializing a + // PC-relative constant. + SmallVector<MCPhysReg> AuthenticatedOrBetter = + getAuthenticatedRegs(Point, Cur); + + // Then, compute the state after this instruction is executed. State Next = Cur; - BitVector Clobbered(NumRegs, false); - // Assume a call can clobber all registers, including callee-saved - // registers. There's a good chance that callee-saved registers will be - // saved on the stack at some point during execution of the callee. - // Therefore they should also be considered as potentially modified by an - // attacker/written to. - // Also, not all functions may respect the AAPCS ABI rules about - // caller/callee-saved registers. - if (BC.MIB->isCall(Point)) - Clobbered.set(); - else - BC.MIB->getClobberedRegs(Point, Clobbered); + Next.SafeToDerefRegs.reset(Clobbered); // Keep track of this instruction if it writes to any of the registers we // need to track that for: @@ -375,17 +420,18 @@ class PacRetAnalysis if (Clobbered[Reg]) lastWritingInsts(Next, Reg) = {&Point}; - ErrorOr<MCPhysReg> AutReg = BC.MIB->getAuthenticatedReg(Point); - if (AutReg && *AutReg != BC.MIB->getNoRegister()) { - // The sub-registers of *AutReg are also trusted now, but not its - // super-registers (as they retain untrusted register units). - BitVector AuthenticatedSubregs = - BC.MIB->getAliases(*AutReg, /*OnlySmaller=*/true); - for (MCPhysReg Reg : AuthenticatedSubregs.set_bits()) { - Next.SafeToDerefRegs.set(Reg); - if (RegsToTrackInstsFor.isTracked(Reg)) - lastWritingInsts(Next, Reg).clear(); - } + // After accounting for clobbered registers in general, override the state + // according to authentication and other *special cases* of clobbering. + + // The sub-registers of each authenticated register are also trusted now, + // but not their super-registers (as they retain untrusted register units). + BitVector AuthenticatedSubregs(NumRegs); + for (MCPhysReg AutReg : AuthenticatedOrBetter) + AuthenticatedSubregs |= BC.MIB->getAliases(AutReg, /*OnlySmaller=*/true); + for (MCPhysReg Reg : AuthenticatedSubregs.set_bits()) { + Next.SafeToDerefRegs.set(Reg); + if (RegsToTrackInstsFor.isTracked(Reg)) + lastWritingInsts(Next, Reg).clear(); } LLVM_DEBUG({ diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp index 9ce1514639f95..d5e92fb47a39b 100644 --- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp @@ -319,6 +319,36 @@ class AArch64MCPlusBuilder : public MCPlusBuilder { } } + MCPhysReg getSafelyMaterializedAddressReg(const MCInst &Inst) const override { + switch (Inst.getOpcode()) { + case AArch64::ADR: + case AArch64::ADRP: + return Inst.getOperand(0).getReg(); + default: + return getNoRegister(); + } + } + + std::pair<MCPhysReg, MCPhysReg> + analyzeSafeAddressArithmetics(const MCInst &Inst) const override { + switch (Inst.getOpcode()) { + default: + return std::make_pair(getNoRegister(), getNoRegister()); + case AArch64::ADDXri: + case AArch64::SUBXri: + return std::make_pair(Inst.getOperand(0).getReg(), + Inst.getOperand(1).getReg()); + case AArch64::ORRXrs: + // "mov Xd, Xm" is equivalent to "orr Xd, XZR, Xm, lsl #0" + if (Inst.getOperand(1).getReg() != AArch64::XZR || + Inst.getOperand(3).getImm() != 0) + return std::make_pair(getNoRegister(), getNoRegister()); + + return std::make_pair(Inst.getOperand(0).getReg(), + Inst.getOperand(2).getReg()); + } + } + bool isADRP(const MCInst &Inst) const override { return Inst.getOpcode() == AArch64::ADRP; } diff --git a/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s b/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s index 01b7cec3272e6..d506ec13f4895 100644 --- a/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s +++ b/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s @@ -141,24 +141,9 @@ f_nonx30_ret_ok: stp x29, x30, [sp, #-16]! mov x29, sp bl g - add x0, x0, #3 ldp x29, x30, [sp], #16 - // FIXME: Should the scanner understand that an authenticated register (below x30, - // after the autiasp instruction), is OK to be moved to another register - // and then that register being used to return? - // This respects that pac-ret hardening intent, but the scanner currently - // will produce a false positive for this. - // Is it worthwhile to make the scanner more complex for this case? - // So far, scanning many millions of instructions across a linux distro, - // I haven't encountered such an example. - // The ".if 0" block below tests this case and currently fails. -.if 0 autiasp mov x16, x30 -.else - mov x16, x30 - autia x16, sp -.endif // CHECK-NOT: function f_nonx30_ret_ok ret x16 .size f_nonx30_ret_ok, .-f_nonx30_ret_ok diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s b/bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s new file mode 100644 index 0000000000000..b7559f15d6bb9 --- /dev/null +++ b/bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s @@ -0,0 +1,228 @@ +// RUN: %clang %cflags -march=armv8.3-a %s -o %t.exe +// RUN: llvm-bolt-binary-analysis --scanners=pacret %t.exe 2>&1 | FileCheck %s + +// Test various patterns that should or should not be considered safe +// materialization of PC-relative addresses. +// +// Note that while "instructions that write to the affected registers" +// section of the report is still technically correct, it does not necessarily +// mentions the instructions that are used incorrectly. +// +// FIXME: Switch to PAC* instructions instead of indirect tail call for testing +// if a register is considered safe when detection of signing oracles is +// implemented, as it is more traditional usage of PC-relative constants. +// Moreover, using PAC instructions would improve test robustness, as +// handling of *calls* can be influenced by what BOLT classifies as a +// tail call, for example. + + .text + +// Define a function that is reachable by ADR instruction. + .type sym,@function +sym: + ret + .size sym, .-sym + + .globl good_adr + .type good_adr,@function +good_adr: +// CHECK-NOT: good_adr + adr x0, sym + br x0 + .size good_adr, .-good_adr + + .globl good_adrp + .type good_adrp,@function +good_adrp: +// CHECK-NOT: good_adrp + adrp x0, sym + br x0 + .size good_adrp, .-good_adrp + + .globl good_adrp_add + .type good_adrp_add,@function +good_adrp_add: +// CHECK-NOT: good_adrp_add + adrp x0, sym + add x0, x0, :lo12:sym + br x0 + .size good_adrp_add, .-good_adrp_add + + .globl good_adrp_add_with_const_offset + .type good_adrp_add_with_const_offset,@function +good_adrp_add_with_const_offset: +// CHECK-NOT: good_adrp_add_with_const_offset + adrp x0, sym + add x0, x0, :lo12:sym + add x0, x0, #8 + br x0 + .size good_adrp_add_with_const_offset, .-good_adrp_add_with_const_offset + + .globl bad_adrp_with_nonconst_offset + .type bad_adrp_with_nonconst_offset,@function +bad_adrp_with_nonconst_offset: +// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_adrp_with_nonconst_offset, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x0 # TAILCALL +// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are: +// CHECK-NEXT: 1. {{[0-9a-f]+}}: add x0, x0, x1 +// CHECK-NEXT: This happens in the following basic block: +// CHECK-NEXT: {{[0-9a-f]+}}: adrp x0, #{{.*}} +// CHECK-NEXT: {{[0-9a-f]+}}: add x0, x0, x1 +// CHECK-NEXT: {{[0-9a-f]+}}: br x0 # TAILCALL + adrp x0, sym + add x0, x0, x1 + br x0 + .size bad_adrp_with_nonconst_offset, .-bad_adrp_with_nonconst_offset + + .globl bad_split_adrp + .type bad_split_adrp,@function +bad_split_adrp: +// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_split_adrp, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x0 # UNKNOWN CONTROL FLOW +// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are: +// CHECK-NEXT: 1. {{[0-9a-f]+}}: add x0, x0, #0x{{[0-9a-f]+}} +// CHECK-NEXT: This happens in the following basic block: +// CHECK-NEXT: {{[0-9a-f]+}}: add x0, x0, #0x{{[0-9a-f]+}} +// CHECK-NEXT: {{[0-9a-f]+}}: br x0 # UNKNOWN CONTROL FLOW + cbz x2, 1f + adrp x0, sym +1: + add x0, x0, :lo12:sym + br x0 + .size bad_split_adrp, .-bad_split_adrp + +// Materialization of absolute addresses is not expected. + + .globl bad_immediate_constant + .type bad_immediate_constant,@function +bad_immediate_constant: +// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_immediate_constant, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x0 # TAILCALL +// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are: +// CHECK-NEXT: 1. {{[0-9a-f]+}}: mov x0, #{{.*}} +// CHECK-NEXT: This happens in the following basic block: +// CHECK-NEXT: {{[0-9a-f]+}}: mov x0, #{{.*}} +// CHECK-NEXT: {{[0-9a-f]+}}: br x0 # TAILCALL + movz x0, #1234 + br x0 + .size bad_immediate_constant, .-bad_immediate_constant + +// Any ADR or ADRP instruction followed by any number of increments/decrements +// by constant is considered safe. + + .globl good_adr_with_add + .type good_adr_with_add,@function +good_adr_with_add: +// CHECK-NOT: good_adr_with_add + adr x0, sym + add x0, x0, :lo12:sym + br x0 + .size good_adr_with_add, .-good_adr_with_add + + .globl good_adrp_with_add_non_consecutive + .type good_adrp_with_add_non_consecutive,@function +good_adrp_with_add_non_consecutive: +// CHECK-NOT: good_adrp_with_add_non_consecutive + adrp x0, sym + mul x1, x2, x3 + add x0, x0, :lo12:sym + br x0 + .size good_adrp_with_add_non_consecutive, .-good_adrp_with_add_non_consecutive + + .globl good_many_offsets + .type good_many_offsets,@function +good_many_offsets: +// CHECK-NOT: good_many_offsets + adrp x0, sym + add x1, x0, #8 + add x2, x1, :lo12:sym + br x2 + .size good_many_offsets, .-good_many_offsets + +// MOV Xd, Xm (which is an alias of ORR Xd, XZR, Xm) is handled as part of +// support for address arithmetics, but ORR in general is not. + + .globl good_mov_reg + .type good_mov_reg,@function +good_mov_reg: +// CHECK-NOT: good_mov_reg + adrp x0, sym + mov x1, x0 + orr x2, xzr, x1 // the same as "mov x2, x1" + br x2 + .size good_mov_reg, .-good_mov_reg + + .globl bad_orr_not_xzr + .type bad_orr_not_xzr,@function +bad_orr_not_xzr: +// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_orr_not_xzr, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x2 # TAILCALL +// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are: +// CHECK-NEXT: 1. {{[0-9a-f]+}}: orr x2, x1, x0 +// CHECK-NEXT: This happens in the following basic block: +// CHECK-NEXT: {{[0-9a-f]+}}: adrp x0, #{{(0x)?[0-9a-f]+}} +// CHECK-NEXT: {{[0-9a-f]+}}: mov x1, #0 +// CHECK-NEXT: {{[0-9a-f]+}}: orr x2, x1, x0 +// CHECK-NEXT: {{[0-9a-f]+}}: br x2 # TAILCALL + adrp x0, sym + movz x1, #0 + orr x2, x1, x0 + br x2 + .size bad_orr_not_xzr, .-bad_orr_not_xzr + + .globl bad_orr_not_lsl0 + .type bad_orr_not_lsl0,@function +bad_orr_not_lsl0: +// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_orr_not_lsl0, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x2 # TAILCALL +// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are: +// CHECK-NEXT: 1. {{[0-9a-f]+}}: orr x2, xzr, x0, lsl #1 +// CHECK-NEXT: This happens in the following basic block: +// CHECK-NEXT: {{[0-9a-f]+}}: adrp x0, #{{(0x)?[0-9a-f]+}} +// CHECK-NEXT: {{[0-9a-f]+}}: orr x2, xzr, x0, lsl #1 +// CHECK-NEXT: {{[0-9a-f]+}}: br x2 # TAILCALL + adrp x0, sym + orr x2, xzr, x0, lsl #1 + br x2 + .size bad_orr_not_lsl0, .-bad_orr_not_lsl0 + +// Check that the input register operands of `add`/`mov` is correct. + + .globl bad_add_input_reg + .type bad_add_input_reg,@function +bad_add_input_reg: +// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_add_input_reg, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x0 # TAILCALL +// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are: +// CHECK-NEXT: 1. {{[0-9a-f]+}}: add x0, x1, #0x{{[0-9a-f]+}} +// CHECK-NEXT: This happens in the following basic block: +// CHECK-NEXT: {{[0-9a-f]+}}: adrp x0, #{{(0x)?[0-9a-f]+}} +// CHECK-NEXT: {{[0-9a-f]+}}: add x0, x1, #0x{{[0-9a-f]+}} +// CHECK-NEXT: {{[0-9a-f]+}}: br x0 # TAILCALL + adrp x0, sym + add x0, x1, :lo12:sym + br x0 + .size bad_add_input_reg, .-bad_add_input_reg + + .globl bad_mov_input_reg + .type bad_mov_input_reg,@function +bad_mov_input_reg: +// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_mov_input_reg, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x0 # TAILCALL +// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are: +// CHECK-NEXT: 1. {{[0-9a-f]+}}: mov x0, x1 +// CHECK-NEXT: This happens in the following basic block: +// CHECK-NEXT: {{[0-9a-f]+}}: adrp x0, #{{(0x)?[0-9a-f]+}} +// CHECK-NEXT: {{[0-9a-f]+}}: mov x0, x1 +// CHECK-NEXT: {{[0-9a-f]+}}: br x0 # TAILCALL + adrp x0, sym + mov x0, x1 + br x0 + .size bad_mov_input_reg, .-bad_mov_input_reg + + .globl main + .type main,@function +main: + mov x0, 0 + ret + .size main, .-main diff --git a/bolt/test/binary-analysis/AArch64/lit.local.cfg b/bolt/test/binary-analysis/AArch64/lit.local.cfg index 6ac7d3cc0fec7..54e093566dea9 100644 --- a/bolt/test/binary-analysis/AArch64/lit.local.cfg +++ b/bolt/test/binary-analysis/AArch64/lit.local.cfg @@ -1,7 +1,8 @@ if "AArch64" not in config.root.targets: config.unsupported = True -flags = "--target=aarch64-linux-gnu -nostartfiles -nostdlib -ffreestanding" +# -Wl,--no-relax prevents converting ADRP+ADD pairs into NOP+ADR. +flags = "--target=aarch64-linux-gnu -nostartfiles -nostdlib -ffreestanding -Wl,--no-relax" config.substitutions.insert(0, ("%cflags", f"%cflags {flags}")) config.substitutions.insert(0, ("%cxxflags", f"%cxxflags {flags}")) _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits