> diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
> --- a/arch/x86/kernel/uprobes.c
> +++ b/arch/x86/kernel/uprobes.c

[ ... ]

> @@ -963,7 +975,7 @@ static int verify_insn(struct page *page, unsigned long 
> vaddr, uprobe_opcode_t *new_opcode,
>   *   - SMP sync all CPUs
>   */
>  static int int3_update(struct arch_uprobe *auprobe, struct vm_area_struct 
> *vma,
> -                    unsigned long vaddr, char *insn, bool optimize)
> +                    unsigned long vaddr, char *insn, int size, bool optimize)
>  {
>       uprobe_opcode_t int3 = UPROBE_SWBP_INSN;
>       struct write_opcode_ctx ctx = {
> @@ -978,7 +990,7 @@ struct write_opcode_ctx ctx = {
>        * so we can skip this step for optimize == true.
>        */
>       if (!optimize) {
> -             ctx.expect = EXPECT_CALL;
> +             ctx.expect = EXPECT_OPTIMIZED;
>               err = uprobe_write(auprobe, vma, vaddr, &int3, 1, verify_insn,
>                                  true /* is_register */, false /* 
> do_update_ref_ctr */,
>                                  &ctx);
> @@ -990,7 +1002,7 @@ struct write_opcode_ctx ctx = {
>
>       /* Write all but the first byte of the patched range. */
>       ctx.expect = EXPECT_SWBP;
> -     err = uprobe_write(auprobe, vma, vaddr + 1, insn + 1, 4, verify_insn,
> +     err = uprobe_write(auprobe, vma, vaddr + 1, insn + 1, size - 1, 
> verify_insn,

When re-optimizing from JMP8 back to LEA+CALL, does the CALL instruction at
offset 5 get rewritten without INT3 protection?

The int3_update() sequence writes INT3 at offset 0 during unoptimize
(!optimize path), then later writes bytes 1..9 while verifying offset 0
stays INT3. During re-optimization (optimize == true), the code skips the
initial INT3 write and directly writes bytes 1..9.

Consider this sequence when re-optimizing a previously unoptimized uprobe
that is currently 'jmp rel8' back to 'lea; call':

  Step 7: Current state is 0xCC 0x08 ... (INT3 followed by JMP8 offset)
  Step 8: Write bytes 1..9, transforming to 0xCC 0x8d 0x64 0x24 0x80 0xE8 ...
          The CALL opcode (0xE8) at offset 5 and its 4-byte displacement
          get written without an INT3 protecting that location.

A concurrent thread executing at vaddr could observe a partially-written CALL
instruction during the transition. Peter Zijlstra suggested placing INT3 at
both offset 0 and offset 5 to protect both the LEA and CALL instructions
during modification:

  Reference: 
https://lore.kernel.org/bpf/[email protected]/

  "Note that from step 7 to step 8, you re-write the CALL instruction
   without going through INT3. This means it is entirely possible for a
   concurrent execution to observe a composite instruction. This is NOT
   sound! However, I think it can be salvaged, if instead of only writing
   INT3 at +0, you also write INT3 at +5."

[ ... ]

> @@ -1095,14 +1122,25 @@ int set_orig_insn(struct arch_uprobe *auprobe, struct 
> vm_area_struct *vma,
>                 unsigned long vaddr)
>  {
>       if (test_bit(ARCH_UPROBE_FLAG_CAN_OPTIMIZE, &auprobe->flags)) {
> -             int ret = is_optimized(vma->vm_mm, vaddr);
> -             if (ret < 0)
> +             uprobe_opcode_t insn[OPT_INSN_SIZE];
> +             int ret;
> +
> +             ret = copy_from_vaddr(vma->vm_mm, vaddr, &insn, OPT_INSN_SIZE);
> +             if (ret)
>                       return ret;
> -             if (ret) {
> +             if (__is_optimized(vma->vm_mm, (uprobe_opcode_t *)&insn, 
> vaddr)) {
>                       ret = swbp_unoptimize(auprobe, vma, vaddr);
>                       WARN_ON_ONCE(ret);
>                       return ret;
>               }
> +             /*
> +              * We can have re-attached probe on top of jmp8 instruction,
> +              * which did not get optimized. We need to restore the jmp8
> +              * instruction, instead of the original instruction (nop10).
> +              */
> +             if (is_swbp_insn(&insn[0]) && insn[1] == OPT_JMP8_OFFSET)
> +                     return uprobe_write_opcode(auprobe, vma, vaddr, 
> JMP8_INSN_OPCODE,

This isn't a bug, but the multi-line if statement is missing braces:

                if (is_swbp_insn(&insn[0]) && insn[1] == OPT_JMP8_OFFSET)
                        return uprobe_write_opcode(auprobe, vma, vaddr, 
JMP8_INSN_OPCODE,
                                                   false /* is_register */);

Kernel coding style requires braces on any multi-line statement.

  Reference: 
https://lore.kernel.org/bpf/[email protected]/

[ ... ]

> @@ -1160,14 +1198,24 @@ void arch_uprobe_optimize(struct arch_uprobe 
> *auprobe, unsigned long vaddr)
>
>  static bool can_optimize(struct insn *insn, unsigned long vaddr)
>  {
> -     if (!insn->x86_64 || insn->length != 5)
> +     if (!insn->x86_64)
>               return false;
>
> -     if (!insn_is_nop(insn))
> +     /* We can't do cross page atomic writes yet. */
> +     if (PAGE_SIZE - (vaddr & ~PAGE_MASK) < OPT_INSN_SIZE)
>               return false;
>
> -     /* We can't do cross page atomic writes yet. */
> -     return PAGE_SIZE - (vaddr & ~PAGE_MASK) >= 5;
> +     /* We can optimize on top of nop10.. */
> +     if (insn->length == OPT_INSN_SIZE && insn_is_nop(insn))
> +             return true;
> +
> +     /* .. and JMP rel8 to end of slot — check swbp_unoptimize. */
> +     if (insn->length == 2 &&
> +         insn->opcode.bytes[0] == JMP8_INSN_OPCODE &&
> +         insn->immediate.value == OPT_JMP8_OFFSET)
> +             return true;
> +
> +     return false;
>  }

Can can_optimize() safely accept naturally occurring 'jmp +8' instructions
from user binaries?

The JMP8 branch returns true for any 2-byte 'jmp rel8' with displacement 8
(bytes 0xEB 0x08). The comment says this is for swbp_unoptimize()'s
kernel-placed jmp, but can_optimize() is called during initial analysis via
prepare_uprobe() -> arch_uprobe_analyze_insn() -> uprobe_init_insn(), before
any kernel modifications occur. The instruction bytes come directly from the
user binary through copy_insn().

If a user binary naturally contains 'EB 08' at the uprobe location (e.g., a
forward branch over an 8-byte literal pool or alignment padding), this path
would trigger:

  1. uprobe_init_insn() decodes the user's 'jmp 0x08' (length=2, opcode=0xEB,
     immediate=8).
  2. can_optimize() matches the new JMP8 branch and returns true.
  3. ARCH_UPROBE_FLAG_CAN_OPTIMIZE is set.
  4. set_swbp() writes INT3 at vaddr, replacing the 0xEB.
  5. When the breakpoint fires, swbp_optimize() -> int3_update() writes 10
     bytes (lea -0x80(%rsp),%rsp; call tramp) over vaddr..vaddr+9.
  6. verify_insn() only checks that byte 0 is INT3 (EXPECT_SWBP) before
     writing bytes 1..9. It does not verify those 8 bytes are from a prior
     optimization.
  7. The 8 bytes at vaddr+2..vaddr+9 (original user code/data following the
     jmp) get overwritten with the tail of the LEA and the CALL encoding.
  8. On unregister, swbp_unoptimize() only writes 2 bytes back. Bytes
     vaddr+2..vaddr+9 stay corrupted with call instruction remnants in the
     COW'd page.

The corruption persists even though the original jmp would skip those bytes,
because other control flow (indirect branches, function pointers, alternative
entry points) might reach that range.

Contrast with nop10: when can_optimize returns true for a 10-byte nop
(length==OPT_INSN_SIZE && insn_is_nop), writing 10 bytes only overwrites the
nop itself. The earlier nop5 code was safe because it required insn->length
== 5 and wrote exactly 5 bytes (write size matched instruction size).

The JMP8 case breaks that invariant: write size (10 bytes) exceeds
instruction size (2 bytes), so the 8 trailing bytes are arbitrary user
content. Would a length check (requiring insn->length == OPT_INSN_SIZE for
the JMP8 case) or tracking optimization state via a flag prevent optimizing
user instructions that happen to match the JMP8 pattern?


---
AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md

CI run summary: https://github.com/kernel-patches/bpf/actions/runs/26030080109

Reply via email to