[PATCH] io_uring: check ctx->sq_data before io_sq_offload_start
syzkaller identified KASAN: null-ptr-deref Read in io_uring_create bug on the stable 5.11-y tree. BUG: KASAN: null-ptr-deref in io_sq_offload_start fs/io_uring.c:8254 [inline] BUG: KASAN: null-ptr-deref in io_disable_sqo_submit fs/io_uring.c:8999 [inline] BUG: KASAN: null-ptr-deref in io_uring_create+0x1275/0x22f0 fs/io_uring.c:9824 Read of size 8 at addr 0068 by task syz-executor.0/4350 A simple reproducer for this bug is: int main(void) { syscall(__NR_mmap, 0x2000ul, 0x100ul, 7ul, 0x32ul, -1, 0ul); intptr_t res = 0; pid_t parent = getpid(); *(uint32_t*)0x2084 = 0; *(uint32_t*)0x2088 = 0x42; *(uint32_t*)0x208c = 0; *(uint32_t*)0x2090 = 0; *(uint32_t*)0x2098 = -1; *(uint32_t*)0x209c = 0; *(uint32_t*)0x20a0 = 0; *(uint32_t*)0x20a4 = 0; if (fork() == 0) { kill(parent,SIGKILL); exit(0); } res = syscall(__NR_io_uring_setup, 0x7994, 0x2080ul); return 0; } Due to the SIGKILL sent to the process before io_uring_setup completes, ctx->sq_data is NULL. Therefore, io_sq_offload_start dereferences a NULL pointer on read. More details on this bug are in [1]. Discussion for this patch happened in [2]. [1] https://oswalpalash.com/exploring-null-ptr-deref-io-uring-submit [2] https://lore.kernel.org/io-uring/a08121be-f481-e9f8-b28d-3eb5d4f a5...@gmail.com/ Signed-off-by: Palash Oswal --- fs/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 95b4a89dad4e..82a89ff315a4 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8995,7 +8995,7 @@ static void io_disable_sqo_submit(struct io_ring_ctx *ctx) { mutex_lock(&ctx->uring_lock); ctx->sqo_dead = 1; - if (ctx->flags & IORING_SETUP_R_DISABLED) + if (ctx->flags & IORING_SETUP_R_DISABLED && ctx->sq_data) io_sq_offload_start(ctx); mutex_unlock(&ctx->uring_lock); base-commit: 2aa8861eab092599ad566c5b20d7452d9ec0ca8e -- 2.27.0
Re: kernel panic: Attempted to kill init!
> The kernel stack is not very useful in this case, it's a common faulting > stack. > Maybe it will shed some light if you install gdb in the image, attach > it to the systemd process, then trigger the segfault and then unwind > stack in the systemd process at the time of fault, dump registers, > etc. However, I don't know if gdb will get the signal first, or the > kernel will panic first... Here's the gdb trace from the end of open_by_handle_at to the panic. I will try to attach gdb to systemd and report back. Thread 1 hit Breakpoint 3, __x64_sys_open_by_handle_at (regs=0xc9933f58) at fs/fhandle.c:271 271return ret; do_syscall_64 (nr=, regs=0xc9933f58) at arch/x86/entry/common.c:56 56syscall_exit_to_user_mode(regs); entry_SYSCALL_64 () at arch/x86/entry/entry_64.S:127 127movqRCX(%rsp), %rcx 128movqRIP(%rsp), %r11 130cmpq%rcx, %r11/* SYSRET requires RCX == RIP */ 131jneswapgs_restore_regs_and_return_to_usermode 145ALTERNATIVE "shl $(64 - 48), %rcx; sar $(64 - 48), %rcx", \ 153cmpq%rcx, %r11 154jneswapgs_restore_regs_and_return_to_usermode 156cmpq$__USER_CS, CS(%rsp)/* CS must match SYSRET */ 157jneswapgs_restore_regs_and_return_to_usermode 159movqR11(%rsp), %r11 160cmpq%r11, EFLAGS(%rsp)/* R11 == RFLAGS */ 161jneswapgs_restore_regs_and_return_to_usermode 181testq$(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11 182jnzswapgs_restore_regs_and_return_to_usermode 186cmpq$__USER_DS, SS(%rsp)/* SS must match SYSRET */ 187jneswapgs_restore_regs_and_return_to_usermode 195POP_REGS pop_rdi=0 skip_r11rcx=1 entry_SYSCALL_64 () at arch/x86/entry/entry_64.S:201 201movq%rsp, %rdi 202movqPER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp entry_SYSCALL_64 () at arch/x86/entry/entry_64.S:205 205pushqRSP-RDI(%rdi)/* RSP */ entry_SYSCALL_64 () at arch/x86/entry/entry_64.S:206 206pushq(%rdi)/* RDI */ entry_SYSCALL_64 () at arch/x86/entry/entry_64.S:214 214SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi 216popq%rdi entry_SYSCALL_64 () at arch/x86/entry/entry_64.S:217 217popq%rsp entry_SYSCALL_64 () at 
arch/x86/entry/entry_64.S:218 218USERGS_SYSRET64 native_io_apic_read (apic=, reg=24) at arch/x86/kernel/apic/io_apic.c:277 277return readl(&io_apic->data); 59build_mmio_read(readl, "l", unsigned int, "=r", :"memory") __ioapic_read_entry (apic=0, pin=) at arch/x86/kernel/apic/io_apic.c:294 294entry.w2 = io_apic_read(apic, 0x11 + 2 * pin); 296return entry; ioapic_irq_get_chip_state (irqd=, which=, state=0xc9247a8f) at arch/x86/kernel/apic/io_apic.c:1960 1960if (rentry.irr && rentry.is_level) { 1952for_each_irq_pin(p, mcd->irq_2_pin) { 1965raw_spin_unlock(&ioapic_lock); 1966return 0; __synchronize_hardirq (desc=desc@entry=0x888003d36c00, sync_chip=sync_chip@entry=true) at kernel/irq/manage.c:71 71raw_spin_unlock_irqrestore(&desc->lock, flags); 74} while (inprogress); synchronize_irq (irq=4) at kernel/irq/manage.c:138 138wait_event(desc->wait_for_threads, 138wait_event(desc->wait_for_threads, serial8250_do_shutdown (port=port@entry=0x836b62a0 ) at drivers/tty/serial/8250/8250_port.c:2449 2449if (up->dma) 329return &lock->rlock; 2453if (port->flags & UPF_FOURPORT) { 2458port->mctrl &= ~TIOCM_OUT2; 2460serial8250_set_mctrl(port, port->mctrl); 2461spin_unlock_irqrestore(&port->lock, flags); 2467serial_port_in(port, UART_LCR) & ~UART_LCR_SBC); 2466serial_port_out(port, UART_LCR, 2468serial8250_clear_fifos(up); 2474disable_rsa(up); 2481serial_port_in(port, UART_RX); 2482serial8250_rpm_put(up); 2484up->ops->release_irq(up); uart_port_shutdown (port=port@entry=0x8880054e) at drivers/tty/serial/serial_core.c:1716 1716synchronize_irq(uport->irq); uart_shutdown (tty=tty@entry=0x88800451c400, state=state@entry=0x8880054e) at drivers/tty/serial/serial_core.c:307 307tty_port_set_suspended(port, 0); 315uart_port_lock(state, flags); 316xmit_buf = state->xmit.buf; 317state->xmit.buf = NULL; 318uart_port_unlock(uport, flags); 320if (xmit_buf) 321free_page((unsigned long)xmit_buf); uart_hangup (tty=tty@entry=0x88800451c400) at ./include/linux/spinlock.h:329 329return &lock->rlock; 
1680spin_unlock_irqrestore(&port->lock, flags); 1681tty_port_set_active(port, 0); 1682tty_port_tty_set(port, NULL); 1683if (uport && !uart_console(uport)) 1685wake_up_interruptible(&port->open_wait); 1686wake_up_interruptible(&port->delta_ms
Re: kernel panic: Attempted to kill init!
On Tue, Mar 9, 2021 at 7:58 PM Al Viro wrote: > Lovely. So something in that sequence of syscalls manages to trigger > segfault in unrelated process. What happens if you put it to sleep > right after open_by_handle_at() (e.g. by read(2) from fd 0, etc.)? Added read(2) call in the reproducer, and there's no longer a segfault in systemd, but the process is still killed syscall(__NR_open_by_handle_at, r[0], 0x2000ul, 0x2f00ul); + unsigned char buffer[1]; + read(0, buffer, 1); return 0; root@sandbox:~# gcc -pthread repro.c -o repro root@sandbox:~# ./repro [ 450.676798] got to 221 [ 450.676881] got to 183 [ 450.677655] got to 201 [ 450.678042] got to 208 [ 450.678349] got to 210 [ 450.681404] got to 270 [ 450.707100] Kernel panic - not syncing: Attempted to kill init! exitcode=0x000b [ 450.708393] CPU: 0 PID: 1 Comm: systemd Not tainted 5.11.2+ #22 [ 450.709105] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1 04/01/2014 [ 450.710117] Call Trace: [ 450.710440] dump_stack+0xb2/0xe4 [ 450.710902] panic+0x196/0x502 [ 450.711277] do_exit.cold+0x70/0x108 [ 450.711710] do_group_exit+0x78/0x120 [ 450.712161] get_signal+0x22e/0xd60 [ 450.712588] arch_do_signal_or_restart+0xef/0x890 [ 450.713165] exit_to_user_mode_prepare+0x102/0x190 [ 450.713744] irqentry_exit_to_user_mode+0x9/0x20 [ 450.714340] irqentry_exit+0x19/0x30 [ 450.714817] exc_page_fault+0xc3/0x240 [ 450.715275] ? asm_exc_page_fault+0x8/0x30 [ 450.715805] asm_exc_page_fault+0x1e/0x30 [ 450.716295] RIP: 0033:0x7febb8036f10 [ 450.716738] Code: Unable to access opcode bytes at RIP 0x7febb8036ee6. 
[ 450.717512] RSP: 002b:7ffd91fec2f8 EFLAGS: 00010246 [ 450.718139] RAX: RBX: 55c6cc268f40 RCX: 7febb80672e3 [ 450.719030] RDX: 7ffd91fec480 RSI: 7ffd91fec5b0 RDI: 0007 [ 450.719877] RBP: 0007 R08: 431bde82d7b634db R09: 000b [ 450.720681] R10: R11: 0246 R12: 7ffd927eb4d0 [ 450.721527] R13: 0001 R14: R15: 0002 [ 450.722470] Kernel Offset: disabled [ 450.722941] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x000b ]--- Added a hb at panic() and here's the backtrace from gdb: (gdb) hb kernel/panic.c:177 Hardware assisted breakpoint 1 at 0x82201bd7: file kernel/panic.c, line 178. (gdb) c Continuing. Thread 1 hit Breakpoint 1, panic (fmt=fmt@entry=0x82bcd850 "Attempted to kill init! exitcode=0x%08x\n") at kernel/panic.c:178 178{ (gdb) bt #0 panic (fmt=fmt@entry=0x82bcd850 "Attempted to kill init! exitcode=0x%08x\n") at kernel/panic.c:178 #1 0x822025a3 in do_exit (code=code@entry=11) at kernel/exit.c:794 #2 0x810e6e98 in do_group_exit (exit_code=11) at kernel/exit.c:922 #3 0x810febae in get_signal (ksig=ksig@entry=0xc9013e38) at kernel/signal.c:2773 #4 0x8104fa8f in arch_do_signal_or_restart (regs=0xc9013f58, has_signal=) at arch/x86/kernel/signal.c:831 #5 0x811a0602 in handle_signal_work (ti_work=, regs=0xc9013f58) at kernel/entry/common.c:147 #6 exit_to_user_mode_loop (ti_work=, regs=) at kernel/entry/common.c:171 #7 exit_to_user_mode_prepare (regs=0xc9013f58) at kernel/entry/common.c:201 #8 0x8227a299 in irqentry_exit_to_user_mode (regs=) at kernel/entry/common.c:307 #9 0x8227a2c9 in irqentry_exit (regs=regs@entry=0xc9013f58, state=..., state@entry=...) at kernel/entry/common.c:395 #10 0x82279c83 in exc_page_fault (regs=0xc9013f58, error_code=20) at arch/x86/mm/fault.c:1509 #11 0x82400ade in asm_exc_page_fault () at ./arch/x86/include/asm/idtentry.h:580 #12 0x0002 in fixed_percpu_data () #13 0x in ?? () #14 0x0001 in fixed_percpu_data () #15 0x7ffdef6e1480 in ?? () #16 0x0007 in fixed_percpu_data () #17 0x55a7e97caf40 in ?? () #18 0x0246 in ?? 
() #19 0x in ?? () #20 0x000b in fixed_percpu_data () #21 0x in ?? ()
Re: kernel panic: Attempted to kill init!
On Tue, Mar 9, 2021 at 8:36 PM Dmitry Vyukov wrote: > FWIW the code looks reasonable: > > All code > >0: 00 00add%al,(%rax) >2: 00 00add%al,(%rax) >4: 41 57push %r15 >6: 41 56push %r14 >8: 41 55push %r13 >a: 41 54push %r12 >c: 55push %rbp >d: 53push %rbx >e: 89 fdmov%edi,%ebp > 10: 48 81 ec 48 01 00 00 sub$0x148,%rsp > 17: 64 48 8b 04 25 28 00 mov%fs:0x28,%rax > 1e: 00 00 > 20: 48 89 84 24 38 01 00 mov%rax,0x138(%rsp) > 27: 00 > 28: 31 c0xor%eax,%eax > 2a:* e8 f5 bf f7 ffcallq 0xfff7c024 <-- trapping > instruction > 2f: 83 f8 01 cmp$0x1,%eax > 32: 0f 84 b7 00 00 00je 0xef > 38: 48rex.W > 39: 8d.byte 0x8d > 3a: 9cpushfq > 3b: 40rex > > This is a PC-relative call to a reasonable address, right? > I wonder if it always traps on this instruction or not. Maybe the > executable is corrupted and has a page missing in the image or > something similar. But also if we suspect a badly corrupted image, is > it worth pursuing it?... I copied over a new systemd binary from a fresh disk image generated using tools/create-image.sh in syzkaller (debootstrap) and the bug was still reproducible. root@sandbox:~# md5sum /lib/systemd/systemd 12b20bfd8321ef7884b4dbf974a91213 /lib/systemd/systemd root@sandbox:~# md5sum /lib/systemd/systemd_orig 12b20bfd8321ef7884b4dbf974a91213 /lib/systemd/systemd_orig root@sandbox:~# gcc -pthread hax.c -o repro root@sandbox:~# ./repro [ 115.515840] got to 221 [ 115.515853] got to 183 [ 115.516400] got to 201 [ 115.516935] got to 208 [ 115.517475] got to 210 [ 115.521008] got to 270 [ 115.544984] systemd[1]: segfault at 7ffe972adfb8 ip 5560fb079466 sp 7ffe972adfc0 error 6 in systemd[5560fafcd000+ed000] [ 115.546554] Code: 00 00 00 00 41 57 41 56 41 55 41 54 55 53 89 fd 48 81 ec 48 01 00 00 64 48 8b 04 25 28 00 00 00 48 89 84 24 38 01 00 00 31 c0 f5 bf f7 ff 83 f8 01 0f 84 b7 00 00 00 48 8d 9c 240 [ 115.548575] Kernel panic - not syncing: Attempted to kill init! 
exitcode=0x000b [ 115.549352] CPU: 0 PID: 1 Comm: systemd Not tainted 5.11.2+ #22 [ 115.549994] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1 04/01/2014 [ 115.550834] Call Trace: [ 115.551090] dump_stack+0xb2/0xe4 [ 115.551438] panic+0x196/0x502 [ 115.551798] do_exit.cold+0x70/0x108 [ 115.552170] do_group_exit+0x78/0x120 [ 115.552552] get_signal+0x22e/0xd60 [ 115.552916] arch_do_signal_or_restart+0xef/0x890 [ 115.553407] exit_to_user_mode_prepare+0x102/0x190 [ 115.553920] irqentry_exit_to_user_mode+0x9/0x20 [ 115.554412] irqentry_exit+0x19/0x30 [ 115.554781] exc_page_fault+0xc3/0x240 [ 115.555168] ? asm_exc_page_fault+0x8/0x30 [ 115.555626] asm_exc_page_fault+0x1e/0x30 [ 115.556092] RIP: 0033:0x5560fb079466 [ 115.556476] Code: 00 00 00 00 41 57 41 56 41 55 41 54 55 53 89 fd 48 81 ec 48 01 00 00 64 48 8b 04 25 28 00 00 00 48 89 84 24 38 01 00 00 31 c0 f5 bf f7 ff 83 f8 01 0f 84 b7 00 00 00 48 8d 9c 240 [ 115.558399] RSP: 002b:7ffe972adfc0 EFLAGS: 00010246 [ 115.558947] RAX: RBX: 5560fcaa7f40 RCX: 7ff6fb1c22e3 [ 115.559720] RDX: 7ffe972ae140 RSI: 7ffe972ae270 RDI: 0007 [ 115.560475] RBP: 0007 R08: 431bde82d7b634db R09: 000b [ 115.561219] R10: R11: 0246 R12: 7ffe97aad190 [ 115.561963] R13: 0001 R14: R15: 0002 [ 115.562768] Kernel Offset: disabled [ 115.563148] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x000b ]--- For sanity, I created a new disk image altogether, made a replica of the image and ran syzkaller on the first copy of the image to find a new reproducer for this bug. [NEW IMAGE] [NEW IMAGE REPLICA] Used by syzkallerUsed for testing the reproducer manually After discovering the new reproducer for this fresh image, I triggered the new reproducer on the *untainted* replica of the image and the bug was reproducible. This would invalidate the assumption that the image/binaries on the image are corrupted.
Re: kernel panic: Attempted to kill init!
On Mon, Mar 8, 2021 at 10:50 PM Al Viro wrote: > I'd suggest to add printk(KERN_ERR "got to %d", __LINE__); in fs/fhandle.c at > beginning of do_handle_open() > right before each copy_from_user() in handle_to_path() > right before and right after the call of do_handle_to_path() (in the > same) > and try your reproducers on the resulting kernel. While applying this diff and re-running the reproducer, I see the following: diff --git a/fs/fhandle.c b/fs/fhandle.c index 01263ffbc4c0..4e0b171ec9af 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -180,6 +180,7 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh, retval = -EPERM; goto out_err; } + printk(KERN_ERR "got to %d", __LINE__); if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) { retval = -EFAULT; goto out_err; @@ -197,14 +198,16 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh, } /* copy the full handle */ *handle = f_handle; + printk(KERN_ERR "got to %d", __LINE__); if (copy_from_user(&handle->f_handle, &ufh->f_handle, f_handle.handle_bytes)) { retval = -EFAULT; goto out_handle; } - + printk(KERN_ERR "got to %d", __LINE__); retval = do_handle_to_path(mountdirfd, handle, path); + printk(KERN_ERR "got to %d", __LINE__); out_handle: kfree(handle); @@ -215,6 +218,7 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh, static long do_handle_open(int mountdirfd, struct file_handle __user *ufh, int open_flag) { + printk(KERN_ERR "got to %d", __LINE__); long retval = 0; struct path path; struct file *file root@sandbox:~# ./repro [8.325247] got to 221 [8.325270] got to 183 [8.326433] got to 201 [8.327620] got to 208 [8.328983] got to 210 [8.360955] Kernel panic - not syncing: Attempted to kill init! 
exitcode=0x000b [8.362261] CPU: 0 PID: 1 Comm: systemd Not tainted 5.11.2+ #20 [8.363015] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1 04/01/2014 [8.364044] Call Trace: [8.364357] dump_stack+0xb2/0xe4 [8.364782] panic+0x196/0x502 [8.365171] do_exit.cold+0x70/0x108 [8.365624] do_group_exit+0x78/0x120 [8.366087] get_signal+0x22e/0xd60 [8.366528] arch_do_signal_or_restart+0xef/0x890 [8.367120] exit_to_user_mode_prepare+0x102/0x190 [8.367724] irqentry_exit_to_user_mode+0x9/0x20 [8.368303] irqentry_exit+0x19/0x30 [8.368759] exc_page_fault+0xc3/0x240 [8.369220] ? asm_exc_page_fault+0x8/0x30 [8.369726] asm_exc_page_fault+0x1e/0x30 [8.370217] RIP: 0033:0x7fa902b4cf10 [8.370661] Code: Unable to access opcode bytes at RIP 0x7fa902b4cee6. [8.371444] RSP: 002b:7ffc391b20b8 EFLAGS: 00010246 [8.372081] RAX: RBX: 559276a67f40 RCX: 7fa902b7d2e3 [8.372935] RDX: 7ffc391b2240 RSI: 7ffc391b2370 RDI: 0007 [8.373860] RBP: 0007 R08: R09: 000b [8.374714] R10: R11: 0246 R12: 7ffc399afaa0 [8.375568] R13: 0001 R14: R15: 0002 [8.376574] Kernel Offset: disabled [8.376992] ---[ end Kernel panic - not syncing: Attempted to kill init! 
exitcode=0x000b ]--- When I add this change on top of the previous diff: @@ -263,6 +267,7 @@ SYSCALL_DEFINE3(open_by_handle_at, int, mountdirfd, flags |= O_LARGEFILE; ret = do_handle_open(mountdirfd, handle, flags); + printk(KERN_ERR "got to %d", __LINE__); return ret; } I observe the following result(notice the segfault in systemd): root@sandbox:~# ./repro [9.457767] got to 221 [9.457791] got to 183 [9.459144] got to 201 [9.459471] got to 208 [9.459773] got to 210 [9.462602] got to 270 [9.488551] systemd[1]: segfault at 7ffe59fd7fb8 ip 55be8f20b466 sp 7ffe59fd7fc0 error 6 in systemd[55be8f15f000+ed000] [9.490723] Code: 00 00 00 00 41 57 41 56 41 55 41 54 55 53 89 fd 48 81 ec 48 01 00 00 64 48 8b 04 25 28 00 00 00 48 89 84 24 38 01 00 00 31 c0 f5 bf f7 ff 83 f8 01 0f 84 b7 00 00 00 48 8d 9c 240 [9.492637] Kernel panic - not syncing: Attempted to kill init! exitcode=0x000b [9.493421] CPU: 0 PID: 1 Comm: systemd Not tainted 5.11.2+ #22 [9.494067] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1 04/01/2014 [9.495082] Call Trace: [9.495348] dump_stack+0xb2/0xe4 [9.495709] panic+0x196/0x502 [9.496041] do_exit.cold+0x70/0x108 [9.496429] do_group_exit+0x78/0x120 [9.496822] get_signal+0x22e/0xd60 [9.497205] arch_do_signal_or_restart+0xef/0x890 [9.497708] exit_to_user_mode_prepare+0
kernel panic: Attempted to kill init!
I was running syzkaller and I found the following issue : Head Commit : 27e543cca13fab05689b2d0d61d200a83cfb00b6 ( v5.11.2 ) Git Tree : stable Console Logs: Kernel panic - not syncing: Attempted to kill init! exitcode=0x000b CPU: 0 PID: 1 Comm: systemd Not tainted 5.11.2 #13 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:79 [inline] dump_stack+0xb2/0xe4 lib/dump_stack.c:120 panic+0x196/0x502 kernel/panic.c:231 do_exit.cold+0x70/0x108 kernel/exit.c:794 do_group_exit+0x78/0x120 kernel/exit.c:922 get_signal+0x22e/0xd60 kernel/signal.c:2773 arch_do_signal_or_restart+0xef/0x890 arch/x86/kernel/signal.c:811 handle_signal_work kernel/entry/common.c:147 [inline] exit_to_user_mode_loop kernel/entry/common.c:171 [inline] exit_to_user_mode_prepare+0x102/0x190 kernel/entry/common.c:201 irqentry_exit_to_user_mode+0x9/0x20 kernel/entry/common.c:307 irqentry_exit+0x19/0x30 kernel/entry/common.c:395 exc_page_fault+0xc3/0x240 arch/x86/mm/fault.c:1509 asm_exc_page_fault+0x1e/0x30 arch/x86/include/asm/idtentry.h:580 RIP: 0033:0x7feb52656f10 Code: Unable to access opcode bytes at RIP 0x7feb52656ee6. RSP: 002b:7ffec42704b8 EFLAGS: 00010246 RAX: RBX: 5604dc566f40 RCX: 7feb526872e3 RDX: 7ffec4270640 RSI: 7ffec4270770 RDI: 0007 RBP: 0007 R08: 35237084f6f94f9c R09: 1410 R10: R11: 0246 R12: 7ffec4a6ed00 R13: 0001 R14: R15: 0002 Dumping ftrace buffer: (ftrace buffer empty) Kernel Offset: disabled Rebooting in 1 seconds.. 
Syzkaller reproducer: # {Threaded:false Collide:false Repeat:false RepeatTimes:0 Procs:1 Slowdown:1 Sandbox: Fault:false FaultCall:-1 FaultNth:0 Leak:false NetInjection:false NetDevices:false NetReset:false Cgroups:false BinfmtMisc:false CloseFDs:false KCSAN:false DevlinkPCI:false USB:false VhciInjection:false Wifi:false IEEE802154:false Sysctl:false UseTmpDir:false HandleSegv:false Repro:false Trace:false} r0 = creat(&(0x7f0001c0)='./file0\x00', 0x0) open_by_handle_at(r0, &(0x7f00)=ANY=[@ANYBLOB="0a0002004b0d"], 0x2f00) C reproducer: // autogenerated by syzkaller (https://github.com/google/syzkaller) #define _GNU_SOURCE #include #include #include #include #include #include #include #include uint64_t r[1] = {0x}; int main(void) { syscall(__NR_mmap, 0x1000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul); syscall(__NR_mmap, 0x2000ul, 0x100ul, 7ul, 0x32ul, -1, 0ul); syscall(__NR_mmap, 0x2100ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul); intptr_t res = 0; memcpy((void*)0x21c0, "./file0\000", 8); res = syscall(__NR_creat, 0x21c0ul, 0ul); if (res != -1) r[0] = res; memcpy((void*)0x2000, "\x0a\x00\x00\x00\x02\x00\x00\x00\x4b\x0d", 10); syscall(__NR_open_by_handle_at, r[0], 0x2000ul, 0x2f00ul); return 0; } This reproducer only worked on the syzkaller instance disk image that I was using. I am adding the syzkaller report from a second instance for the same issue: Report #2 Syzkaller hit 'kernel panic: Attempted to kill init!' bug. Kernel panic - not syncing: Attempted to kill init! 
exitcode=0x000b CPU: 1 PID: 1 Comm: systemd Not tainted 5.11.2 #5 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:79 [inline] dump_stack+0xb9/0xef lib/dump_stack.c:120 panic+0x196/0x502 kernel/panic.c:231 do_exit.cold+0x89/0x113 kernel/exit.c:794 do_group_exit+0x78/0x120 kernel/exit.c:922 get_signal+0x230/0xd70 kernel/signal.c:2773 arch_do_signal_or_restart+0xef/0x890 arch/x86/kernel/signal.c:811 handle_signal_work kernel/entry/common.c:147 [inline] exit_to_user_mode_loop kernel/entry/common.c:171 [inline] exit_to_user_mode_prepare+0x115/0x1a0 kernel/entry/common.c:201 irqentry_exit_to_user_mode+0x9/0x20 kernel/entry/common.c:307 irqentry_exit+0x19/0x30 kernel/entry/common.c:395 exc_page_fault+0xc3/0x240 arch/x86/mm/fault.c:1509 asm_exc_page_fault+0x1e/0x30 arch/x86/include/asm/idtentry.h:580 RIP: 0033:0x7f51a89bc320 Code: Unable to access opcode bytes at RIP 0x7f51a89bc2f6. RSP: 002b:7ffca659b7f8 EFLAGS: 00010246 RAX: 7f51a9de3ee0 RBX: 7ffca659b8a0 RCX: RDX: RSI: 7ffca659b8a0 RDI: 0011 RBP: 0007 R08: 0008 R09: 559120f63478 R10: 559120f63440 R11: 0246 R12: 559120f63440 R13: R14: R15: 0002 Dumping ftrace buffer: (ftrace buffer empty) Kernel Offset: disabled Rebooting in 1 seconds.. Syzkaller reproducer: # {Threaded:false Collide:false Repeat:false RepeatTimes:0 Procs:1 Slowdown:1 Sandbox: Fault:false FaultCall:-1 FaultNth:0 Leak:false NetInjection:false NetDevices:false NetReset:false Cgroups:false BinfmtMisc:false CloseFDs:false KCSAN:false DevlinkPCI:false USB:false
BUG: soft lockup in corrupted
Hello, I was running syzkaller and I found the following issue : Head Commit : 27e543cca13fab05689b2d0d61d200a83cfb00b6 ( v5.11.2 ) Git Tree : stable Console logs: watchdog: BUG: soft lockup - CPU#0 stuck for 23s! [syz-executor497:423] Modules linked in: CPU: 0 PID: 423 Comm: syz-executor497 Not tainted 5.11.2 #13 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1 04/01/2014 RIP: 0010:__io_cqring_events fs/io_uring.c:1732 [inline] RIP: 0010:io_cqring_events fs/io_uring.c:2399 [inline] RIP: 0010:io_should_wake fs/io_uring.c:7190 [inline] RIP: 0010:io_cqring_wait fs/io_uring.c:7283 [inline] RIP: 0010:__do_sys_io_uring_enter+0x6b9/0x1040 fs/io_uring.c:9389 Code: 00 00 e8 ea 9a cd ff 31 ff 44 89 e6 e8 30 9d cd ff 45 85 e4 0f 85 5c 08 00 00 e8 d2 9a cd ff 48 8b 5d c0 48 8b 83 c0 00 00 00 <8b> 88 80 00 00 00 8b 83 00 02 00 00 29 c8 8b 4d c8 89 c7 89 85 78 watchdog: BUG: soft lockup - CPU#1 stuck for 23s! [syz-executor497:416] RSP: 0018:c91efe58 EFLAGS: 0293 Modules linked in: CPU: 1 PID: 416 Comm: syz-executor497 Not tainted 5.11.2 #13 RAX: 888006d3e000 RBX: 8880059cb400 RCX: Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1 04/01/2014 RDX: 888006da98c0 RSI: 81543cde RDI: 0003 RIP: 0010:__sanitizer_cov_trace_const_cmp8+0x78/0x90 kernel/kcov.c:293 RBP: c91eff18 R08: 8880059cb680 R09: 2cc0 Code: 0c fd 28 00 00 00 48 39 ce 72 1f 48 83 c2 01 4c 89 64 08 e8 48 c7 44 08 e0 07 00 00 00 48 89 5c 08 f0 4c 89 74 f8 20 48 89 10 <5b> 41 5c 41 5d 41 5e 5d c3 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f R10: 0001 R11: R12: RSP: 0018:c99f7e08 EFLAGS: 0246 R13: 888005a68700 R14: 8880059cb400 R15: 8880059cb6a0 FS: 015ed380() GS:88803ec0() knlGS: RAX: RBX: RCX: CS: 0010 DS: ES: CR0: 80050033 RDX: RSI: 888007e98000 RDI: 0003 CR2: 004bc0f0 CR3: 07cd6003 CR4: 00370ef0 RBP: c99f7e28 R08: 888006f39a80 R09: 2cc0 Call Trace: R10: 0001 R11: R12: R13: 888007e98000 R14: 8152e286 R15: 888006f39aa0 FS: 015ed380() GS:88803ed0() knlGS: __se_sys_io_uring_enter 
fs/io_uring.c:9306 [inline] __x64_sys_io_uring_enter+0x2f/0x40 fs/io_uring.c:9306 CS: 0010 DS: ES: CR0: 80050033 do_syscall_64+0x38/0x90 arch/x86/entry/common.c:46 CR2: 004bc0f0 CR3: 06c12006 CR4: 00370ee0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Call Trace: RIP: 0033:0x44508d signal_pending include/linux/sched/signal.h:369 [inline] io_run_task_work_sig+0x66/0x110 fs/io_uring.c:7213 Code: 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 c0 ff ff ff f7 d8 64 89 01 48 io_cqring_wait fs/io_uring.c:7276 [inline] __do_sys_io_uring_enter+0x67b/0x1040 fs/io_uring.c:9389 RSP: 002b:77178208 EFLAGS: 0246 ORIG_RAX: 01aa RAX: ffda RBX: 0003 RCX: 0044508d __se_sys_io_uring_enter fs/io_uring.c:9306 [inline] __x64_sys_io_uring_enter+0x2f/0x40 fs/io_uring.c:9306 RDX: 0001 RSI: 0001 RDI: 0003 do_syscall_64+0x38/0x90 arch/x86/entry/common.c:46 RBP: R08: R09: entry_SYSCALL_64_after_hwframe+0x44/0xa9 R10: 0001 R11: 0246 R12: 004040c0 RIP: 0033:0x44508d R13: R14: 77178240 R15: 77178230 Code: 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 c0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:77178208 EFLAGS: 0246 ORIG_RAX: 01aa RAX: ffda RBX: 0003 RCX: 0044508d RDX: 0001 RSI: 0001 RDI: 0003 RBP: R08: R09: R10: 0001 R11: 0246 R12: 004040c0 R13: R14: 77178240 R15: 77178230 Syzkaller Reproducer : # {Threaded:false Collide:false Repeat:true RepeatTimes:0 Procs:8 Slowdown:1 Sandbox: Fault:false FaultCall:-1 FaultNth:0 Leak:false NetInjection:false NetDevices:false NetReset:false Cgroups:false BinfmtMisc:false CloseFDs:false KCSAN:false DevlinkPCI:false USB:false VhciInjection:false Wifi:false IEEE802154:false Sysctl:false UseTmpDir:false HandleSegv:false Repro:false Trace:false} r0 = syz_io_uring_setup(0x1, &(0x7f80)={0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, &(0x7f0a)=nil, 
&(0x7f0b)=nil, &(0x7f000100)=0x0, &(0x7f000140)=0x0) syz_io_u
Re: [PATCH] ima: Replacing deprecated strlcpy with strscpy ~~~~~~~~~ Replace
Apologies for the inaccurate description. This patch is covered under the patch-set by Romain Perier ( https://lkml.org/lkml/2021/2/22/739 ) and can be disregarded. Best Regards, Palash On Fri, Feb 19, 2021 at 11:09 PM Jarkko Sakkinen wrote: > > Reply-To: > In-Reply-To: <20210219084038.ga7...@g3.oswalpalash.com> > > On Fri, Feb 19, 2021 at 02:10:38PM +0530, Palash Oswal wrote: > > The strlcpy() function is unsafe in that the source buffer length > > is unbounded or possibly be non NULL terminated. This can cause > > memory over-reads, crashes, etc. > > > > Link: https://github.com/KSPP/linux/issues/89 > > Signed-off-by: Palash Oswal > > The long description does not explain what the commit does, and > does not include any details about deprecation of strlcpy(), which > at least I'm not aware of. > > I don't think *length* ever is NULL terminated. The first sentence > is somewhat weird. Also strlcpy() does have a bounds check. > > Generally, the description and reasoning is sloppy to say the > least. 
> > /Jarkko > > > > --- > > security/integrity/ima/ima_api.c| 2 +- > > security/integrity/ima/ima_policy.c | 2 +- > > 2 files changed, 2 insertions(+), 2 deletions(-) > > > > diff --git a/security/integrity/ima/ima_api.c > > b/security/integrity/ima/ima_api.c > > index 1dd70dc68ffd..2f3b8257181d 100644 > > --- a/security/integrity/ima/ima_api.c > > +++ b/security/integrity/ima/ima_api.c > > @@ -399,7 +399,7 @@ const char *ima_d_path(const struct path *path, char > > **pathbuf, char *namebuf) > > } > > > > if (!pathname) { > > - strlcpy(namebuf, path->dentry->d_name.name, NAME_MAX); > > + strscpy(namebuf, path->dentry->d_name.name, NAME_MAX); > > pathname = namebuf; > > } > > > > diff --git a/security/integrity/ima/ima_policy.c > > b/security/integrity/ima/ima_policy.c > > index 9b45d064a87d..010839aef6ba 100644 > > --- a/security/integrity/ima/ima_policy.c > > +++ b/security/integrity/ima/ima_policy.c > > @@ -791,7 +791,7 @@ static int __init ima_init_arch_policy(void) > > char rule[255]; > > int result; > > > > - result = strlcpy(rule, *rules, sizeof(rule)); > > + strscpy(rule, *rules, sizeof(rule)); > > > > INIT_LIST_HEAD(&arch_policy_entry[i].list); > > result = ima_parse_rule(rule, &arch_policy_entry[i]); > > > > base-commit: f6692213b5045dc461ce0858fb18cf46f328c202 > > -- > > 2.27.0 > > > >
[PATCH] ima: Replacing deprecated strlcpy with strscpy
The strlcpy() function is unsafe in that the source buffer length is unbounded or possibly be non NULL terminated. This can cause memory over-reads, crashes, etc. Link: https://github.com/KSPP/linux/issues/89 Signed-off-by: Palash Oswal --- security/integrity/ima/ima_api.c| 2 +- security/integrity/ima/ima_policy.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c index 1dd70dc68ffd..2f3b8257181d 100644 --- a/security/integrity/ima/ima_api.c +++ b/security/integrity/ima/ima_api.c @@ -399,7 +399,7 @@ const char *ima_d_path(const struct path *path, char **pathbuf, char *namebuf) } if (!pathname) { - strlcpy(namebuf, path->dentry->d_name.name, NAME_MAX); + strscpy(namebuf, path->dentry->d_name.name, NAME_MAX); pathname = namebuf; } diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index 9b45d064a87d..010839aef6ba 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -791,7 +791,7 @@ static int __init ima_init_arch_policy(void) char rule[255]; int result; - result = strlcpy(rule, *rules, sizeof(rule)); + strscpy(rule, *rules, sizeof(rule)); INIT_LIST_HEAD(&arch_policy_entry[i].list); result = ima_parse_rule(rule, &arch_policy_entry[i]); base-commit: f6692213b5045dc461ce0858fb18cf46f328c202 -- 2.27.0
Re: INFO: task hung in __io_uring_task_cancel
On Mon, Jan 4, 2021 at 12:22 PM Hillf Danton wrote: > It is now updated. Hello Hillf, Thanks for the new diff. I tested by applying the diff on 5.10.4 with the original reproducer, and the issue still persists. root@syzkaller:~# [ 242.925799] INFO: task repro:416 blocked for more than 120 seconds. [ 242.928095] Not tainted 5.10.4+ #12 [ 242.929034] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 242.930825] task:repro state:D stack:0 pid: 416 ppid: 415 flags:0x0004 [ 242.933404] Call Trace: [ 242.934365] __schedule+0x28d/0x7e0 [ 242.935199] ? __percpu_counter_sum+0x75/0x90 [ 242.936265] schedule+0x4f/0xc0 [ 242.937159] __io_uring_task_cancel+0xc0/0xf0 [ 242.938340] ? wait_woken+0x80/0x80 [ 242.939380] bprm_execve+0x67/0x8a0 [ 242.940163] do_execveat_common+0x1d2/0x220 [ 242.941090] __x64_sys_execveat+0x5d/0x70 [ 242.942056] do_syscall_64+0x38/0x90 [ 242.943088] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 242.944511] RIP: 0033:0x7fd0b781e469 [ 242.945422] RSP: 002b:7fffda20e9c8 EFLAGS: 0246 ORIG_RAX: 0142 [ 242.947289] RAX: ffda RBX: RCX: 7fd0b781e469 [ 242.949031] RDX: RSI: 2180 RDI: [ 242.950683] RBP: 7fffda20e9e0 R08: R09: 7fffda20e9e0 [ 242.952450] R10: R11: 0246 R12: 556068200bf0 [ 242.954045] R13: 7fffda20eb00 R14: R15: linux git:(b1313fe517ca) git diff diff --git a/fs/io_uring.c b/fs/io_uring.c index 0fcd065baa76..e0c5424e28b1 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1867,8 +1867,7 @@ static void __io_free_req(struct io_kiocb *req) io_dismantle_req(req); percpu_counter_dec(&tctx->inflight); -if (atomic_read(&tctx->in_idle)) -wake_up(&tctx->wait); +wake_up(&tctx->wait); put_task_struct(req->task); if (likely(!io_is_fallback_req(req))) @@ -8853,12 +8852,11 @@ void __io_uring_task_cancel(void) * If we've seen completions, retry. This avoids a race where * a completion comes in before we did prepare_to_wait(). 
*/ -if (inflight != tctx_inflight(tctx)) -continue; -schedule(); +if (inflight == tctx_inflight(tctx)) +schedule(); +finish_wait(&tctx->wait, &wait); } while (1); -finish_wait(&tctx->wait, &wait); atomic_dec(&tctx->in_idle); }
Re: INFO: task hung in __io_uring_task_cancel
Hillf - > Can you reproduce it again against 5.11-rc1 with the tiny diff applied > to see if there is a missing wakeup in the mainline? Hey Hillf, thanks for sharing the diff. It seems like the reproducer that I had sent did not work on 5.11-rc1 itself, so I'm trying to get an updated reproducer for that. I'm not well versed with the io_uring code yet, and therefore it'll take me longer to get the reproducer going for 5.11-rc1. Jens - > Can you see if this helps? The reproducer is pretty brutal, it'll fork > thousands of tasks with rings! But should work of course. I think this > one is pretty straight forward, and actually an older issue with the > poll rewaiting. Hey Jens, I applied your diff to 5.10.4 ( b1313fe517ca3703119dcc99ef3bbf75ab42bcfb ), and unfortunately, I'm still seeing the task being hung. Here's the console log if this helps further - root@syzkaller:~# [ 242.840696] INFO: task repro:395 blocked for more than 120 seconds. [ 242.846353] Not tainted 5.10.4+ #9 [ 242.849951] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 242.857665] task:repro state:D stack:0 pid: 395 ppid: 394 flags:0x0004 [ 242.867346] Call Trace: [ 242.870521] __schedule+0x28d/0x7e0 [ 242.873597] ? __percpu_counter_sum+0x75/0x90 [ 242.876794] schedule+0x4f/0xc0 [ 242.878803] __io_uring_task_cancel+0xad/0xf0 [ 242.880952] ? wait_woken+0x80/0x80 [ 242.882330] bprm_execve+0x67/0x8a0 [ 242.884142] do_execveat_common+0x1d2/0x220 [ 242.885610] __x64_sys_execveat+0x5d/0x70 [ 242.886708] do_syscall_64+0x38/0x90 [ 242.887727] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 242.889298] RIP: 0033:0x7ffabedd6469 [ 242.890265] RSP: 002b:7ffc56b8bc78 EFLAGS: 0246 ORIG_RAX: 0142 [ 242.892055] RAX: ffda RBX: RCX: 7ffabedd6469 [ 242.893776] RDX: RSI: 2180 RDI: [ 242.895400] RBP: 7ffc56b8bc90 R08: R09: 7ffc56b8bc90 [ 242.896879] R10: R11: 0246 R12: 559c19400bf0 [ 242.898335] R13: 7ffc56b8bdb0 R14: R15: [ 363.691144] INFO: task repro:395 blocked for more than 241 seconds. 
[ 363.693724] Not tainted 5.10.4+ #9 [ 363.695513] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 363.700543] task:repro state:D stack:0 pid: 395 ppid: 394 flags:0x0004 [ 363.705747] Call Trace: [ 363.707359] __schedule+0x28d/0x7e0 [ 363.709603] ? __percpu_counter_sum+0x75/0x90 [ 363.712900] schedule+0x4f/0xc0 [ 363.715002] __io_uring_task_cancel+0xad/0xf0 [ 363.718026] ? wait_woken+0x80/0x80 [ 363.720137] bprm_execve+0x67/0x8a0 [ 363.721992] do_execveat_common+0x1d2/0x220 [ 363.723997] __x64_sys_execveat+0x5d/0x70 [ 363.725857] do_syscall_64+0x38/0x90 [ 363.727501] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 363.729510] RIP: 0033:0x7ffabedd6469 [ 363.730913] RSP: 002b:7ffc56b8bc78 EFLAGS: 0246 ORIG_RAX: 0142 [ 363.733747] RAX: ffda RBX: RCX: 7ffabedd6469 [ 363.736138] RDX: RSI: 2180 RDI: [ 363.738431] RBP: 7ffc56b8bc90 R08: R09: 7ffc56b8bc90 [ 363.740504] R10: R11: 0246 R12: 559c19400bf0 [ 363.742560] R13: 7ffc56b8bdb0 R14: R15: