Re: events: possible deadlock in __perf_event_task_sched_out

2017-08-21 Thread Peter Zijlstra
On Mon, Aug 21, 2017 at 01:58:13PM +0530, Shubham Bansal wrote:
> > This is a WARN, printk is a pig.
> 
> So, its not a bug?

No, triggering the WARN is the problem, this is just fallout after that.


Re: events: possible deadlock in __perf_event_task_sched_out

2017-08-21 Thread Peter Zijlstra
On Mon, Aug 21, 2017 at 01:58:13PM +0530, Shubham Bansal wrote:
> > This is a WARN, printk is a pig.
> 
> So, its not a bug?

No, triggering the WARN is the problem, this is just fallout after that.


Re: events: possible deadlock in __perf_event_task_sched_out

2017-08-21 Thread Shubham Bansal
> This is a WARN, printk is a pig.

So, its not a bug?


Re: events: possible deadlock in __perf_event_task_sched_out

2017-08-21 Thread Shubham Bansal
> This is a WARN, printk is a pig.

So, its not a bug?


Re: events: possible deadlock in __perf_event_task_sched_out

2017-08-21 Thread Peter Zijlstra
On Mon, Aug 21, 2017 at 01:56:27AM +0530, Shubham Bansal wrote:
> -> #1 ((console_sem).lock){..}:
>lock_acquire+0x173/0x470
>_raw_spin_lock_irqsave+0x42/0x60
>down_trylock+0x13/0x70
>__down_trylock_console_sem+0x38/0xc0
>console_trylock+0x17/0xb0
>vprintk_emit+0x410/0x480
>vprintk_default+0x28/0x30
>vprintk_func+0x57/0xbe
>printk+0x9f/0xbb
>ex_handler_wrmsr_unsafe.cold.1+0xca/0xe2
>fixup_exception+0x8b/0xb3
>do_general_protection+0x18f/0x2b0
>general_protection+0x28/0x30
>native_write_msr+0x4/0x30
>x86_pmu_enable+0x347/0xab0
>ctx_resched+0x1e6/0x2b0
>__perf_install_in_context+0x244/0x2c0
>remote_function+0x8e/0x190
>generic_exec_single+0x292/0x410
>smp_call_function_single+0x339/0x510
>task_function_call+0x162/0x220
>perf_install_in_context+0x247/0x460
>SYSC_perf_event_open+0x207c/0x2ea0
>SyS_perf_event_open+0x39/0x50
>entry_SYSCALL_64_fastpath+0x1f/0xbe

This is a WARN, printk is a pig.


Re: events: possible deadlock in __perf_event_task_sched_out

2017-08-21 Thread Peter Zijlstra
On Mon, Aug 21, 2017 at 01:56:27AM +0530, Shubham Bansal wrote:
> -> #1 ((console_sem).lock){..}:
>lock_acquire+0x173/0x470
>_raw_spin_lock_irqsave+0x42/0x60
>down_trylock+0x13/0x70
>__down_trylock_console_sem+0x38/0xc0
>console_trylock+0x17/0xb0
>vprintk_emit+0x410/0x480
>vprintk_default+0x28/0x30
>vprintk_func+0x57/0xbe
>printk+0x9f/0xbb
>ex_handler_wrmsr_unsafe.cold.1+0xca/0xe2
>fixup_exception+0x8b/0xb3
>do_general_protection+0x18f/0x2b0
>general_protection+0x28/0x30
>native_write_msr+0x4/0x30
>x86_pmu_enable+0x347/0xab0
>ctx_resched+0x1e6/0x2b0
>__perf_install_in_context+0x244/0x2c0
>remote_function+0x8e/0x190
>generic_exec_single+0x292/0x410
>smp_call_function_single+0x339/0x510
>task_function_call+0x162/0x220
>perf_install_in_context+0x247/0x460
>SYSC_perf_event_open+0x207c/0x2ea0
>SyS_perf_event_open+0x39/0x50
>entry_SYSCALL_64_fastpath+0x1f/0xbe

This is a WARN, printk is a pig.


events: possible deadlock in __perf_event_task_sched_out

2017-08-20 Thread Shubham Bansal
Hi all,

While fuzzing with syzkaller inside Qemu I've stumbled on the
following warning which is reproducible with

==

// autogenerated by syzkaller (http://github.com/google/syzkaller)

#define _GNU_SOURCE

#include 
#include 
#include 
#include 

static void test();

void loop()
{
  while (1) {
test();
  }
}

long r[29];
void test()
{
  memset(r, -1, sizeof(r));
  r[0] = syscall(__NR_mmap, 0x2000ul, 0x1d000ul, 0x3ul, 0x32ul,
0xul, 0x0ul);
  *(uint32_t*)0x2001bf88 = (uint32_t)0x0;
  *(uint32_t*)0x2001bf8c = (uint32_t)0x78;
  *(uint8_t*)0x2001bf90 = (uint8_t)0x0;
  *(uint8_t*)0x2001bf91 = (uint8_t)0x0;
  *(uint8_t*)0x2001bf92 = (uint8_t)0x0;
  *(uint8_t*)0x2001bf93 = (uint8_t)0x0;
  *(uint32_t*)0x2001bf94 = (uint32_t)0x0;
  *(uint64_t*)0x2001bf98 = (uint64_t)0x9;
  *(uint64_t*)0x2001bfa0 = (uint64_t)0x0;
  *(uint64_t*)0x2001bfa8 = (uint64_t)0x0;
  *(uint8_t*)0x2001bfb0 = (uint8_t)0x3fe;
  *(uint8_t*)0x2001bfb1 = (uint8_t)0x0;
  *(uint8_t*)0x2001bfb2 = (uint8_t)0x8;
  *(uint8_t*)0x2001bfb3 = (uint8_t)0x0;
  *(uint32_t*)0x2001bfb4 = (uint32_t)0x0;
  *(uint32_t*)0x2001bfb8 = (uint32_t)0x0;
  *(uint32_t*)0x2001bfbc = (uint32_t)0x0;
  *(uint64_t*)0x2001bfc0 = (uint64_t)0x0;
  *(uint64_t*)0x2001bfc8 = (uint64_t)0x0;
  *(uint64_t*)0x2001bfd0 = (uint64_t)0x0;
  *(uint64_t*)0x2001bfd8 = (uint64_t)0x0;
  *(uint64_t*)0x2001bfe0 = (uint64_t)0x2;
  *(uint32_t*)0x2001bfe8 = (uint32_t)0x0;
  *(uint64_t*)0x2001bff0 = (uint64_t)0x0;
  *(uint32_t*)0x2001bff8 = (uint32_t)0x0;
  *(uint16_t*)0x2001bffc = (uint16_t)0x0;
  *(uint16_t*)0x2001bffe = (uint16_t)0x0;
  r[28] = syscall(__NR_perf_event_open, 0x2001bf88ul, 0x0ul,
0xfffeul, 0xul, 0x2ul);
}

int main()
{
  int i; for (i = 0; i < 2; i++) {
if (fork() == 0) {
  loop();
  return 0;
}
  }
  sleep(100);
  return 0;
}



==
WARNING: possible circular locking dependency detected
4.13.0-rc4+ #1 Not tainted
--
syz-executor0/2793 is trying to acquire lock:
 (>lock){..}, at: []
__perf_event_task_sched_out+0x639/0x1000

but task is already holding lock:
 (>lock){-.-.-.}, at: [] __schedule+0x1d3/0x1ea0

which lock already depends on the new lock.


the existing dependency chain (in reverse order) is:

-> #3 (>lock){-.-.-.}:
   lock_acquire+0x173/0x470
   _raw_spin_lock+0x2a/0x40
   task_fork_fair+0x7a/0x760
   sched_fork+0x43e/0xf60
   copy_process.part.43+0x18a5/0x26a0
   _do_fork+0x1ea/0x1030
   kernel_thread+0x34/0x40
   rest_init+0x22/0xfe
   start_kernel+0x71e/0x744
   x86_64_start_reservations+0x24/0x26
   x86_64_start_kernel+0x134/0x143
   verify_cpu+0x0/0xf1

-> #2 (>pi_lock){-.-.-.}:
   lock_acquire+0x173/0x470
   _raw_spin_lock_irqsave+0x42/0x60
   try_to_wake_up+0xc0/0x14c0
   wake_up_process+0x10/0x20
   __up.isra.2+0x1a1/0x290
   up+0x12f/0x1b0
   __up_console_sem+0x48/0x90
   console_unlock+0x61a/0xc20
   do_con_write+0xfb3/0x1f20
   con_write+0x25/0xb0
   n_tty_write+0x5fc/0xed0
   tty_write+0x3be/0x800
   __vfs_write+0x10d/0x820
   vfs_write+0x189/0x510
   SyS_write+0xef/0x220
   entry_SYSCALL_64_fastpath+0x1f/0xbe

-> #1 ((console_sem).lock){..}:
   lock_acquire+0x173/0x470
   _raw_spin_lock_irqsave+0x42/0x60
   down_trylock+0x13/0x70
   __down_trylock_console_sem+0x38/0xc0
   console_trylock+0x17/0xb0
   vprintk_emit+0x410/0x480
   vprintk_default+0x28/0x30
   vprintk_func+0x57/0xbe
   printk+0x9f/0xbb
   ex_handler_wrmsr_unsafe.cold.1+0xca/0xe2
   fixup_exception+0x8b/0xb3
   do_general_protection+0x18f/0x2b0
   general_protection+0x28/0x30
   native_write_msr+0x4/0x30
   x86_pmu_enable+0x347/0xab0
   ctx_resched+0x1e6/0x2b0
   __perf_install_in_context+0x244/0x2c0
   remote_function+0x8e/0x190
   generic_exec_single+0x292/0x410
   smp_call_function_single+0x339/0x510
   task_function_call+0x162/0x220
   perf_install_in_context+0x247/0x460
   SYSC_perf_event_open+0x207c/0x2ea0
   SyS_perf_event_open+0x39/0x50
   entry_SYSCALL_64_fastpath+0x1f/0xbe

-> #0 (>lock){..}:
   __lock_acquire+0x2926/0x4120
   lock_acquire+0x173/0x470
   _raw_spin_lock+0x2a/0x40
   __perf_event_task_sched_out+0x639/0x1000
   __schedule+0xec3/0x1ea0
   preempt_schedule_common+0x35/0x60
   _cond_resched+0x17/0x20
   __mutex_lock+0x143/0x17f0
   mutex_lock_nested+0x16/0x20
   SYSC_perf_event_open+0x2123/0x2ea0
   SyS_perf_event_open+0x39/0x50
   entry_SYSCALL_64_fastpath+0x1f/0xbe

other info that might help us debug this:

Chain exists of:
  >lock --> >pi_lock --> >lock

 Possible unsafe locking scenario:

   CPU0CPU1
   
  lock(>lock);
 

events: possible deadlock in __perf_event_task_sched_out

2017-08-20 Thread Shubham Bansal
Hi all,

While fuzzing with syzkaller inside Qemu I've stumbled on the
following warning which is reproducible with

==

// autogenerated by syzkaller (http://github.com/google/syzkaller)

#define _GNU_SOURCE

#include 
#include 
#include 
#include 

static void test();

void loop()
{
  while (1) {
test();
  }
}

long r[29];
void test()
{
  memset(r, -1, sizeof(r));
  r[0] = syscall(__NR_mmap, 0x2000ul, 0x1d000ul, 0x3ul, 0x32ul,
0xul, 0x0ul);
  *(uint32_t*)0x2001bf88 = (uint32_t)0x0;
  *(uint32_t*)0x2001bf8c = (uint32_t)0x78;
  *(uint8_t*)0x2001bf90 = (uint8_t)0x0;
  *(uint8_t*)0x2001bf91 = (uint8_t)0x0;
  *(uint8_t*)0x2001bf92 = (uint8_t)0x0;
  *(uint8_t*)0x2001bf93 = (uint8_t)0x0;
  *(uint32_t*)0x2001bf94 = (uint32_t)0x0;
  *(uint64_t*)0x2001bf98 = (uint64_t)0x9;
  *(uint64_t*)0x2001bfa0 = (uint64_t)0x0;
  *(uint64_t*)0x2001bfa8 = (uint64_t)0x0;
  *(uint8_t*)0x2001bfb0 = (uint8_t)0x3fe;
  *(uint8_t*)0x2001bfb1 = (uint8_t)0x0;
  *(uint8_t*)0x2001bfb2 = (uint8_t)0x8;
  *(uint8_t*)0x2001bfb3 = (uint8_t)0x0;
  *(uint32_t*)0x2001bfb4 = (uint32_t)0x0;
  *(uint32_t*)0x2001bfb8 = (uint32_t)0x0;
  *(uint32_t*)0x2001bfbc = (uint32_t)0x0;
  *(uint64_t*)0x2001bfc0 = (uint64_t)0x0;
  *(uint64_t*)0x2001bfc8 = (uint64_t)0x0;
  *(uint64_t*)0x2001bfd0 = (uint64_t)0x0;
  *(uint64_t*)0x2001bfd8 = (uint64_t)0x0;
  *(uint64_t*)0x2001bfe0 = (uint64_t)0x2;
  *(uint32_t*)0x2001bfe8 = (uint32_t)0x0;
  *(uint64_t*)0x2001bff0 = (uint64_t)0x0;
  *(uint32_t*)0x2001bff8 = (uint32_t)0x0;
  *(uint16_t*)0x2001bffc = (uint16_t)0x0;
  *(uint16_t*)0x2001bffe = (uint16_t)0x0;
  r[28] = syscall(__NR_perf_event_open, 0x2001bf88ul, 0x0ul,
0xfffeul, 0xul, 0x2ul);
}

int main()
{
  int i; for (i = 0; i < 2; i++) {
if (fork() == 0) {
  loop();
  return 0;
}
  }
  sleep(100);
  return 0;
}



==
WARNING: possible circular locking dependency detected
4.13.0-rc4+ #1 Not tainted
--
syz-executor0/2793 is trying to acquire lock:
 (>lock){..}, at: []
__perf_event_task_sched_out+0x639/0x1000

but task is already holding lock:
 (>lock){-.-.-.}, at: [] __schedule+0x1d3/0x1ea0

which lock already depends on the new lock.


the existing dependency chain (in reverse order) is:

-> #3 (>lock){-.-.-.}:
   lock_acquire+0x173/0x470
   _raw_spin_lock+0x2a/0x40
   task_fork_fair+0x7a/0x760
   sched_fork+0x43e/0xf60
   copy_process.part.43+0x18a5/0x26a0
   _do_fork+0x1ea/0x1030
   kernel_thread+0x34/0x40
   rest_init+0x22/0xfe
   start_kernel+0x71e/0x744
   x86_64_start_reservations+0x24/0x26
   x86_64_start_kernel+0x134/0x143
   verify_cpu+0x0/0xf1

-> #2 (>pi_lock){-.-.-.}:
   lock_acquire+0x173/0x470
   _raw_spin_lock_irqsave+0x42/0x60
   try_to_wake_up+0xc0/0x14c0
   wake_up_process+0x10/0x20
   __up.isra.2+0x1a1/0x290
   up+0x12f/0x1b0
   __up_console_sem+0x48/0x90
   console_unlock+0x61a/0xc20
   do_con_write+0xfb3/0x1f20
   con_write+0x25/0xb0
   n_tty_write+0x5fc/0xed0
   tty_write+0x3be/0x800
   __vfs_write+0x10d/0x820
   vfs_write+0x189/0x510
   SyS_write+0xef/0x220
   entry_SYSCALL_64_fastpath+0x1f/0xbe

-> #1 ((console_sem).lock){..}:
   lock_acquire+0x173/0x470
   _raw_spin_lock_irqsave+0x42/0x60
   down_trylock+0x13/0x70
   __down_trylock_console_sem+0x38/0xc0
   console_trylock+0x17/0xb0
   vprintk_emit+0x410/0x480
   vprintk_default+0x28/0x30
   vprintk_func+0x57/0xbe
   printk+0x9f/0xbb
   ex_handler_wrmsr_unsafe.cold.1+0xca/0xe2
   fixup_exception+0x8b/0xb3
   do_general_protection+0x18f/0x2b0
   general_protection+0x28/0x30
   native_write_msr+0x4/0x30
   x86_pmu_enable+0x347/0xab0
   ctx_resched+0x1e6/0x2b0
   __perf_install_in_context+0x244/0x2c0
   remote_function+0x8e/0x190
   generic_exec_single+0x292/0x410
   smp_call_function_single+0x339/0x510
   task_function_call+0x162/0x220
   perf_install_in_context+0x247/0x460
   SYSC_perf_event_open+0x207c/0x2ea0
   SyS_perf_event_open+0x39/0x50
   entry_SYSCALL_64_fastpath+0x1f/0xbe

-> #0 (>lock){..}:
   __lock_acquire+0x2926/0x4120
   lock_acquire+0x173/0x470
   _raw_spin_lock+0x2a/0x40
   __perf_event_task_sched_out+0x639/0x1000
   __schedule+0xec3/0x1ea0
   preempt_schedule_common+0x35/0x60
   _cond_resched+0x17/0x20
   __mutex_lock+0x143/0x17f0
   mutex_lock_nested+0x16/0x20
   SYSC_perf_event_open+0x2123/0x2ea0
   SyS_perf_event_open+0x39/0x50
   entry_SYSCALL_64_fastpath+0x1f/0xbe

other info that might help us debug this:

Chain exists of:
  >lock --> >pi_lock --> >lock

 Possible unsafe locking scenario:

   CPU0CPU1
   
  lock(>lock);