from:"Dmitry Vyukov"

WARNING in crypto_wait_for_test

2015-12-08 Thread Dmitry Vyukov

Hello,

The following program triggers a WARNING in crypto_wait_for_test:

// autogenerated by syzkaller (http://github.com/google/syzkaller)
#include 
#include 
#include 

int main()
{
long r0 = syscall(SYS_mmap, 0x2000ul, 0x1000ul, 0x3ul,
0x32ul, 0xul, 0x0ul);
long r1 = syscall(SYS_socket, 0x26ul, 0x5ul, 0x0ul, 0, 0, 0);
*(uint16_t*)0x2000 = 0x26;
memcpy((void*)0x2002,
"\x73\x6b\x63\x69\x70\x68\x65\x72\x00\x00\x00\x00\x00\x00", 14);
*(uint32_t*)0x2010 = 0x1008;
*(uint32_t*)0x2014 = 0x469b167b45d89a6;
memcpy((void*)0x2018,
"\x63\x74\x72\x28\x64\x65\x73\x33\x5f\x65\x64\x65\x29\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
64);
long r7 = syscall(SYS_bind, r1, 0x2000ul, 0x58ul, 0, 0, 0);
return 0;
}


[ cut here ]
WARNING: CPU: 1 PID: 11087 at crypto/algapi.c:343
crypto_wait_for_test+0xc4/0xf0()
Modules linked in:
CPU: 1 PID: 11087 Comm: a.out Tainted: GW   4.4.0-rc3+ #151
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
 0001 88006ca07a78 82e0f4b8 41b58ab3
 87a9a265 82e0f406 88003711e080 
 89913aa0 0001 0001 2b4f
Call Trace:
 [< inline >] __dump_stack lib/dump_stack.c:15
 [] dump_stack+0xb2/0xfa lib/dump_stack.c:50
 [] warn_slowpath_common+0xe6/0x170 kernel/panic.c:460
 [] warn_slowpath_null+0x29/0x30 kernel/panic.c:493
 [] crypto_wait_for_test+0xc4/0xf0 crypto/algapi.c:343
 [] crypto_register_instance+0x220/0x350 crypto/algapi.c:558
 [] crypto_givcipher_default+0x4f4/0x620
crypto/ablkcipher.c:601
 [] crypto_lookup_skcipher+0x1ba/0x2f0 crypto/ablkcipher.c:658
 [] crypto_alloc_ablkcipher+0x5e/0x1f0 crypto/ablkcipher.c:693
 [] skcipher_bind+0x25/0x30 crypto/algif_skcipher.c:754
 [] alg_bind+0x1a9/0x410 crypto/af_alg.c:155
 [] SYSC_bind+0x20a/0x2c0 net/socket.c:1383
 [] SyS_bind+0x24/0x30 net/socket.c:1369
 [] entry_SYSCALL_64_fastpath+0x16/0x7a
arch/x86/entry/entry_64.S:185
---[ end trace 49f86739a736fa2b ]---


strace:
socket(PF_ALG, SOCK_SEQPACKET, 0)   = 3
bind(3, {sa_family=AF_ALG, sa_data="skcipher\0\0\0\0\0\0"}, 88) = -1
ENOENT (No such file or directory)


On commit 31ade3b83e1821da5fbb2f11b5b3d4ab2ec39db8 (Nov 29).
--
To unsubscribe from this list: send the line "unsubscribe linux-crypto" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

GPF in gf128mul_64k_bbe

2015-12-17 Thread Dmitry Vyukov

Hello,

The following program causes GPF in gf128mul_64k_bbe:

// autogenerated by syzkaller (http://github.com/google/syzkaller)
#include 
#include 
#include 

int main()
{
long r0 = syscall(SYS_socket, 0x26ul, 0x5ul, 0x0ul, 0, 0, 0);
long r1 = syscall(SYS_mmap, 0x2000ul, 0x1ul, 0x3ul,
0x32ul, 0xul, 0x0ul);
*(uint16_t*)0x20001000 = 0x26;
memcpy((void*)0x20001002,
"\x73\x6b\x63\x69\x70\x68\x65\x72\x00\x00\x00\x00\x00\x00", 14);
*(uint32_t*)0x20001010 = 0xf;
*(uint32_t*)0x20001014 = 0x100;
memcpy((void*)0x20001018,
"\x6c\x72\x77\x28\x63\x61\x6d\x65\x6c\x6c\x69\x61\x29\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
64);
long r7 = syscall(SYS_bind, r0, 0x20001000ul, 0x58ul, 0, 0, 0);
long r8 = syscall(SYS_accept4, r0, 0x0ul, 0x200023fdul, 0x800ul, 0, 0);
*(uint32_t*)0x20002000 = 0x12;
*(uint32_t*)0x20002004 = 0x8;
*(uint64_t*)0x20002008 = 0x0;
*(uint16_t*)0x20002010 = 0x4d;
long r13 = syscall(SYS_write, r8, 0x20002000ul, 0x12ul, 0, 0, 0);
memcpy((void*)0x2a56,
"\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
128);
long r15 = syscall(SYS_recvfrom, r8, 0x2000ul, 0xb6ul,
0x0ul, 0x2a56ul, 0x80ul);
return 0;
}


general protection fault:  [#2] SMP KASAN
Modules linked in:
CPU: 0 PID: 6955 Comm: executor Not tainted 4.4.0-rc3+ #151
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
task: 88005fc88000 ti: 880062478000 task.ti: 880062478000
RIP: 0010:[]
  [] gf128mul_64k_bbe+0x89/0x3f0 crypto/gf128mul.c:379
RSP: 0018:88006247f720  EFLAGS: 00010246
RAX: dc00 RBX: 0001 RCX: 
RDX: ed000c48fed1 RSI:  RDI: ed000c48fedd
RBP: 88006247f7e0 R08: 88003dfd1158 R09: 1000
R10: 0010 R11: 1100075eeb92 R12: 88006247fa00
R13:  R14:  R15: 11000c48feea
FS:  7f21e795f700() GS:88003ec0() knlGS:
CS:  0010 DS:  ES:  CR0: 8005003b
CR2: 2a56 CR3: 60c4b000 CR4: 06f0
Stack:
 88006247f8c0 88006247f8b8 11000c48feea 88006247fa00
 88006247f8a0  41b58ab3 87ab3a79
 82bf9580 82bafa9b  88006247f7a0
Call Trace:
 [] lrw_crypt+0x29d/0xd20 crypto/lrw.c:248
 [] lrw_decrypt+0xeb/0x140
arch/x86/crypto/serpent_avx2_glue.c:270
 [] async_decrypt+0x1c1/0x2c0 crypto/blkcipher.c:443
 [< inline >] crypto_ablkcipher_decrypt include/linux/crypto.h:921
 [< inline >] skcipher_recvmsg_sync crypto/algif_skcipher.c:676
 [] skcipher_recvmsg+0x1029/0x1f10 crypto/algif_skcipher.c:706
 [< inline >] sock_recvmsg_nosec net/socket.c:712
 [] sock_recvmsg+0xaa/0xe0 net/socket.c:720
 [] SYSC_recvfrom+0x1e4/0x370 net/socket.c:1707
 [] SyS_recvfrom+0x40/0x50 net/socket.c:1681
 [] entry_SYSCALL_64_fastpath+0x16/0x7a
arch/x86/entry/entry_64.S:185
Code: 04 00 00 f4 f4 c7 40 08 f3 f3 f3 f3 e8 d1 e9 a0 fe 4d 85 f6 0f
84 f6 01 00 00 4c 89 f1 48 b8 00 00 00 00 00 fc ff df 48 c1 e9 03 <80>
3c 01 00 0f 85 48 03 00 00 48 8b 5c 24 18 4d 8b 26 48 83 c3
RIP  [] gf128mul_64k_bbe+0x89/0x3f0 crypto/gf128mul.c:379
 RSP 
---[ end trace 722d56533c7c99a7 ]---


On upstream commit 31ade3b83e1821da5fbb2f11b5b3d4ab2ec39db8 (Nov 29).
--
To unsubscribe from this list: send the line "unsubscribe linux-crypto" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

GPF in lrw_crypt

2015-12-17 Thread Dmitry Vyukov

Hello,

The following program causes GPF in lrw_crypt:

// autogenerated by syzkaller (http://github.com/google/syzkaller)
#include 
#include 
#include 

int main()
{
long r0 = syscall(SYS_socket, 0x26ul, 0x5ul, 0x0ul, 0, 0, 0);
long r1 = syscall(SYS_mmap, 0x2000ul, 0x1ul, 0x2ul,
0x32ul, 0xul, 0x0ul);
*(uint16_t*)0x20001000 = 0x26;
memcpy((void*)0x20001002,
"\x73\x6b\x63\x69\x70\x68\x65\x72\x00\x00\x00\x00\x00\x00", 14);
*(uint32_t*)0x20001010 = 0xf;
*(uint32_t*)0x20001014 = 0x100;
memcpy((void*)0x20001018,
"\x6c\x72\x77\x28\x74\x77\x6f\x66\x69\x73\x68\x29\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
64);
long r7 = syscall(SYS_bind, r0, 0x20001000ul, 0x58ul, 0, 0, 0);
long r8 = syscall(SYS_accept4, r0, 0x0ul, 0x200023fdul, 0x800ul, 0, 0);
memcpy((void*)0x20002fd8,
"\x09\xf1\x98\x36\x3f\x7d\x29\x96\x55\xe6\xa2\x42\x45\x67\x8f\x7c\x27\x44\x51\x6f\xbe\x4d\x52\x6f\x40\xaf\xe0\xd6\x04\x8d\x86\x36\x08\xc8\x55\xb8\xfe\x6e\x89\xef\x15\x2d\x07\x9a\x74\xab\xc7\x47\x26\xb5\x00\x93\x3b\x59\xe2\x1f\x6a\x29\x76\x7f\x9d\x3a\x86\x38\xda\x3e\xfb\xbe\x63\x2e\x38\x2f\x5c\x1c\x4d\xb8\x53\xf9\x97\xdb\x4a\xcc\xad\x55\xb3\xb5\xa0\xb4\xad\xfb\x39\xe5\x44\x12\x0b\x5f\x03\xbf\xc7\x28\x36\x1a\x7b\x4a\xff\x3e\x71\x17\x44\xf3\x09\xfb\x44\x41\x55\x1b\x6e\x6c\xcd\x03\x39\x66\xe2\x87\x65\xdd\x66\x7c\x00\x9f\x46\x54\x66\xf1\xa8\x4b\xd9\x10\xdc\x45\xd0\x57\x5c\x5e\x97\x42\x3a\xc9\x26\x5a\x55\x57\x65\x5f\x38\x54\xdd\x1e\xd8\x89\xe0\x34\xf0\x9e\x65\xf8\x89\x8e\xe4\x02\xdc\xa2\xa8\x45\x9c\xe9\xca\xef\xad\xdc\x74\xb5",
182);
long r10 = syscall(SYS_write, r8, 0x20002fd8ul, 0xb6ul, 0, 0, 0);
*(uint64_t*)0x2fb8 = 0x20004fff;
*(uint64_t*)0x2fc0 = 0x94;
*(uint64_t*)0x2fc8 = 0x2000;
*(uint64_t*)0x2fd0 = 0x5d;
*(uint64_t*)0x2fd8 = 0x2f1b;
*(uint64_t*)0x2fe0 = 0xe5;
*(uint64_t*)0x2fe8 = 0x20004b08;
*(uint64_t*)0x2ff0 = 0xe9;
*(uint64_t*)0x2ff8 = 0x20002000;
*(uint64_t*)0x20001000 = 0x1000;
long r21 = syscall(SYS_readv, r8, 0x2fb8ul, 0x5ul, 0, 0, 0);
return 0;
}



kasan: CONFIG_KASAN_INLINE enabled
kasan: GPF could be caused by NULL-ptr deref or user memory
accessgeneral protection fault:  [#1] SMP KASAN
Modules linked in:
CPU: 2 PID: 6912 Comm: a.out Not tainted 4.4.0-rc3+ #151
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
task: 88005ee61680 ti: 88005fef task.ti: 88005fef
RIP: 0010:[]  [] gf128mul_64k_bbe+0x89/0x3f0
RSP: 0018:88005fef7590  EFLAGS: 00010246
RAX: dc00 RBX: 0001 RCX: 
RDX: ed000bfdee9f RSI: 88005ee61e40 RDI: ed000bfdeeab
RBP: 88005fef7650 R08: 0001 R09: 
R10:  R11:  R12: 88005fef7870
R13:  R14:  R15: 11000bfdeeb8
FS:  026a3880(0063) GS:88006ce0() knlGS:
CS:  0010 DS:  ES:  CR0: 8005003b
CR2: 2fb8 CR3: 5ffa CR4: 06e0
Stack:
 88005fef7730 88000fff 11000bfdeeb8 88005fef7870
 88005fef7710  41b58ab3 87ab3a79
 82bf9580 82bafa9b 88005ee61680 0001
Call Trace:
 [] lrw_crypt+0x29d/0xd20 crypto/lrw.c:248
 [] lrw_decrypt+0xf2/0x150
arch/x86/crypto/serpent_avx2_glue.c:270
 [] async_decrypt+0x1c1/0x2c0 crypto/blkcipher.c:443
 [< inline >] crypto_ablkcipher_decrypt include/linux/crypto.h:921
 [< inline >] skcipher_recvmsg_sync crypto/algif_skcipher.c:676
 [] skcipher_recvmsg+0x1029/0x1f10 crypto/algif_skcipher.c:706
 [< inline >] sock_recvmsg_nosec net/socket.c:712
 [] sock_recvmsg+0xaa/0xe0 net/socket.c:720
 [] sock_read_iter+0x273/0x3d0 net/socket.c:797
 [] do_iter_readv_writev+0x14e/0x2c0 fs/read_write.c:664
 [] do_readv_writev+0x2e2/0x880 fs/read_write.c:808
 [] vfs_readv+0x95/0xd0 fs/read_write.c:834
 [< inline >] SYSC_readv fs/read_write.c:860
 [] SyS_readv+0x111/0x2f0 fs/read_write.c:852
 [] entry_SYSCALL_64_fastpath+0x16/0x7a
arch/x86/entry/entry_64.S:185
Code: 04 00 00 f4 f4 c7 40 08 f3 f3 f3 f3 e8 d1 e9 a0 fe 4d 85 f6 0f
84 f6 01 00 00 4c 89 f1 48 b8 00 00 00 00 00 fc ff df 48 c1 e9 03 <80>
3c 01 00 0f 85 48 03 00 00 48 8b 5c 24 18 4d 8b 26 48 83 c3
RIP  [] gf128mul_64k_bbe+0x89/0x3f0 crypto/gf128mul.c:379
 RSP 
---[ end trace 018c54843b88ec1d ]---


On upstream commit 31ade3b83e1821da5fbb2f11b5b3d4ab2ec39db8 (Nov 29).
--
To unsubscribe from this list: send the line "unsubscribe linux-crypto" in
the body of a message to

use-after-free in hash_sock_destruct

2015-12-17 Thread Dmitry Vyukov

Hello,

The following program causes use-after-free in hash_sock_destruct:

// autogenerated by syzkaller (http://github.com/google/syzkaller)
#include 
#include 
#include 
#include 
#include 
#include 

struct sockaddr_alg {
unsigned short   salg_family;
charsalg_type[14];
unsigned   salg_feat;
unsigned   salg_mask;
charsalg_name[64];
};

int fd;

void *thr0(void *arg)
{
struct sockaddr_alg sa = {};
sa.salg_family = 0x26;
memcpy(sa.salg_type,
"\x68\x61\x73\x68\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", 14);
sa.salg_feat = 0xf;
sa.salg_mask = 0x100;
memcpy(sa.salg_name,
"\x6d\x64\x34\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
64);
bind(fd, (struct sockaddr*), sizeof(sa));
return 0;
}

void *thr1(void *arg)
{
accept4(fd, 0, 0, 0x800ul);
return 0;
}

int main()
{
fd = socket(PF_ALG, SOCK_SEQPACKET, 0);
pthread_t th[4];
pthread_create([0], 0, thr0, 0);
pthread_create([1], 0, thr1, 0);
pthread_create([2], 0, thr0, 0);
pthread_create([3], 0, thr1, 0);
pthread_join(th[0], 0);
pthread_join(th[1], 0);
pthread_join(th[2], 0);
pthread_join(th[3], 0);
return 0;
}



==
BUG: KASAN: use-after-free in hash_sock_destruct+0x1de/0x260 at addr
88005f057090
Read of size 8 by task a.out/6844
=
BUG kmalloc-192 (Not tainted): kasan: bad access detected
-

Disabling lock debugging due to kernel taint
INFO: Allocated in crypto_create_tfm+0x87/0x2a0 age=3 cpu=2 pid=6845
[<  none  >] ___slab_alloc+0x648/0x8c0 mm/slub.c:2438
[<  none  >] __slab_alloc+0x4c/0x90 mm/slub.c:2467
[< inline >] slab_alloc_node mm/slub.c:2530
[< inline >] slab_alloc mm/slub.c:2572
[<  none  >] __kmalloc+0x2d9/0x480 mm/slub.c:3532
[< inline >] kmalloc include/linux/slab.h:463
[< inline >] kzalloc include/linux/slab.h:602
[<  none  >] crypto_create_tfm+0x87/0x2a0 crypto/api.c:470
[<  none  >] crypto_alloc_tfm+0x138/0x3f0 crypto/api.c:555
[<  none  >] crypto_alloc_ahash+0x2c/0x40 crypto/ahash.c:538
[<  none  >] hash_bind+0x25/0x30 crypto/algif_hash.c:240
[<  none  >] alg_bind+0x1a9/0x410 crypto/af_alg.c:155
[<  none  >] SYSC_bind+0x20a/0x2c0 net/socket.c:1383
[<  none  >] SyS_bind+0x24/0x30 net/socket.c:1369
[<  none  >] entry_SYSCALL_64_fastpath+0x16/0x7a
arch/x86/entry/entry_64.S:185

INFO: Freed in kzfree+0x28/0x30 age=2 cpu=1 pid=6847
[<  none  >] __slab_free+0x21e/0x3e0 mm/slub.c:2648
[< inline >] slab_free mm/slub.c:2803
[<  none  >] kfree+0x26f/0x3e0 mm/slub.c:3632
[<  none  >] kzfree+0x28/0x30 mm/slab_common.c:1270
[<  none  >] crypto_destroy_tfm+0xdd/0x1e0 crypto/api.c:596
[< inline >] crypto_free_ahash include/crypto/hash.h:258
[<  none  >] hash_release+0x19/0x20 crypto/algif_hash.c:245
[< inline >] alg_do_release crypto/af_alg.c:116
[<  none  >] alg_bind+0x26a/0x410 crypto/af_alg.c:170
[<  none  >] SYSC_bind+0x20a/0x2c0 net/socket.c:1383
[<  none  >] SyS_bind+0x24/0x30 net/socket.c:1369
[<  none  >] entry_SYSCALL_64_fastpath+0x16/0x7a
arch/x86/entry/entry_64.S:185

INFO: Slab 0xea00017c1580 objects=16 used=15 fp=0x88005f057000
flags=0x5fffc004080
INFO: Object 0x88005f057000 @offset=4096 fp=0x  (null)
CPU: 1 PID: 6844 Comm: a.out Tainted: GB   4.4.0-rc3+ #151
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
 0001 8800379779e0 82e0f4b8 41b58ab3
 87a9a265 82e0f406 88003d8c 87abb3aa
 88003e804a00 0008 88005f057000 8800379779e0

Call Trace:
 [< inline >] __dump_stack lib/dump_stack.c:15
 [] dump_stack+0xb2/0xfa lib/dump_stack.c:50
 [] print_trailer+0x12c/0x210 mm/slub.c:652
 [] object_err+0x2f/0x40 mm/slub.c:659
 [< inline >] print_address_description mm/kasan/report.c:138
 [] kasan_report_error+0x5d9/0x860 mm/kasan/report.c:251
 [< inline >] kasan_report mm/kasan/report.c:274
 [] __asan_report_load8_noabort+0x54/0x70
mm/kasan/report.c:295
 [< inline >] crypto_ahash_digestsize include/crypto/hash.h:290
 [] hash_sock_destruct+0x1de/0x260 crypto/algif_hash.c:259
 [] sk_destruct+0xa5/0x5f0 net/core/sock.c:1446
 [] __sk_free+0x8c/0x260 net/core/sock.c:1474
 [] sk_free+0x30/0x40 net/core/sock.c:1485
 [< inline >] sock_put

use-after-free in skcipher_sock_destruct

2015-12-17 Thread Dmitry Vyukov

Hello,

The following program triggers use-after-free in skcipher_sock_destruct:

// autogenerated by syzkaller (http://github.com/google/syzkaller)
#include 
#include 
#include 
#include 
#include 
#include 

struct sockaddr_alg {
unsigned short   salg_family;
charsalg_type[14];
unsigned   salg_feat;
unsigned   salg_mask;
charsalg_name[64];
};

int fd;

void *thr0(void *arg)
{
struct sockaddr_alg sa = {};
sa.salg_family = 0x26;
memcpy(sa.salg_type,
"\x73\x6b\x63\x69\x70\x68\x65\x72\x00\x00\x00\x00\x00\x00", 14);
sa.salg_feat = 0xf;
sa.salg_mask = 0x100;
memcpy(sa.salg_name,
"\x65\x63\x62\x28\x73\x65\x72\x70\x65\x6e\x74\x29\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
64);
bind(fd, (struct sockaddr*), sizeof(sa));
return 0;
}

void *thr1(void *arg)
{
accept4(fd, 0, 0, 0x800ul);
return 0;
}

int main()
{
fd = socket(PF_ALG, SOCK_SEQPACKET, 0);
pthread_t th[4];
pthread_create([0], 0, thr0, 0);
pthread_create([1], 0, thr1, 0);
pthread_create([2], 0, thr0, 0);
pthread_create([3], 0, thr1, 0);
pthread_join(th[0], 0);
pthread_join(th[1], 0);
pthread_join(th[2], 0);
pthread_join(th[3], 0);
return 0;
}


==
BUG: KASAN: use-after-free in skcipher_sock_destruct+0x39c/0x3e0 at
addr 8800387a2d38
Read of size 4 by task executor/27423
=
BUG kmalloc-96 (Not tainted): kasan: bad access detected
-

Disabling lock debugging due to kernel taint
INFO: Allocated in __crypto_alloc_tfm+0xd9/0x470 age=36 cpu=0 pid=27396
[<  none  >] __kmalloc+0x2d9/0x480 mm/slub.c:3532
[< inline >] kmalloc include/linux/slab.h:463
[< inline >] kzalloc include/linux/slab.h:602
[<  none  >] __crypto_alloc_tfm+0xd9/0x470 crypto/api.c:374
[<  none  >] crypto_alloc_ablkcipher+0x7b/0x1f0 crypto/ablkcipher.c:699
[<  none  >] skcipher_bind+0x25/0x30 crypto/algif_skcipher.c:754
[<  none  >] alg_bind+0x1a9/0x410 crypto/af_alg.c:155
[<  none  >] SYSC_bind+0x20a/0x2c0 net/socket.c:1383
[<  none  >] SyS_bind+0x24/0x30 net/socket.c:1369
[<  none  >] entry_SYSCALL_64_fastpath+0x16/0x7a
arch/x86/entry/entry_64.S:185

INFO: Freed in kzfree+0x28/0x30 age=26 cpu=2 pid=27421
[<  none  >] kfree+0x26f/0x3e0 mm/slub.c:3632
[<  none  >] kzfree+0x28/0x30 mm/slab_common.c:1270
[<  none  >] crypto_destroy_tfm+0xdd/0x1e0 crypto/api.c:596
[< inline >] crypto_free_tfm include/linux/crypto.h:623
[< inline >] crypto_free_ablkcipher include/linux/crypto.h:769
[<  none  >] skcipher_release+0x18/0x20 crypto/algif_skcipher.c:759
[< inline >] alg_do_release crypto/af_alg.c:116
[<  none  >] alg_bind+0x26a/0x410 crypto/af_alg.c:170
[<  none  >] SYSC_bind+0x20a/0x2c0 net/socket.c:1383
[<  none  >] SyS_bind+0x24/0x30 net/socket.c:1369
[<  none  >] entry_SYSCALL_64_fastpath+0x16/0x7a
arch/x86/entry/entry_64.S:185

INFO: Slab 0xeae1e880 objects=19 used=8 fp=0x8800387a3380
flags=0x1fffc004080
INFO: Object 0x8800387a2d00 @offset=3328 fp=0x8800387a3520
CPU: 0 PID: 27423 Comm: executor Tainted: GB   4.4.0-rc3+ #151
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
  880035c67700 82e0f4b8 41b58ab3
 87a9a265 82e0f406 880037f15a00 87abb3aa
 88003e804780 0008 8800387a2d00 880035c67700

Call Trace:
 [] __asan_report_load4_noabort+0x54/0x70
mm/kasan/report.c:294
 [< inline >] crypto_ablkcipher_ivsize include/linux/crypto.h:807
 [] skcipher_sock_destruct+0x39c/0x3e0
crypto/algif_skcipher.c:787
 [] sk_destruct+0xa5/0x5f0 net/core/sock.c:1446
 [] __sk_free+0x8c/0x260 net/core/sock.c:1474
 [] sk_free+0x30/0x40 net/core/sock.c:1485
 [< inline >] sock_put include/net/sock.h:1625
 [] af_alg_release+0x60/0x90 crypto/af_alg.c:123
 [] sock_release+0x96/0x260 net/socket.c:571
 [] sock_close+0x16/0x20 net/socket.c:1022
 [] __fput+0x244/0x860 fs/file_table.c:208
 [] fput+0x15/0x20 fs/file_table.c:244
 [] task_work_run+0x130/0x240 kernel/task_work.c:115
 [< inline >] exit_task_work include/linux/task_work.h:21
 [] do_exit+0x885/0x3050 kernel/exit.c:750
 [] do_group_exit+0xec/0x390 kernel/exit.c:880
 [] get_signal+0x677/0x1bf0 kernel/signal.c:2307
 [] do_signal+0x7e/0x2170 arch/x86/kernel/signal.c:709
 [] exit_to_usermode_loop+0xfe/0x1e0
arch/x86/entry/common.c:247
 [< inline

bad page state due to PF_ALG socket

2015-12-17 Thread Dmitry Vyukov

Hello,

The following program triggers multiple bugs including bad page state
warnings and GPFs:


// autogenerated by syzkaller (http://github.com/google/syzkaller)
#include 
#include 
#include 
#include 

void foo()
{
long r0 = syscall(SYS_socket, 0x26ul, 0x5ul, 0x0ul, 0, 0, 0);
long r1 = syscall(SYS_mmap, 0x2000ul, 0x1ul, 0x3ul,
0x32ul, 0xul, 0x0ul);
*(uint16_t*)0x20001000 = 0x26;
memcpy((void*)0x20001002,
"\x73\x6b\x63\x69\x70\x68\x65\x72\x00\x00\x00\x00\x00\x00", 14);
*(uint32_t*)0x20001010 = 0xf;
*(uint32_t*)0x20001014 = 0x100;
memcpy((void*)0x20001018,
"\x65\x63\x62\x28\x73\x65\x72\x70\x65\x6e\x74\x29\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
64);
long r7 = syscall(SYS_bind, r0, 0x20001000ul, 0x58ul, 0, 0, 0);
long r8 = syscall(SYS_accept4, r0, 0x0ul, 0x200023fdul, 0x800ul, 0, 0);
memcpy((void*)0x2000,

Re: use-after-free in hash_sock_destruct

2015-12-29 Thread Dmitry Vyukov

On Tue, Dec 29, 2015 at 3:40 PM, Herbert Xu <herb...@gondor.apana.org.au> wrote:
> On Thu, Dec 17, 2015 at 01:59:50PM +0100, Dmitry Vyukov wrote:
>>
>> The following program causes use-after-free in hash_sock_destruct:
>
> This patch should fix the problem.  AFAIK everything that you have
> reported should now be fixed.  If you still have issues please
> resubmit them (and please cc me).  Thanks!

Thanks Herbert!
I will do another round of crypto testing with this patch (on top of
the other two patches) and report if I see any bugs.



> ---8<---
> Subject: crypto: af_alg - Disallow bind/setkey/... after accept(2)
>
> Each af_alg parent socket obtained by socket(2) corresponds to a
> tfm object once bind(2) has succeeded.  An accept(2) call on that
> parent socket creates a context which then uses the tfm object.
>
> Therefore as long as any child sockets created by accept(2) exist
> the parent socket must not be modified or freed.
>
> This patch guarantees this by using locks and a reference count
> on the parent socket.  Any attempt to modify the parent socket will
> fail with EBUSY.
>
> Cc: sta...@vger.kernel.org
> Reported-by: Dmitry Vyukov <dvyu...@google.com>
> Signed-off-by: Herbert Xu <herb...@gondor.apana.org.au>
>
> diff --git a/crypto/af_alg.c b/crypto/af_alg.c
> index a8e7aa3..f5a2426 100644
> --- a/crypto/af_alg.c
> +++ b/crypto/af_alg.c
> @@ -125,6 +125,23 @@ int af_alg_release(struct socket *sock)
>  }
>  EXPORT_SYMBOL_GPL(af_alg_release);
>
> +void af_alg_release_parent(struct sock *sk)
> +{
> +   struct alg_sock *ask = alg_sk(sk);
> +   bool last;
> +
> +   sk = ask->parent;
> +   ask = alg_sk(sk);
> +
> +   lock_sock(sk);
> +   last = !--ask->refcnt;
> +   release_sock(sk);
> +
> +   if (last)
> +   sock_put(sk);
> +}
> +EXPORT_SYMBOL_GPL(af_alg_release_parent);
> +
>  static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int 
> addr_len)
>  {
> const u32 forbidden = CRYPTO_ALG_INTERNAL;
> @@ -133,6 +150,7 @@ static int alg_bind(struct socket *sock, struct sockaddr 
> *uaddr, int addr_len)
> struct sockaddr_alg *sa = (void *)uaddr;
> const struct af_alg_type *type;
> void *private;
> +   int err;
>
> if (sock->state == SS_CONNECTED)
> return -EINVAL;
> @@ -160,16 +178,22 @@ static int alg_bind(struct socket *sock, struct 
> sockaddr *uaddr, int addr_len)
> return PTR_ERR(private);
> }
>
> +   err = -EBUSY;
> lock_sock(sk);
> +   if (ask->refcnt)
> +   goto unlock;
>
> swap(ask->type, type);
> swap(ask->private, private);
>
> +   err = 0;
> +
> +unlock:
> release_sock(sk);
>
> alg_do_release(type, private);
>
> -   return 0;
> +   return err;
>  }
>
>  static int alg_setkey(struct sock *sk, char __user *ukey,
> @@ -188,14 +212,41 @@ static int alg_setkey(struct sock *sk, char __user 
> *ukey,
> if (copy_from_user(key, ukey, keylen))
> goto out;
>
> +   err = -EBUSY;
> +   lock_sock(sk);
> +   if (ask->refcnt)
> +   goto unlock;
> +
> err = type->setkey(ask->private, key, keylen);
>
> +unlock:
> +   release_sock(sk);
> +
>  out:
> sock_kzfree_s(sk, key, keylen);
>
> return err;
>  }
>
> +static int alg_setauthsize(struct sock *sk, unsigned int size)
> +{
> +   int err;
> +   struct alg_sock *ask = alg_sk(sk);
> +   const struct af_alg_type *type = ask->type;
> +
> +   err = -EBUSY;
> +   lock_sock(sk);
> +   if (ask->refcnt)
> +   goto unlock;
> +
> +   err = type->setauthsize(ask->private, size);
> +
> +unlock:
> +   release_sock(sk);
> +
> +   return err;
> +}
> +
>  static int alg_setsockopt(struct socket *sock, int level, int optname,
>   char __user *optval, unsigned int optlen)
>  {
> @@ -224,7 +275,7 @@ static int alg_setsockopt(struct socket *sock, int level, 
> int optname,
> goto unlock;
> if (!type->setauthsize)
> goto unlock;
> -   err = type->setauthsize(ask->private, optlen);
> +   err = alg_setauthsize(sk, optlen);
> }
>
>  unlock:
> @@ -264,7 +315,8 @@ int af_alg_accept(struct sock *sk, struct socket *newsock)
>
> sk2->sk_family = PF_ALG;
>
> -   sock_hold(sk);
> +   if (!ask->refcnt++)
> +

crypto: deadlock in alg_setsockopt

2015-12-29 Thread Dmitry Vyukov

Hello,

On commit 8513342170278468bac126640a5d2d12ffbff106
+ crypto: algif_skcipher - Use new skcipher interface
+ crypto: algif_skcipher - Require setkey before accept(2)
+ crypto: af_alg - Disallow bind/setkey/... after accept(2)

The following program creates an unkillable, deadlocked process:

// autogenerated by syzkaller (http://github.com/google/syzkaller)
#include 
#include 
#include 
#include 
#include 

long r[10];

int main()
{
memset(r, -1, sizeof(r));
r[0] = syscall(SYS_mmap, 0x2000ul, 0x1ul, 0x3ul,
0x32ul, 0xul, 0x0ul);
r[1] = syscall(SYS_socket, 0x26ul, 0x5ul, 0x0ul, 0, 0, 0);
*(uint16_t*)0x2112 = (uint16_t)0x26;
memcpy((void*)0x2114,
"\x73\x6b\x63\x69\x70\x68\x65\x72\x00\x00\x00\x00\x00\x00", 14);
*(uint32_t*)0x2122 = (uint32_t)0x209;
*(uint32_t*)0x2126 = (uint32_t)0x4e;
memcpy((void*)0x212a,
"\x65\x63\x62\x28\x61\x72\x63\x34\x29\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
64);
r[7] = syscall(SYS_bind, r[1], 0x2112ul, 0x58ul, 0, 0, 0);
r[9] = syscall(SYS_setsockopt, r[1], 0x117ul, 0x1ul,
0x20003000ul, 0xd2ul, 0);
return 0;
}

root 28768  0.0  0.0   1144 4 pts/0D+   18:25   0:00  |
   \_ ./a.out

# cat /proc/28768/stack
[] __lock_sock+0xe6/0x160
[] lock_sock_nested+0xfb/0x120
[] alg_setsockopt+0x2a9/0x3d0
[] SyS_setsockopt+0x158/0x240
[] entry_SYSCALL_64_fastpath+0x16/0x7a
--
To unsubscribe from this list: send the line "unsubscribe linux-crypto" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: use-after-free in hash_sock_destruct

2015-12-29 Thread Dmitry Vyukov

On Tue, Dec 29, 2015 at 4:28 PM, Dmitry Vyukov <dvyu...@google.com> wrote:
> On Tue, Dec 29, 2015 at 3:40 PM, Herbert Xu <herb...@gondor.apana.org.au> 
> wrote:
>> On Thu, Dec 17, 2015 at 01:59:50PM +0100, Dmitry Vyukov wrote:
>>>
>>> The following program causes use-after-free in hash_sock_destruct:
>>
>> This patch should fix the problem.  AFAIK everything that you have
>> reported should now be fixed.  If you still have issues please
>> resubmit them (and please cc me).  Thanks!
>
> Thanks Herbert!
> I will do another round of crypto testing with this patch (on top of
> the other two patches) and report if I see any bugs.

Doh! Now +Herbert

>> ---8<---
>> Subject: crypto: af_alg - Disallow bind/setkey/... after accept(2)
>>
>> Each af_alg parent socket obtained by socket(2) corresponds to a
>> tfm object once bind(2) has succeeded.  An accept(2) call on that
>> parent socket creates a context which then uses the tfm object.
>>
>> Therefore as long as any child sockets created by accept(2) exist
>> the parent socket must not be modified or freed.
>>
>> This patch guarantees this by using locks and a reference count
>> on the parent socket.  Any attempt to modify the parent socket will
>> fail with EBUSY.
>>
>> Cc: sta...@vger.kernel.org
>> Reported-by: Dmitry Vyukov <dvyu...@google.com>
>> Signed-off-by: Herbert Xu <herb...@gondor.apana.org.au>
>>
>> diff --git a/crypto/af_alg.c b/crypto/af_alg.c
>> index a8e7aa3..f5a2426 100644
>> --- a/crypto/af_alg.c
>> +++ b/crypto/af_alg.c
>> @@ -125,6 +125,23 @@ int af_alg_release(struct socket *sock)
>>  }
>>  EXPORT_SYMBOL_GPL(af_alg_release);
>>
>> +void af_alg_release_parent(struct sock *sk)
>> +{
>> +   struct alg_sock *ask = alg_sk(sk);
>> +   bool last;
>> +
>> +   sk = ask->parent;
>> +   ask = alg_sk(sk);
>> +
>> +   lock_sock(sk);
>> +   last = !--ask->refcnt;
>> +   release_sock(sk);
>> +
>> +   if (last)
>> +   sock_put(sk);
>> +}
>> +EXPORT_SYMBOL_GPL(af_alg_release_parent);
>> +
>>  static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int 
>> addr_len)
>>  {
>> const u32 forbidden = CRYPTO_ALG_INTERNAL;
>> @@ -133,6 +150,7 @@ static int alg_bind(struct socket *sock, struct sockaddr 
>> *uaddr, int addr_len)
>> struct sockaddr_alg *sa = (void *)uaddr;
>> const struct af_alg_type *type;
>> void *private;
>> +   int err;
>>
>> if (sock->state == SS_CONNECTED)
>> return -EINVAL;
>> @@ -160,16 +178,22 @@ static int alg_bind(struct socket *sock, struct 
>> sockaddr *uaddr, int addr_len)
>> return PTR_ERR(private);
>> }
>>
>> +   err = -EBUSY;
>> lock_sock(sk);
>> +   if (ask->refcnt)
>> +   goto unlock;
>>
>> swap(ask->type, type);
>> swap(ask->private, private);
>>
>> +   err = 0;
>> +
>> +unlock:
>> release_sock(sk);
>>
>> alg_do_release(type, private);
>>
>> -   return 0;
>> +   return err;
>>  }
>>
>>  static int alg_setkey(struct sock *sk, char __user *ukey,
>> @@ -188,14 +212,41 @@ static int alg_setkey(struct sock *sk, char __user 
>> *ukey,
>> if (copy_from_user(key, ukey, keylen))
>> goto out;
>>
>> +   err = -EBUSY;
>> +   lock_sock(sk);
>> +   if (ask->refcnt)
>> +   goto unlock;
>> +
>> err = type->setkey(ask->private, key, keylen);
>>
>> +unlock:
>> +   release_sock(sk);
>> +
>>  out:
>> sock_kzfree_s(sk, key, keylen);
>>
>> return err;
>>  }
>>
>> +static int alg_setauthsize(struct sock *sk, unsigned int size)
>> +{
>> +   int err;
>> +   struct alg_sock *ask = alg_sk(sk);
>> +   const struct af_alg_type *type = ask->type;
>> +
>> +   err = -EBUSY;
>> +   lock_sock(sk);
>> +   if (ask->refcnt)
>> +   goto unlock;
>> +
>> +   err = type->setauthsize(ask->private, size);
>> +
>> +unlock:
>> +   release_sock(sk);
>> +
>> +   return err;
>> +}
>> +
>>  static int alg_setsockopt(struct socket *sock, int level, int optname,
>>   char __

crypto: use-after-free in alg_bind

2015-12-29 Thread Dmitry Vyukov

Hello,

On commit 8513342170278468bac126640a5d2d12ffbff106
+ crypto: algif_skcipher - Use new skcipher interface
+ crypto: algif_skcipher - Require setkey before accept(2)
+ crypto: af_alg - Disallow bind/setkey/... after accept(2)

The following program causes use-after-free in alg_bind and later
terminates kernel:

// autogenerated by syzkaller (http://github.com/google/syzkaller)
#include 
#include 
#include 
#include 
#include 

int fd;

void *thr(void *arg)
{
switch ((long)arg) {
case 0:
fd = syscall(SYS_socket, 0x26ul, 0x5ul, 0x0ul, 0, 0, 0);
case 1:
*(uint16_t*)0x2000 = (uint16_t)0x26;
memcpy((void*)0x2002,
"\x73\x6b\x63\x69\x70\x68\x65\x72\x00\x00\x00\x00\x00\x00", 14);
*(uint32_t*)0x2010 = (uint32_t)0x2a;
*(uint32_t*)0x2014 = (uint32_t)0x8;
memcpy((void*)0x2018,
"\x65\x63\x62\x28\x61\x65\x73\x29\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
64);
syscall(SYS_bind, fd, 0x2000ul, 0x58ul, 0, 0, 0);
break;
case 2:
syscall(SYS_accept4, fd, 0, 0, 0x8ul, 0, 0);
break;
}
return 0;
}

int main()
{
long i;
pthread_t th[6];

syscall(SYS_mmap, 0x2000ul, 0x1000ul, 0x3ul, 0x32ul,
0xul, 0x0ul);
for (i = 0; i < 6; i++)
pthread_create([i], 0, thr, (void*)(i%3));
usleep(1);
return 0;
}


==
BUG: KASAN: use-after-free in __lock_acquire+0x39db/0x3ca0 at addr
880033e94f60
Read of size 8 by task a.out/7532
=
BUG kmalloc-2048 (Not tainted): kasan: bad access detected
-

INFO: Allocated in sk_prot_alloc+0x1ed/0x340 age=1 cpu=1 pid=7532
[< inline >] kmalloc include/linux/slab.h:463
[<  none  >] sk_prot_alloc+0x1ed/0x340 net/core/sock.c:1354
[<  none  >] sk_alloc+0x3a/0x6b0 net/core/sock.c:1419
[<  none  >] alg_create+0x93/0x170 crypto/af_alg.c:370
[<  none  >] __sock_create+0x37c/0x640 net/socket.c:1162
[< inline >] sock_create net/socket.c:1202
[< inline >] SYSC_socket net/socket.c:1232
[<  none  >] SyS_socket+0xef/0x1b0 net/socket.c:1212

INFO: Freed in sk_destruct+0x3d7/0x490 age=1 cpu=0 pid=7531
[<  none  >] kfree+0x26a/0x290 mm/slub.c:3662
[< inline >] sk_prot_free net/core/sock.c:1391
[<  none  >] sk_destruct+0x3d7/0x490 net/core/sock.c:1467
[<  none  >] __sk_free+0x57/0x200 net/core/sock.c:1475
[<  none  >] sk_free+0x30/0x40 net/core/sock.c:1486
[< inline >] sock_put include/net/sock.h:1627
[<  none  >] af_alg_release+0x5b/0x70 crypto/af_alg.c:123
[<  none  >] sock_release+0x8d/0x1d0 net/socket.c:571
[<  none  >] sock_close+0x16/0x20 net/socket.c:1022
[<  none  >] __fput+0x233/0x780 fs/file_table.c:208
[<  none  >] fput+0x15/0x20 fs/file_table.c:244
[<  none  >] task_work_run+0x16b/0x200 kernel/task_work.c:115
[< inline >] tracehook_notify_resume include/linux/tracehook.h:191
[<  none  >] exit_to_usermode_loop+0x180/0x1a0
arch/x86/entry/common.c:251
[< inline >] prepare_exit_to_usermode arch/x86/entry/common.c:282
[<  none  >] syscall_return_slowpath+0x19f/0x210
arch/x86/entry/common.c:344
[<  none  >] int_ret_from_sys_call+0x25/0x9f
arch/x86/entry/entry_64.S:281

INFO: Slab 0xeacfa400 objects=13 used=8 fp=0x880033e94ec0
flags=0x1fffc004080
INFO: Object 0x880033e94ec0 @offset=20160 fp=0x880033e96c48
CPU: 1 PID: 7532 Comm: a.out Tainted: GB   4.4.0-rc7+ #181
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
  88006a49fa10 8289d9dd 88003e805200
 880033e94ec0 880033e9 88006a49fa40 816c8e24
 88003e805200 eacfa400 880033e94ec0 88b866e0

Call Trace:
 [] __asan_report_load8_noabort+0x3e/0x40
mm/kasan/report.c:295
 [] __lock_acquire+0x39db/0x3ca0 kernel/locking/lockdep.c:3092
 [] lock_acquire+0x19f/0x3c0 kernel/locking/lockdep.c:3585
 [< inline >] __raw_spin_lock_bh include/linux/spinlock_api_smp.h:137
 [] _raw_spin_lock_bh+0x3f/0x50 kernel/locking/spinlock.c:175
 [< inline >] spin_lock_bh include/linux/spinlock.h:307
 [] lock_sock_nested+0x48/0x120 net/core/sock.c:2434
 [< inline >] lock_sock include/net/sock.h:1481
 [] alg_bind+0x1aa/0x3f0 crypto/af_alg.c:182
 [] SYSC_bind+0x1ea/0x250 net/socket.c:1376
 [] SyS_bind+0x24/0x30 net/socket.c:1362

crypto: use-after-free in rng_recvmsg

2015-12-28 Thread Dmitry Vyukov

Hello,

On commit a88164345b81292b55a8d4829fdd35c8d611cd7d (Dec 23)
+ crypto: algif_skcipher - Use new skcipher interface
+ crypto: algif_skcipher - Require setkey before accept(2)

The following program triggers use-after-free in rng_recvmsg:

// autogenerated by syzkaller (http://github.com/google/syzkaller)
#include 
#include 
#include 
#include 
#include 

#ifndef SYS_memfd_create
#define SYS_memfd_create 319
#endif

long r[13];

void *thr(void *arg)
{
switch ((long)arg) {
case 0:
r[0] = syscall(SYS_mmap, 0x2000ul, 0x11000ul,
0x3ul, 0x32ul, 0xul, 0x0ul);
break;
case 1:
r[1] = syscall(SYS_socket, 0x26ul, 0x5ul, 0x0ul, 0, 0, 0);
break;
case 2:
*(uint16_t*)0x20003000 = (uint16_t)0x26;
memcpy((void*)0x20003002,
"\x72\x6e\x67\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", 14);
*(uint32_t*)0x20003010 = (uint32_t)0x8;
*(uint32_t*)0x20003014 = (uint32_t)0x800;
memcpy((void*)0x20003018,
"\x73\x74\x64\x72\x6e\x67\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
64);
r[7] = syscall(SYS_bind, r[1], 0x20003000ul, 0x58ul, 0, 0, 0);
break;
case 3:
memcpy((void*)0x20003ff0,
"\x26\x73\x79\x73\x74\x65\x6d\x73\x65\x63\x75\x72\x69\x74\x79\x00",
16);
r[9] = syscall(SYS_memfd_create, 0x20003ff0ul, 0x2ul,
0, 0, 0, 0);
break;
case 4:
r[10] = syscall(SYS_ioctl, r[9], 0x540bul, 0x4ul, 0, 0, 0);
break;
case 5:
r[11] = syscall(SYS_accept, r[1], 0x0ul, 0x2000affcul, 0, 0, 0);
break;
case 6:
r[12] = syscall(SYS_read, r[11], 0x200102a3ul, 0x54ul, 0, 0, 0);
break;
}
return 0;
}

int main()
{
long i;
pthread_t th[7];

memset(r, -1, sizeof(r));
for (i = 0; i < 7; i++) {
pthread_create([i], 0, thr, (void*)i);
usleep(1);
}
for (i = 0; i < 7; i++) {
pthread_create([i], 0, thr, (void*)i);
if (i%2==0)
usleep(1);
}
usleep(10);
return 0;
}


==
BUG: KASAN: use-after-free in rng_recvmsg+0x20f/0x230 at addr 880061c4d758
Read of size 8 by task a.out/8875
=
BUG kmalloc-512 (Not tainted): kasan: bad access detected
-

INFO: Allocated in crypto_create_tfm+0x7e/0x240 age=97 cpu=2 pid=8833
[< inline >] kmalloc include/linux/slab.h:463
[< inline >] kzalloc include/linux/slab.h:602
[<  none  >] crypto_create_tfm+0x7e/0x240 crypto/api.c:470
[<  none  >] crypto_alloc_tfm+0x13c/0x370 crypto/api.c:555
[<  none  >] crypto_alloc_rng+0x2c/0x40 crypto/rng.c:120
[<  none  >] rng_bind+0x25/0x30 crypto/algif_rng.c:119
[<  none  >] alg_bind+0x18e/0x3a0 crypto/af_alg.c:155
[<  none  >] SYSC_bind+0x1ea/0x250 net/socket.c:1376
[<  none  >] SyS_bind+0x24/0x30 net/socket.c:1362
[<  none  >] entry_SYSCALL_64_fastpath+0x16/0x7a
arch/x86/entry/entry_64.S:185

INFO: Freed in kzfree+0x28/0x30 age=46 cpu=1 pid=8859
[<  none  >] kfree+0x26a/0x290 mm/slub.c:3662
[<  none  >] kzfree+0x28/0x30 mm/slab_common.c:1270
[<  none  >] crypto_destroy_tfm+0xcb/0x190 crypto/api.c:596
[< inline >] crypto_free_rng include/crypto/rng.h:122
[<  none  >] rng_release+0x18/0x20 crypto/algif_rng.c:124
[< inline >] alg_do_release crypto/af_alg.c:116
[<  none  >] alg_bind+0x246/0x3a0 crypto/af_alg.c:170
[<  none  >] SYSC_bind+0x1ea/0x250 net/socket.c:1376
[<  none  >] SyS_bind+0x24/0x30 net/socket.c:1362
[<  none  >] entry_SYSCALL_64_fastpath+0x16/0x7a
arch/x86/entry/entry_64.S:185

INFO: Slab 0xea0001871300 objects=16 used=14 fp=0x880061c4e670
flags=0x5fffc004080
INFO: Object 0x880061c4d710 @offset=5904 fp=0x  (null)
CPU: 1 PID: 8875 Comm: a.out Tainted: GB   4.4.0-rc6+ #178
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
  880036edf9a0 8289db7d 88003e804f00
 880061c4d710 880061c4c000 880036edf9d0 816c9064
 88003e804f00 ea0001871300 880061c4d710 880036edfaf0

Call Trace:
 [] __asan_report_load8_noabort+0x3e/0x40
mm/kasan/report.c:295
 [< inline >] crypto_rng_get_bytes include/crypto/rng.h:112
 [] rng_recvmsg+0x20f/0x230

Re: [PATCH v2] crypto: algif_skcipher - Require setkey before accept(2)

2015-12-28 Thread Dmitry Vyukov

On Fri, Dec 25, 2015 at 8:40 AM, Herbert Xu <herb...@gondor.apana.org.au> wrote:
> Dmitry Vyukov <dvyu...@google.com> wrote:
>>
>> I am testing with your two patches:
>> crypto: algif_skcipher - Use new skcipher interface
>> crypto: algif_skcipher - Require setkey before accept(2)
>> on top of a88164345b81292b55a8d4829fdd35c8d611cd7d (Dec 23).
>
> You sent the email to everyone on the original CC list except me.
> Please don't do that.

Hi Herbert,

My email client just followed instructions in your email. You've said
to Reply-to: syzkal...@googlegroups.com.

This version of the patch does fix the crash.

Tested-by: Dmitry Vyukov <dvyu...@google.com>

However, crypto is still considerably unstable. I will post reports
that I see separately.


>> Now the following program causes a bunch of use-after-frees and them
>> kills kernel:
>
> Yes there is an obvious bug in the patch that Julia Lawall has
> responded to in another thread.  Here is a fixed version.
>
> ---8<--
> Some cipher implementations will crash if you try to use them
> without calling setkey first.  This patch adds a check so that
> the accept(2) call will fail with -ENOKEY if setkey hasn't been
> done on the socket yet.
>
> Cc: sta...@vger.kernel.org
> Reported-by: Dmitry Vyukov <dvyu...@google.com>
> Signed-off-by: Herbert Xu <herb...@gondor.apana.org.au>
>
> diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c
> index 5c756b3..f4431bc 100644
> --- a/crypto/algif_skcipher.c
> +++ b/crypto/algif_skcipher.c
> @@ -31,6 +31,11 @@ struct skcipher_sg_list {
> struct scatterlist sg[0];
>  };
>
> +struct skcipher_tfm {
> +   struct crypto_skcipher *skcipher;
> +   bool has_key;
> +};
> +
>  struct skcipher_ctx {
> struct list_head tsgl;
> struct af_alg_sgl rsgl;
> @@ -750,17 +755,41 @@ static struct proto_ops algif_skcipher_ops = {
>
>  static void *skcipher_bind(const char *name, u32 type, u32 mask)
>  {
> -   return crypto_alloc_skcipher(name, type, mask);
> +   struct skcipher_tfm *tfm;
> +   struct crypto_skcipher *skcipher;
> +
> +   tfm = kzalloc(sizeof(*tfm), GFP_KERNEL);
> +   if (!tfm)
> +   return ERR_PTR(-ENOMEM);
> +
> +   skcipher = crypto_alloc_skcipher(name, type, mask);
> +   if (IS_ERR(skcipher)) {
> +   kfree(tfm);
> +   return ERR_CAST(skcipher);
> +   }
> +
> +   tfm->skcipher = skcipher;
> +
> +   return tfm;
>  }
>
>  static void skcipher_release(void *private)
>  {
> -   crypto_free_skcipher(private);
> +   struct skcipher_tfm *tfm = private;
> +
> +   crypto_free_skcipher(tfm->skcipher);
> +   kfree(tfm);
>  }
>
>  static int skcipher_setkey(void *private, const u8 *key, unsigned int keylen)
>  {
> -   return crypto_skcipher_setkey(private, key, keylen);
> +   struct skcipher_tfm *tfm = private;
> +   int err;
> +
> +   err = crypto_skcipher_setkey(tfm->skcipher, key, keylen);
> +   tfm->has_key = !err;
> +
> +   return err;
>  }
>
>  static void skcipher_wait(struct sock *sk)
> @@ -792,20 +821,25 @@ static int skcipher_accept_parent(void *private, struct 
> sock *sk)
>  {
> struct skcipher_ctx *ctx;
> struct alg_sock *ask = alg_sk(sk);
> -   unsigned int len = sizeof(*ctx) + crypto_skcipher_reqsize(private);
> +   struct skcipher_tfm *tfm = private;
> +   struct crypto_skcipher *skcipher = tfm->skcipher;
> +   unsigned int len = sizeof(*ctx) + crypto_skcipher_reqsize(skcipher);
> +
> +   if (!tfm->has_key)
> +   return -ENOKEY;
>
> ctx = sock_kmalloc(sk, len, GFP_KERNEL);
> if (!ctx)
> return -ENOMEM;
>
> -   ctx->iv = sock_kmalloc(sk, crypto_skcipher_ivsize(private),
> +   ctx->iv = sock_kmalloc(sk, crypto_skcipher_ivsize(skcipher),
>GFP_KERNEL);
> if (!ctx->iv) {
> sock_kfree_s(sk, ctx, len);
> return -ENOMEM;
> }
>
> -   memset(ctx->iv, 0, crypto_skcipher_ivsize(private));
> +   memset(ctx->iv, 0, crypto_skcipher_ivsize(skcipher));
>
> INIT_LIST_HEAD(>tsgl);
> ctx->len = len;
> @@ -818,7 +852,7 @@ static int skcipher_accept_parent(void *private, struct 
> sock *sk)
>
> ask->private = ctx;
>
> -   skcipher_request_set_tfm(>req, private);
> +   skcipher_request_set_tfm(>req, skcipher);
> skcipher_request_set_callback(>req, CRYPTO_TFM_REQ_MAY_BACKLOG,
>

Re: GPF in lrw_crypt

2015-12-24 Thread Dmitry Vyukov

On Thu, Dec 24, 2015 at 10:39 AM, Herbert Xu
<herb...@gondor.apana.org.au> wrote:
> On Thu, Dec 17, 2015 at 01:59:11PM +0100, Dmitry Vyukov wrote:
>>
>> The following program causes GPF in lrw_crypt:
>
> OK, this is a result of certain implementations (such as lrw)
> not coping with there being no key gracefully.
>
> I think the easiest way is probably to check this in algif_skcipher.
>
> ---8<---
> Subject: crypto: algif_skcipher - Require setkey before accept(2)
>
> Some cipher implementations will crash if you try to use them
> without calling setkey first.  This patch adds a check so that
> the accept(2) call will fail with -ENOKEY if setkey hasn't been
> done on the socket yet.
>
> Cc: sta...@vger.kernel.org
> Reported-by: Dmitry Vyukov <dvyu...@google.com>
> Signed-off-by: Herbert Xu <herb...@gondor.apana.org.au>
>
> diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c
> index 5c756b3..b62c973 100644
> --- a/crypto/algif_skcipher.c
> +++ b/crypto/algif_skcipher.c
> @@ -31,6 +31,11 @@ struct skcipher_sg_list {
> struct scatterlist sg[0];
>  };
>
> +struct skcipher_tfm {
> +   struct crypto_skcipher *skcipher;
> +   bool has_key;
> +};
> +
>  struct skcipher_ctx {
> struct list_head tsgl;
> struct af_alg_sgl rsgl;
> @@ -750,17 +755,38 @@ static struct proto_ops algif_skcipher_ops = {
>
>  static void *skcipher_bind(const char *name, u32 type, u32 mask)
>  {
> -   return crypto_alloc_skcipher(name, type, mask);
> +   struct skcipher_tfm *tfm;
> +
> +   tfm = kzalloc(sizeof(*tfm), GFP_KERNEL);
> +   if (!tfm)
> +   return ERR_PTR(-ENOMEM);
> +
> +   tfm->skcipher = crypto_alloc_skcipher(name, type, mask);
> +   if (IS_ERR(tfm->skcipher)) {
> +   kfree(tfm);
> +   return ERR_CAST(tfm->skcipher);
> +   }
> +
> +   return tfm;
>  }
>
>  static void skcipher_release(void *private)
>  {
> -   crypto_free_skcipher(private);
> +   struct skcipher_tfm *tfm = private;
> +
> +   crypto_free_skcipher(tfm->skcipher);
> +   kfree(tfm);
>  }
>
>  static int skcipher_setkey(void *private, const u8 *key, unsigned int keylen)
>  {
> -   return crypto_skcipher_setkey(private, key, keylen);
> +   struct skcipher_tfm *tfm = private;
> +   int err;
> +
> +   err = crypto_skcipher_setkey(tfm->skcipher, key, keylen);
> +   tfm->has_key = !err;
> +
> +   return err;
>  }
>
>  static void skcipher_wait(struct sock *sk)
> @@ -792,20 +818,25 @@ static int skcipher_accept_parent(void *private, struct 
> sock *sk)
>  {
> struct skcipher_ctx *ctx;
> struct alg_sock *ask = alg_sk(sk);
> -   unsigned int len = sizeof(*ctx) + crypto_skcipher_reqsize(private);
> +   struct skcipher_tfm *tfm = private;
> +   struct crypto_skcipher *skcipher = tfm->skcipher;
> +   unsigned int len = sizeof(*ctx) + crypto_skcipher_reqsize(skcipher);
> +
> +   if (!tfm->has_key)
> +   return -ENOKEY;
>
> ctx = sock_kmalloc(sk, len, GFP_KERNEL);
> if (!ctx)
> return -ENOMEM;
>
> -   ctx->iv = sock_kmalloc(sk, crypto_skcipher_ivsize(private),
> +   ctx->iv = sock_kmalloc(sk, crypto_skcipher_ivsize(skcipher),
>GFP_KERNEL);
> if (!ctx->iv) {
> sock_kfree_s(sk, ctx, len);
> return -ENOMEM;
> }
>
> -   memset(ctx->iv, 0, crypto_skcipher_ivsize(private));
> +   memset(ctx->iv, 0, crypto_skcipher_ivsize(skcipher));
>
> INIT_LIST_HEAD(>tsgl);
> ctx->len = len;
> @@ -818,7 +849,7 @@ static int skcipher_accept_parent(void *private, struct 
> sock *sk)
>
> ask->private = ctx;
>
> -   skcipher_request_set_tfm(>req, private);
> +   skcipher_request_set_tfm(>req, skcipher);
> skcipher_request_set_callback(>req, CRYPTO_TFM_REQ_MAY_BACKLOG,
>   af_alg_complete, >completion);
>


Hi Herbert,

I am testing with your two patches:
crypto: algif_skcipher - Use new skcipher interface
crypto: algif_skcipher - Require setkey before accept(2)
on top of a88164345b81292b55a8d4829fdd35c8d611cd7d (Dec 23).

Now the following program causes a bunch of use-after-frees and them
kills kernel:

// autogenerated by syzkaller (http://github.com/google/syzkaller)
#include 
#include 
#include 
#include 
#include 

long r[10];

int main()
{
memset(r, -1, sizeof(r));
r[0] = syscall(SYS_mmap, 0x2000ul, 0xca7000ul, 0x3ul,
0x32ul, 0xu

Re: crypto: use-after-free in alg_bind

2015-12-30 Thread Dmitry Vyukov

On Wed, Dec 30, 2015 at 11:53 AM, Herbert Xu
<herb...@gondor.apana.org.au> wrote:
> On Wed, Dec 30, 2015 at 11:19:45AM +0100, Dmitry Vyukov wrote:
>>
>> This use-after-free does not reproduce on every run. It seems to be
>> triggered by some race. Try to run the program in a parallel loop.
>> I use stress tool for this:
>> https://github.com/golang/tools/blob/master/cmd/stress/stress.go
>> If you have Go toolchain installed, then then following will do:
>> $ go get golang.org/x/tools/cmd/stress
>> $ stress -p 16 ./a.out
>
> I've tried a few thousand instances of it but still no luck.
>>
>>
>> diff --git a/crypto/af_alg.c b/crypto/af_alg.c
>> index a8e7aa3..82a7dcd 100644
>
> There are a few missing hunks in your patch and the patch to
> if_alg.h is missing.
>
> So please start with the current crypto tree and then apply the
> latest version (v2) of "crypto: af_alg - Disallow bind/setkey/...
> after accept(2)" and try again.


I forgot to diff include/crypto/if_alg.h, but the changes are there
(otherwise all references to refcnt would not compile). Also I moved
ask->refcnt checks to alg_setsockopt to fix the deadlock, I believe
that's the missing chunks you refer to. I can retest if you wish, but
I don't think that my changes can affect the reported use-after-free.
Do you?


diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h
index 018afb2..589716f 100644
--- a/include/crypto/if_alg.h
+++ b/include/crypto/if_alg.h
@@ -30,6 +30,8 @@ struct alg_sock {

struct sock *parent;

+   unsigned int refcnt;
+
const struct af_alg_type *type;
void *private;
 };
@@ -67,6 +69,7 @@ int af_alg_register_type(const struct af_alg_type *type);
 int af_alg_unregister_type(const struct af_alg_type *type);

 int af_alg_release(struct socket *sock);
+void af_alg_release_parent(struct sock *sk);
 int af_alg_accept(struct sock *sk, struct socket *newsock);

 int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len);
@@ -83,11 +86,6 @@ static inline struct alg_sock *alg_sk(struct sock *sk)
return (struct alg_sock *)sk;
 }

-static inline void af_alg_release_parent(struct sock *sk)
-{
-   sock_put(alg_sk(sk)->parent);
-}
-
 static inline void af_alg_init_completion(struct af_alg_completion *completion)
 {
init_completion(>completion);
--
To unsubscribe from this list: send the line "unsubscribe linux-crypto" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: crypto: use-after-free in alg_bind

2015-12-30 Thread Dmitry Vyukov

On Wed, Dec 30, 2015 at 2:24 AM, Herbert Xu <herb...@gondor.apana.org.au> wrote:
> On Tue, Dec 29, 2015 at 09:19:22PM +0100, Dmitry Vyukov wrote:
>> Hello,
>>
>> On commit 8513342170278468bac126640a5d2d12ffbff106
>> + crypto: algif_skcipher - Use new skcipher interface
>> + crypto: algif_skcipher - Require setkey before accept(2)
>> + crypto: af_alg - Disallow bind/setkey/... after accept(2)
>>
>> The following program causes use-after-free in alg_bind and later
>> terminates kernel:
>
> Please double-check that you have the last patch applied correctly,
> as I cannot reproduce the crash with your program.

I am pretty sure I have the last patch applied. The use-after-frees
that it was supposed to fix have gone. Below are combined changed to
crypto/ files that I have on top of
8513342170278468bac126640a5d2d12ffbff106.

This use-after-free does not reproduce on every run. It seems to be
triggered by some race. Try to run the program in a parallel loop.
I use stress tool for this:
https://github.com/golang/tools/blob/master/cmd/stress/stress.go
If you have Go toolchain installed, then then following will do:
$ go get golang.org/x/tools/cmd/stress
$ stress -p 16 ./a.out


diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index a8e7aa3..82a7dcd 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -125,6 +125,23 @@ int af_alg_release(struct socket *sock)
 }
 EXPORT_SYMBOL_GPL(af_alg_release);

+void af_alg_release_parent(struct sock *sk)
+{
+struct alg_sock *ask = alg_sk(sk);
+bool last;
+
+sk = ask->parent;
+ask = alg_sk(sk);
+
+lock_sock(sk);
+last = !--ask->refcnt;
+release_sock(sk);
+
+if (last)
+sock_put(sk);
+}
+EXPORT_SYMBOL_GPL(af_alg_release_parent);
+
 static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 {
 const u32 forbidden = CRYPTO_ALG_INTERNAL;
@@ -133,6 +150,7 @@ static int alg_bind(struct socket *sock, struct
sockaddr *uaddr, int addr_len)
 struct sockaddr_alg *sa = (void *)uaddr;
 const struct af_alg_type *type;
 void *private;
+int err;

 if (sock->state == SS_CONNECTED)
 return -EINVAL;
@@ -160,16 +178,22 @@ static int alg_bind(struct socket *sock, struct
sockaddr *uaddr, int addr_len)
 return PTR_ERR(private);
 }

+err = -EBUSY;
 lock_sock(sk);
+if (ask->refcnt)
+goto unlock;

 swap(ask->type, type);
 swap(ask->private, private);

+err = 0;
+
+unlock:
 release_sock(sk);

 alg_do_release(type, private);

-return 0;
+return err;
 }

 static int alg_setkey(struct sock *sk, char __user *ukey,
@@ -196,6 +220,16 @@ out:
 return err;
 }

+static int alg_setauthsize(struct sock *sk, unsigned int size)
+{
+int err;
+struct alg_sock *ask = alg_sk(sk);
+const struct af_alg_type *type = ask->type;
+
+err = type->setauthsize(ask->private, size);
+return err;
+}
+
 static int alg_setsockopt(struct socket *sock, int level, int optname,
   char __user *optval, unsigned int optlen)
 {
@@ -210,6 +244,11 @@ static int alg_setsockopt(struct socket *sock,
int level, int optname,
 if (level != SOL_ALG || !type)
 goto unlock;

+if (ask->refcnt) {
+err = -EBUSY;
+goto unlock;
+}
+
 switch (optname) {
 case ALG_SET_KEY:
 if (sock->state == SS_CONNECTED)
@@ -224,7 +263,7 @@ static int alg_setsockopt(struct socket *sock, int
level, int optname,
 goto unlock;
 if (!type->setauthsize)
 goto unlock;
-err = type->setauthsize(ask->private, optlen);
+err = alg_setauthsize(sk, optlen);
 }

 unlock:
@@ -264,7 +303,8 @@ int af_alg_accept(struct sock *sk, struct socket *newsock)

 sk2->sk_family = PF_ALG;

-sock_hold(sk);
+if (!ask->refcnt++)
+sock_hold(sk);
 alg_sk(sk2)->parent = sk;
 alg_sk(sk2)->type = type;

diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c
index 634b4d1..df483f9 100644
--- a/crypto/algif_skcipher.c
+++ b/crypto/algif_skcipher.c
@@ -31,6 +31,11 @@ struct skcipher_sg_list {
 struct scatterlist sg[0];
 };

+struct skcipher_tfm {
+struct crypto_skcipher *skcipher;
+bool has_key;
+};
+
 struct skcipher_ctx {
 struct list_head tsgl;
 struct af_alg_sgl rsgl;
@@ -750,17 +755,41 @@ static struct proto_ops algif_skcipher_ops = {

 static void *skcipher_bind(const char *name, u32 type, u32 mask)
 {
-return crypto_alloc_skcipher(name, type, mask);
+struct skcipher_tfm *tfm;
+struct crypto_skci

Re: crypto: use-after-free in alg_bind

2015-12-30 Thread Dmitry Vyukov

On Wed, Dec 30, 2015 at 1:24 PM, Herbert Xu <herb...@gondor.apana.org.au> wrote:
> On Wed, Dec 30, 2015 at 11:58:58AM +0100, Dmitry Vyukov wrote:
>>
>> I forgot to diff include/crypto/if_alg.h, but the changes are there
>> (otherwise all references to refcnt would not compile). Also I moved
>> ask->refcnt checks to alg_setsockopt to fix the deadlock, I believe
>> that's the missing chunks you refer to. I can retest if you wish, but
>> I don't think that my changes can affect the reported use-after-free.
>> Do you?
>
> OK I see the problem now.  When accept fails we free the socket
> twice.

Great!

This seems to be a zero-day. Should we CC sta...@vger.kernel.org ?

Code in 03c8efc1ffeb6b82a22c1af8dd908af349563314 (Oct 19, 2010) contained:

+   sock_init_data(newsock, sk2);
+
+   err = type->accept(ask->private, sk2);
+   if (err) {
+   sk_free(sk2);
+   goto unlock;
+   }

There were no sock_graft call, but sock_init_data also sets
newsock->sk = sk2, which seems to be enough for the double-free.




> ---8<---
> Subject: crypto: af_alg - Fix socket double-free when accept fails
>
> When we fail an accept(2) call we will end up freeing the socket
> twice, once due to the direct sk_free call and once again through
> newsock.
>
> This patch fixes this by removing the sk_free call.
>
> Cc: sta...@vger.kernel.org
> Reported-by: Dmitry Vyukov <dvyu...@google.com>
> Signed-off-by: Herbert Xu <herb...@gondor.apana.org.au>
>
> diff --git a/crypto/af_alg.c b/crypto/af_alg.c
> index 7b5b592..eaf98e2 100644
> --- a/crypto/af_alg.c
> +++ b/crypto/af_alg.c
> @@ -285,10 +285,8 @@ int af_alg_accept(struct sock *sk, struct socket 
> *newsock)
> security_sk_clone(sk, sk2);
>
> err = type->accept(ask->private, sk2);
> -   if (err) {
> -   sk_free(sk2);
> +   if (err)
> goto unlock;
> -   }
>
> sk2->sk_family = PF_ALG;
>
> --
> Email: Herbert Xu <herb...@gondor.apana.org.au>
> Home Page: http://gondor.apana.org.au/~herbert/
> PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
To unsubscribe from this list: send the line "unsubscribe linux-crypto" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: crypto: NULL deref in sha512_mb_mgr_get_comp_job_avx2

2017-02-02 Thread Dmitry Vyukov

On Wed, Feb 1, 2017 at 7:45 PM, Tim Chen <tim.c.c...@linux.intel.com> wrote:
> On Tue, Jan 31, 2017 at 02:16:31PM +0100, Dmitry Vyukov wrote:
>> Hello,
>>
>> I am getting the following reports with low frequency while running
>> syzkaller fuzzer. Unfortunately they are not reproducible and happen
>> in a background thread, so it is difficult to extract any context on
>> my side. I see only few such crashes per week, so most likely it is
>> some hard to trigger data race. The following reports are from mmotm
>> tree, commits 00e20cfc2bf04a0cbe1f5405f61c8426f43eee84 and
>> fff7e71eac7788904753136f09bcad7471f7799e. Any ideas as to how this can
>> happen?
>>
>> BUG: unable to handle kernel NULL pointer dereference at 0060
>> IP: [] sha512_mb_mgr_get_comp_job_avx2+0x6e/0xee
>> arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S:251
>> PGD 1d2395067 [  220.874864] PUD 1d2860067
>> Oops: 0002 [#1] SMP KASAN
>> Dumping ftrace buffer:
>>(ftrace buffer empty)
>> Modules linked in:
>> CPU: 0 PID: 516 Comm: kworker/0:1 Not tainted 4.9.0 #4
>> Hardware name: Google Google Compute Engine/Google Compute Engine,
>> BIOS Google 01/01/2011
>> Workqueue: crypto mcryptd_queue_worker
>> task: 8801d9f346c0 task.stack: 8801d9f08000
>> RIP: 0010:[]  []
>> sha512_mb_mgr_get_comp_job_avx2+0x6e/0xee
>> arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S:251
>> RSP: 0018:8801d9f0eef8  EFLAGS: 00010202
>> RAX:  RBX: 8801d7db1190 RCX: 0006
>> RDX: 0001 RSI: 8801d9f34ee8 RDI: 8801d7db1040
>> RBP: 8801d9f0f258 R08: 0001 R09: 0001
>> R10: 0002 R11: 0003 R12: 8801d9f0f230
>> R13: 8801c8bbc4e0 R14: 8801c8bbc530 R15: 8801d9f0ef70
>> FS:  () GS:8801dc00() knlGS:
>> CS:  0010 DS:  ES:  CR0: 80050033
>> CR2: 0060 CR3: 0001cc15a000 CR4: 001406f0
>> DR0:  DR1:  DR2: 
>> DR3:  DR6: fffe0ff0 DR7: 0400
>> Stack:
>>  8801d7db1040 813fa207 dc00 e8c0f238
>>  0002 11003b3e1dea e8c0f218 8801d9f0f190
>>  0282 e8c0f140 e8c0f220 41b58ab3
>> Call Trace:
>>  [] sha512_mb_update+0x2f7/0x4e0
>> arch/x86/crypto/sha512-mb/sha512_mb.c:588
>>  [] crypto_ahash_update include/crypto/hash.h:512 [inline]
>>  [] ahash_mcryptd_update crypto/mcryptd.c:627 [inline]
>>  [] mcryptd_hash_update+0xcd/0x1c0 crypto/mcryptd.c:373
>>  [] mcryptd_queue_worker+0xff/0x6a0 crypto/mcryptd.c:181
>>  [] process_one_work+0xbd0/0x1c10 kernel/workqueue.c:2096
>>  [] worker_thread+0x223/0x1990 kernel/workqueue.c:2230
>>  [] kthread+0x323/0x3e0 kernel/kthread.c:209
>>  [] ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:433
>> Code: 49 0f 42 d3 48 f7 c2 f0 ff ff ff 0f 85 9a 00 00 00 48 83 e2 0f
>> 48 6b da 08 48 8d 9c 1f 48 01 00 00 48 8b 03 48 c7 03 00 00 00 00 
>> 40 60 02 00 00 00 48 8b 9f 40 01 00 00 48 c1 e3 08 48 09 d3
>> RIP  [] sha512_mb_mgr_get_comp_job_avx2+0x6e/0xee
>> arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S:251
>>  RSP 
>> CR2: 0060
>> ---[ end trace 139fd4cda5dfe2c4 ]---
>>
>
> Dmitry,
>
> One theory that Mehga and I have is that perhaps the flusher
> and regular computaion updates are stepping on each other.
> Can you try this patch and see if it helps?


No, for this one I can't. Sorry.
It happens with very low frequency and only one fuzzer that tests
mmotm tree. If/when this is committed, I can keep an eye on these
reports and notify if I still see them.
If you have a hypothesis as to how it happens, perhaps you could write
a test that provokes the crash and maybe add some sleeps to kernel
code or alter timeouts to increase probability.



> --->8---
>
> From: Tim Chen <tim.c.c...@linux.intel.com>
> Subject: [PATCH] crypto/sha512-mb: Protect sha512 mb ctx mgr access
> To: Herbert Xu <herb...@gondor.apana.org.au>, Dmitry Vyukov 
> <dvyu...@google.com>
> Cc: Tim Chen <tim.c.c...@linux.intel.com>, David Miller 
> <da...@davemloft.net>, linux-crypto@vger.kernel.org, LKML 
> <linux-ker...@vger.kernel.org>, megha@linux.intel.com, 
> fenghua...@intel.com
>
> The flusher and regular multi-buffer computation via mcryptd may race with 
> another.
> Add here a lock and turn off interrupt to to access multi-buffer
> computation state cstate->mgr before a round of

crypto: NULL deref in sha512_mb_mgr_get_comp_job_avx2

2017-01-31 Thread Dmitry Vyukov

Hello,

I am getting the following reports with low frequency while running
syzkaller fuzzer. Unfortunately they are not reproducible and happen
in a background thread, so it is difficult to extract any context on
my side. I see only few such crashes per week, so most likely it is
some hard to trigger data race. The following reports are from mmotm
tree, commits 00e20cfc2bf04a0cbe1f5405f61c8426f43eee84 and
fff7e71eac7788904753136f09bcad7471f7799e. Any ideas as to how this can
happen?

BUG: unable to handle kernel NULL pointer dereference at 0060
IP: [] sha512_mb_mgr_get_comp_job_avx2+0x6e/0xee
arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S:251
PGD 1d2395067 [  220.874864] PUD 1d2860067
Oops: 0002 [#1] SMP KASAN
Dumping ftrace buffer:
   (ftrace buffer empty)
Modules linked in:
CPU: 0 PID: 516 Comm: kworker/0:1 Not tainted 4.9.0 #4
Hardware name: Google Google Compute Engine/Google Compute Engine,
BIOS Google 01/01/2011
Workqueue: crypto mcryptd_queue_worker
task: 8801d9f346c0 task.stack: 8801d9f08000
RIP: 0010:[]  []
sha512_mb_mgr_get_comp_job_avx2+0x6e/0xee
arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S:251
RSP: 0018:8801d9f0eef8  EFLAGS: 00010202
RAX:  RBX: 8801d7db1190 RCX: 0006
RDX: 0001 RSI: 8801d9f34ee8 RDI: 8801d7db1040
RBP: 8801d9f0f258 R08: 0001 R09: 0001
R10: 0002 R11: 0003 R12: 8801d9f0f230
R13: 8801c8bbc4e0 R14: 8801c8bbc530 R15: 8801d9f0ef70
FS:  () GS:8801dc00() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 0060 CR3: 0001cc15a000 CR4: 001406f0
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400
Stack:
 8801d7db1040 813fa207 dc00 e8c0f238
 0002 11003b3e1dea e8c0f218 8801d9f0f190
 0282 e8c0f140 e8c0f220 41b58ab3
Call Trace:
 [] sha512_mb_update+0x2f7/0x4e0
arch/x86/crypto/sha512-mb/sha512_mb.c:588
 [] crypto_ahash_update include/crypto/hash.h:512 [inline]
 [] ahash_mcryptd_update crypto/mcryptd.c:627 [inline]
 [] mcryptd_hash_update+0xcd/0x1c0 crypto/mcryptd.c:373
 [] mcryptd_queue_worker+0xff/0x6a0 crypto/mcryptd.c:181
 [] process_one_work+0xbd0/0x1c10 kernel/workqueue.c:2096
 [] worker_thread+0x223/0x1990 kernel/workqueue.c:2230
 [] kthread+0x323/0x3e0 kernel/kthread.c:209
 [] ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:433
Code: 49 0f 42 d3 48 f7 c2 f0 ff ff ff 0f 85 9a 00 00 00 48 83 e2 0f
48 6b da 08 48 8d 9c 1f 48 01 00 00 48 8b 03 48 c7 03 00 00 00 00 
40 60 02 00 00 00 48 8b 9f 40 01 00 00 48 c1 e3 08 48 09 d3
RIP  [] sha512_mb_mgr_get_comp_job_avx2+0x6e/0xee
arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S:251
 RSP 
CR2: 0060
---[ end trace 139fd4cda5dfe2c4 ]---

BUG: unable to handle kernel NULL pointer dereference at 0060
IP: [] sha512_mb_mgr_get_comp_job_avx2+0x6e/0xee
arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S:251
PGD 1c68ad067 [  624.973638] PUD 1d485a067
Oops: 0002 [#1] SMP KASAN
Dumping ftrace buffer:
   (ftrace buffer empty)
Modules linked in:
CPU: 0 PID: 517 Comm: kworker/0:1 Not tainted 4.9.0 #3
Hardware name: Google Google Compute Engine/Google Compute Engine,
BIOS Google 01/01/2011
Workqueue: crypto mcryptd_queue_worker
task: 8801d9e64700 task.stack: 8801d9838000
RIP: 0010:[]  []
sha512_mb_mgr_get_comp_job_avx2+0x6e/0xee
arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S:251
RSP: 0018:8801d983eef8  EFLAGS: 00010202
RAX:  RBX: 8801d7d96950 RCX: 0006
RDX: 0001 RSI: 8801d9e64f28 RDI: 8801d7d96800
RBP: 8801d983f258 R08: 0001 R09: 0001
R10: 0002 R11: 0003 R12: 8801d983f230
R13: 8801b67f5720 R14: 8801b67f5770 R15: 8801d983ef70
FS:  () GS:8801dc00() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 0060 CR3: 0001cee58000 CR4: 001406f0
Stack:
 8801d7d96800 813fa207 dc00 e8c0f238
 0002 11003b307dea e8c0f218 8801d983f190
 0282 e8c0f140 e8c0f220 41b58ab3
Call Trace:
 [] sha512_mb_update+0x2f7/0x4e0
arch/x86/crypto/sha512-mb/sha512_mb.c:588
 [] crypto_ahash_update include/crypto/hash.h:512 [inline]
 [] ahash_mcryptd_update crypto/mcryptd.c:627 [inline]
 [] mcryptd_hash_update+0xcd/0x1c0 crypto/mcryptd.c:373
 [] mcryptd_queue_worker+0xff/0x6a0 crypto/mcryptd.c:181
 [] process_one_work+0xbd0/0x1c10 kernel/workqueue.c:2096
 [] worker_thread+0x223/0x1990 kernel/workqueue.c:2230
 [] kthread+0x323/0x3e0 kernel/kthread.c:209
 [] ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:433
Code: 49 0f 42 d3 48 f7 c2 f0 ff ff ff 0f 85 9a

Re: [PATCH 6/7] md/raid10, LLVM: get rid of variable length array

2017-03-24 Thread Dmitry Vyukov

On Fri, Mar 17, 2017 at 9:04 PM,   wrote:
> On March 17, 2017 12:27:46 PM PDT, Peter Zijlstra  
> wrote:
>>On Fri, Mar 17, 2017 at 11:52:01AM -0700, Michael Davidson wrote:
>>> On Fri, Mar 17, 2017 at 5:44 AM, Peter Zijlstra
>> wrote:
>>> >
>>> > Be that as it may; what you construct above is disgusting. Surely
>>the
>>> > code can be refactored to not look like dog vomit?
>>> >
>>> > Also; its not immediately obvious conf->copies is 'small' and this
>>> > doesn't blow up the stack; I feel that deserves a comment
>>somewhere.
>>> >
>>>
>>> I agree that the code is horrible.
>>>
>>> It is, in fact, exactly the same solution that was used to remove
>>> variable length arrays in structs from several of the crypto drivers
>>a
>>> few years ago - see the definition of SHASH_DESC_ON_STACK() in
>>> "crypto/hash.h" - I did not, however, hide the horrors in a macro
>>> preferring to leave the implementation visible as a warning to
>>whoever
>>> might touch the code next.
>>>
>>> I believe that the actual stack usage is exactly the same as it was
>>previously.
>>>
>>> I can certainly wrap this  up in a macro and add comments with
>>> appropriately dire warnings in it if you feel that is both necessary
>>> and sufficient.
>>
>>We got away with ugly in the past, so we should get to do it again?
>
> Seriously, you should have taken the hack the first time that this needs to 
> be fixed.  Just because this is a fairly uncommon construct in the kernel 
> doesn't mean it is not in userspace.


There is a reason why it is fairly uncommon in kernel.
Initially it was used more widely, but then there was a decision to
drop all uses of this feature. Namely:
Linus: "We should definitely drop it. The feature is an abomination".
https://lkml.org/lkml/2013/9/23/500
I really don't understand why you cling onto this last use of the
feature. Having a single use of a compiler extension on an error path
of a non-mandatory driver does not look like a great idea to me. Let's
just kill it off outside of clang discussion.

Re: [PATCH 6/7] md/raid10, LLVM: get rid of variable length array

2017-03-24 Thread Dmitry Vyukov

On Fri, Mar 24, 2017 at 3:10 PM, Peter Zijlstra <pet...@infradead.org> wrote:
> On Fri, Mar 24, 2017 at 02:50:24PM +0100, Dmitry Vyukov wrote:
>> OK, I guess should not have referenced the llvm-linux page.
>> So here are reasons on our side that I am ready to vouch:
>>
>>  - clang make it possible to implement KMSAN (dynamic detection of
>> uses of uninit memory)
>
> How does GCC make this impossible?

Too complex and too difficult to implement correctly on all corner
cases. All other sanitizers were ported to gcc very quickly, but msan
wasn't. Nobody is brave enough to even approach it.

>>  - better code coverage for fuzzing
>
> How so? Why can't the same be achieved using GCC?

Same reason.

>>  - why simpler and faster development (e.g. we can port our user-space
>> hardening technologies -- CFI and SafeStack)
>
> That's just because you've already implemented this in clang, right? So
> less work for you. Not because its impossible.

I am not saying that it's impossible. It would just require
unreasonable amount of time, and then perpetual maintenance to fix
corner cases and regressions.

For background: I implemented the current fuzzing coverage (KCOV) in
gcc, and user-space tsan instrumentation in gcc.

Re: [PATCH 6/7] md/raid10, LLVM: get rid of variable length array

2017-03-24 Thread Dmitry Vyukov

On Fri, Mar 17, 2017 at 8:29 PM, Peter Zijlstra <pet...@infradead.org> wrote:
> On Fri, Mar 17, 2017 at 08:26:42PM +0100, Peter Zijlstra wrote:
>> On Fri, Mar 17, 2017 at 08:05:16PM +0100, Dmitry Vyukov wrote:
>> > You can also find some reasons in the Why section of LLVM-Linux project:
>> > http://llvm.linuxfoundation.org/index.php/Main_Page
>>
>> From that:
>>
>>  - LLVM/Clang is a fast moving project with many things fixed quickly
>>and features added.
>>
>> So what's the deal with that 5 year old bug you want us to work around?
>>
>> Also, clang doesn't support asm cc flags output and a few other
>> extensions last time I checked.
>>
>
> Another great one:
>
>  - BSD License (some people prefer this license to the GPL)
>
> Seems a very weak argument to make when talking about the Linux Kernel
> which is very explicitly GPLv2 (and not later).

OK, I guess should not have referenced the llvm-linux page.
So here are reasons on our side that I am ready to vouch:

 - clang make it possible to implement KMSAN (dynamic detection of
uses of uninit memory)
 - better code coverage for fuzzing
 - why simpler and faster development (e.g. we can port our user-space
hardening technologies -- CFI and SafeStack)

Michael is on a different team and has own reasons to do this.

Re: [PATCH 6/7] md/raid10, LLVM: get rid of variable length array

2017-03-17 Thread Dmitry Vyukov

On Fri, Mar 17, 2017 at 7:03 PM, Borislav Petkov  wrote:
> On Fri, Mar 17, 2017 at 01:32:00PM +0100, Alexander Potapenko wrote:
>> > IIUC there's only a handful of VLAIS instances in LLVM code, why not
>> Sorry, "kernel code", not "LLVM code".
>> > just drop them for the sake of better code portability?
>
> And what happens if someone else adds a variable thing like this
> somewhere else, builds with gcc, everything's fine and patch gets
> applied? Or something else llvm can't stomach.
>
> Does that mean there'll be the occasional, every-so-often whack-a-mole
> patchset from someone, fixing the kernel build with llvm yet again?

This problem is more general and is not specific to clang. It equally
applies to different versions of gcc, different arches and different
configs (namely, anything else than what a developer used for
testing). A known, reasonably well working solution to this problem is
a system of try bots that test patches before commit with different
compilers/configs/archs. We already have such system in the form of
0-day bots. It would be useful to extend it with clang as soon as
kernel builds.

Re: [PATCH 6/7] md/raid10, LLVM: get rid of variable length array

2017-03-17 Thread Dmitry Vyukov

On Fri, Mar 17, 2017 at 7:57 PM, Borislav Petkov <b...@alien8.de> wrote:
> On Fri, Mar 17, 2017 at 07:47:33PM +0100, Dmitry Vyukov wrote:
>> This problem is more general and is not specific to clang. It equally
>> applies to different versions of gcc, different arches and different
>> configs (namely, anything else than what a developer used for
>> testing).
>
> I guess. We do carry a bunch of gcc workarounds along with the cc-*
> macros in scripts/Kbuild.include.
>
>> A known, reasonably well working solution to this problem is
>> a system of try bots that test patches before commit with different
>> compilers/configs/archs. We already have such system in the form of
>> 0-day bots. It would be useful to extend it with clang as soon as
>> kernel builds.
>
> Has someone actually already talked to Fengguang about it?

+Fengguang

> Oh, and the stupid question: why the effort to build the kernel
> with clang at all? Just because or are there some actual, palpable
> advantages?

On our side it is:
 - clang make it possible to implement KMSAN (dynamic detection of
uses of uninit memory)
 - better code coverage for fuzzing
 - why simpler and faster development (e.g. we can port our user-space
hardening technologies -- CFI and SafeStack)

You can also find some reasons in the Why section of LLVM-Linux project:
http://llvm.linuxfoundation.org/index.php/Main_Page

Re: crypto: deadlock between crypto_alg_sem/rtnl_mutex/genl_mutex

2017-03-15 Thread Dmitry Vyukov

On Tue, Mar 14, 2017 at 4:25 PM, Sowmini Varadhan
<sowmini.varad...@oracle.com> wrote:
> On (03/14/17 09:14), Dmitry Vyukov wrote:
>> Another one now involving rds_tcp_listen_stop
>:
>> kworker/u4:1/19 is trying to acquire lock:
>>  (sk_lock-AF_INET){+.+.+.}, at: [] lock_sock
>> include/net/sock.h:1460 [inline]
>>  (sk_lock-AF_INET){+.+.+.}, at: []
>> rds_tcp_listen_stop+0x5c/0x150 net/rds/tcp_listen.c:288
>>
>> but task is already holding lock:
>>  (rtnl_mutex){+.+.+.}, at: [] rtnl_lock+0x17/0x20
>> net/core/rtnetlink.c:70
>
> Is this also a false positive?
>
> genl_lock_dumpit takes the genl_lock and then waits on the rtnl_lock
> (e.g., out of tipc_nl_bearer_dump).
>
> netdev_run_todo takes the rtnl_lock and then wants lock_sock()
> for the TCP/IPv4 socket.
>
> Why is lockdep seeing a circular dependancy here? Same pattern
> seems to be happening  for
>   http://www.spinics.net/lists/netdev/msg423368.html
> and maybe also http://www.spinics.net/lists/netdev/msg423323.html?
>
> --Sowmini
>
>> Chain exists of:
>>   sk_lock-AF_INET --> genl_mutex --> rtnl_mutex
>>
>>  Possible unsafe locking scenario:
>>
>>CPU0CPU1
>>
>>   lock(rtnl_mutex);
>>lock(genl_mutex);
>>lock(rtnl_mutex);
>>   lock(sk_lock-AF_INET);
>>
>>  *** DEADLOCK ***
>>
>> 4 locks held by kworker/u4:1/19:
>>  #0:  ("%s""netns"){.+.+.+}, at: []
>> __write_once_size include/linux/compiler.h:283 [inline]
>>  #0:  ("%s""netns"){.+.+.+}, at: [] atomic64_set
>> arch/x86/include/asm/atomic64_64.h:33 [inline]
>>  #0:  ("%s""netns"){.+.+.+}, at: [] atomic_long_set
>> include/asm-generic/atomic-long.h:56 [inline]
>>  #0:  ("%s""netns"){.+.+.+}, at: [] set_work_data
>> kernel/workqueue.c:617 [inline]
>>  #0:  ("%s""netns"){.+.+.+}, at: []
>> set_work_pool_and_clear_pending kernel/workqueue.c:644 [inline]
>>  #0:  ("%s""netns"){.+.+.+}, at: []
>> process_one_work+0xab3/0x1c10 kernel/workqueue.c:2089
>>  #1:  (net_cleanup_work){+.+.+.}, at: []
>> process_one_work+0xb07/0x1c10 kernel/workqueue.c:2093
>>  #2:  (net_mutex){+.+.+.}, at: []
>> cleanup_net+0x22b/0xa90 net/core/net_namespace.c:429
>>  #3:  (rtnl_mutex){+.+.+.}, at: []
>> rtnl_lock+0x17/0x20 net/core/rtnetlink.c:70



After I've applied the patch these reports stopped to happen, and I
have not seem any other reports that look relevant.
However, it there was one, but it looks like a different issue and it
was probably masked by massive amounts of original deadlock reports:


[ INFO: possible circular locking dependency detected ]
4.10.0+ #29 Not tainted
---
syz-executor5/29222 is trying to acquire lock:
 (genl_mutex){+.+.+.}, at: [] genl_lock
net/netlink/genetlink.c:32 [inline]
 (genl_mutex){+.+.+.}, at: []
genl_family_rcv_msg+0xdae/0x1040 net/netlink/genetlink.c:547

but task is already holding lock:
 (rtnl_mutex){+.+.+.}, at: [] rtnl_lock+0x17/0x20
net/core/rtnetlink.c:70

which lock already depends on the new lock.


the existing dependency chain (in reverse order) is:

-> #1 (rtnl_mutex){+.+.+.}:
   validate_chain kernel/locking/lockdep.c:2267 [inline]
   __lock_acquire+0x2149/0x3430 kernel/locking/lockdep.c:3340
   lock_acquire+0x2a1/0x630 kernel/locking/lockdep.c:3755
   __mutex_lock_common kernel/locking/mutex.c:756 [inline]
   __mutex_lock+0x172/0x1730 kernel/locking/mutex.c:893
   mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:908
   rtnl_lock+0x17/0x20 net/core/rtnetlink.c:70
   nl80211_dump_wiphy+0x45/0x6d0 net/wireless/nl80211.c:1946
   genl_lock_dumpit+0x68/0x90 net/netlink/genetlink.c:479
   netlink_dump+0x54d/0xd40 net/netlink/af_netlink.c:2168
   __netlink_dump_start+0x4e5/0x760 net/netlink/af_netlink.c:2258
   genl_family_rcv_msg+0xd9d/0x1040 net/netlink/genetlink.c:546
   genl_rcv_msg+0xa6/0x140 net/netlink/genetlink.c:620
   netlink_rcv_skb+0x2ab/0x390 net/netlink/af_netlink.c:2339
   genl_rcv+0x28/0x40 net/netlink/genetlink.c:631
   netlink_unicast_kernel net/netlink/af_netlink.c:1272 [inline]
   netlink_unicast+0x514/0x730 net/netlink/af_netlink.c:1298
   netlink_sendmsg+0xa9f/0xe50 net/netlink/af_netlink.c:1844
   sock_sendmsg_nosec net/socket.c:633 [inline]
   sock_sendmsg+0xca/0x110 net/socket.c:643
   ___sys_sendmsg+0x8fa/0x9f0 net/socket.c:1985
   __sys_sendmsg+0x138/0x300 net/socket.c:2019
   SYSC_sendmsg net/socket.c:2030 [inline]
   SyS_sendmsg+0x

Re: [PATCH 0/7] LLVM: make x86_64 kernel build with clang.

2017-03-17 Thread Dmitry Vyukov

On Fri, Mar 17, 2017 at 1:15 AM, Michael Davidson  wrote:
> This patch set is sufficient to get the x86_64 kernel to build
> and boot correctly with clang-3.8 or greater.
>
> The resulting build still has about 300 warnings, very few of
> which appear to be significant. Most of them should be fixable
> with some minor code refactoring although a few of them, such
> as the complaints about implict conversions between enumerated
> types may be candidates for just being disabled.

Thanks, Michael!
This will help us a lot with KMSAN (uninit use detector) and code coverage.


> Michael Davidson (7):
>   Makefile, LLVM: add -no-integrated-as to KBUILD_[AC]FLAGS
>   Makefile, x86, LLVM: disable unsupported optimization flags
>   x86, LLVM: suppress clang warnings about unaligned accesses
>   x86, boot, LLVM: #undef memcpy etc in string.c
>   x86, boot, LLVM: Use regparm=0 for memcpy and memset
>   md/raid10, LLVM: get rid of variable length array
>   crypto, x86, LLVM: aesni - fix token pasting
>
>  Makefile|  4 
>  arch/x86/Makefile   |  7 +++
>  arch/x86/boot/copy.S| 15 +--
>  arch/x86/boot/string.c  |  9 +
>  arch/x86/boot/string.h  | 13 +
>  arch/x86/crypto/aes_ctrby8_avx-x86_64.S |  7 ++-
>  drivers/md/raid10.c |  9 -
>  7 files changed, 52 insertions(+), 12 deletions(-)
>
> --
> 2.12.0.367.g23dc2f6d3c-goog
>

Re: crypto: deadlock between crypto_alg_sem/rtnl_mutex/genl_mutex

2017-03-14 Thread Dmitry Vyukov

On Tue, Mar 14, 2017 at 10:16 AM, Herbert Xu
<herb...@gondor.apana.org.au> wrote:
> On Sun, Mar 05, 2017 at 04:08:39PM +0100, Dmitry Vyukov wrote:
>>
>> -> #1 (genl_mutex){+.+.+.}:
>>validate_chain kernel/locking/lockdep.c:2267 [inline]
>>__lock_acquire+0x2149/0x3430 kernel/locking/lockdep.c:3340
>>lock_acquire+0x2a1/0x630 kernel/locking/lockdep.c:3755
>>__mutex_lock_common kernel/locking/mutex.c:756 [inline]
>>__mutex_lock+0x172/0x1730 kernel/locking/mutex.c:893
>>mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:908
>>genl_lock net/netlink/genetlink.c:32 [inline]
>>genl_lock_dumpit+0x41/0x90 net/netlink/genetlink.c:478
>>netlink_dump+0x54d/0xd40 net/netlink/af_netlink.c:2127
>>__netlink_dump_start+0x4e5/0x760 net/netlink/af_netlink.c:2217
>>genl_family_rcv_msg+0xd9d/0x1040 net/netlink/genetlink.c:546
>>genl_rcv_msg+0xa6/0x140 net/netlink/genetlink.c:620
>>netlink_rcv_skb+0x2ab/0x390 net/netlink/af_netlink.c:2298
>>genl_rcv+0x28/0x40 net/netlink/genetlink.c:631
>>netlink_unicast_kernel net/netlink/af_netlink.c:1231 [inline]
>>netlink_unicast+0x514/0x730 net/netlink/af_netlink.c:1257
>>netlink_sendmsg+0xa9f/0xe50 net/netlink/af_netlink.c:1803
>>sock_sendmsg_nosec net/socket.c:633 [inline]
>>sock_sendmsg+0xca/0x110 net/socket.c:643
>>sock_write_iter+0x326/0x600 net/socket.c:846
>>call_write_iter include/linux/fs.h:1733 [inline]
>>new_sync_write fs/read_write.c:497 [inline]
>>__vfs_write+0x483/0x740 fs/read_write.c:510
>>vfs_write+0x187/0x530 fs/read_write.c:558
>>SYSC_write fs/read_write.c:605 [inline]
>>SyS_write+0xfb/0x230 fs/read_write.c:597
>>entry_SYSCALL_64_fastpath+0x1f/0xc2
>>
>> -> #0 (nlk->cb_mutex){+.+.+.}:
>>check_prev_add kernel/locking/lockdep.c:1830 [inline]
>>check_prevs_add+0xa8f/0x19f0 kernel/locking/lockdep.c:1940
>>validate_chain kernel/locking/lockdep.c:2267 [inline]
>>__lock_acquire+0x2149/0x3430 kernel/locking/lockdep.c:3340
>>lock_acquire+0x2a1/0x630 kernel/locking/lockdep.c:3755
>>__mutex_lock_common kernel/locking/mutex.c:756 [inline]
>>__mutex_lock+0x172/0x1730 kernel/locking/mutex.c:893
>>mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:908
>>__netlink_dump_start+0xf4/0x760 net/netlink/af_netlink.c:2187
>>netlink_dump_start include/linux/netlink.h:165 [inline]
>>crypto_user_rcv_msg+0x2ad/0x4f0 crypto/crypto_user.c:517
>>netlink_rcv_skb+0x2ab/0x390 net/netlink/af_netlink.c:2298
>>crypto_netlink_rcv+0x2a/0x40 crypto/crypto_user.c:538
>>netlink_unicast_kernel net/netlink/af_netlink.c:1231 [inline]
>>netlink_unicast+0x514/0x730 net/netlink/af_netlink.c:1257
>>netlink_sendmsg+0xa9f/0xe50 net/netlink/af_netlink.c:1803
>>sock_sendmsg_nosec net/socket.c:633 [inline]
>>sock_sendmsg+0xca/0x110 net/socket.c:643
>>___sys_sendmsg+0x8fa/0x9f0 net/socket.c:1985
>>__sys_sendmsg+0x138/0x300 net/socket.c:2019
>>SYSC_sendmsg net/socket.c:2030 [inline]
>>SyS_sendmsg+0x2d/0x50 net/socket.c:2026
>>entry_SYSCALL_64_fastpath+0x1f/0xc2
>
> This looks like a false positive.  The cb_mutex in #1 is not the
> same as the cb_mutex in #0.  The cb_mutex in #0 comes is obtained
> by crypto_user which uses straight netlink.  The cb_mutex in #1
> is a genl netlink socket.
>
> I'll have a look to see if we can annotate this.

Yes, please.
Disregarding some reports is not a good way long term.

Re: crypto: deadlock between crypto_alg_sem/rtnl_mutex/genl_mutex

2017-03-14 Thread Dmitry Vyukov

On Tue, Mar 14, 2017 at 11:25 AM, Herbert Xu
<herb...@gondor.apana.org.au> wrote:
> On Tue, Mar 14, 2017 at 10:44:10AM +0100, Dmitry Vyukov wrote:
>>
>> Yes, please.
>> Disregarding some reports is not a good way long term.
>
> Please try this patch.

Applied on bots. I should have a conclusion within a day.
Thanks!


> ---8<---
> Subject: netlink: Annotate nlk cb_mutex by protocol
>
> Currently all occurences of nlk->cb_mutex are annotated by lockdep
> as a single class.  This causes a false lcokdep cycle involving
> genl and crypto_user.
>
> This patch fixes it by dividing cb_mutex into individual classes
> based on the netlink protocol.  As genl and crypto_user do not
> use the same netlink protocol this breaks the false dependency
> loop.
>
> Reported-by: Dmitry Vyukov <dvyu...@google.com>
> Signed-off-by: Herbert Xu <herb...@gondor.apana.org.au>
>
> diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
> index 7b73c7c..596eaff 100644
> --- a/net/netlink/af_netlink.c
> +++ b/net/netlink/af_netlink.c
> @@ -96,6 +96,44 @@ static inline int netlink_is_kernel(struct sock *sk)
>
>  static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
>
> +static struct lock_class_key nlk_cb_mutex_keys[MAX_LINKS];
> +
> +static const char *const nlk_cb_mutex_key_strings[MAX_LINKS + 1] = {
> +   "nlk_cb_mutex-ROUTE",
> +   "nlk_cb_mutex-1",
> +   "nlk_cb_mutex-USERSOCK",
> +   "nlk_cb_mutex-FIREWALL",
> +   "nlk_cb_mutex-SOCK_DIAG",
> +   "nlk_cb_mutex-NFLOG",
> +   "nlk_cb_mutex-XFRM",
> +   "nlk_cb_mutex-SELINUX",
> +   "nlk_cb_mutex-ISCSI",
> +   "nlk_cb_mutex-AUDIT",
> +   "nlk_cb_mutex-FIB_LOOKUP",
> +   "nlk_cb_mutex-CONNECTOR",
> +   "nlk_cb_mutex-NETFILTER",
> +   "nlk_cb_mutex-IP6_FW",
> +   "nlk_cb_mutex-DNRTMSG",
> +   "nlk_cb_mutex-KOBJECT_UEVENT",
> +   "nlk_cb_mutex-GENERIC",
> +   "nlk_cb_mutex-17",
> +   "nlk_cb_mutex-SCSITRANSPORT",
> +   "nlk_cb_mutex-ECRYPTFS",
> +   "nlk_cb_mutex-RDMA",
> +   "nlk_cb_mutex-CRYPTO",
> +   "nlk_cb_mutex-SMC",
> +   "nlk_cb_mutex-23",
> +   "nlk_cb_mutex-24",
> +   "nlk_cb_mutex-25",
> +   "nlk_cb_mutex-26",
> +   "nlk_cb_mutex-27",
> +   "nlk_cb_mutex-28",
> +   "nlk_cb_mutex-29",
> +   "nlk_cb_mutex-30",
> +   "nlk_cb_mutex-31",
> +   "nlk_cb_mutex-MAX_LINKS"
> +};
> +
>  static int netlink_dump(struct sock *sk);
>  static void netlink_skb_destructor(struct sk_buff *skb);
>
> @@ -585,6 +623,9 @@ static int __netlink_create(struct net *net, struct 
> socket *sock,
> } else {
> nlk->cb_mutex = >cb_def_mutex;
> mutex_init(nlk->cb_mutex);
> +   lockdep_set_class_and_name(nlk->cb_mutex,
> +  nlk_cb_mutex_keys + protocol,
> +  
> nlk_cb_mutex_key_strings[protocol]);
> }
> init_waitqueue_head(>wait);
>
> --
> Email: Herbert Xu <herb...@gondor.apana.org.au>
> Home Page: http://gondor.apana.org.au/~herbert/
> PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

Re: crypto: deadlock between crypto_alg_sem/rtnl_mutex/genl_mutex

2017-03-14 Thread Dmitry Vyukov

On Mon, Mar 6, 2017 at 10:36 AM, Dmitry Vyukov <dvyu...@google.com> wrote:
> On Sun, Mar 5, 2017 at 6:36 PM, Dmitry Vyukov <dvyu...@google.com> wrote:
>> On Sun, Mar 5, 2017 at 4:08 PM, Dmitry Vyukov <dvyu...@google.com> wrote:
>>> Hello,
>>>
>>> I am getting the following deadlock reports while running syzkaller
>>> fuzzer on net-next/8d70eeb84ab277377c017af6a21d0a337025dede:
>>>
>>> ==
>>> [ INFO: possible circular locking dependency detected ]
>>> 4.10.0+ #5 Not tainted
>>> ---
>>> syz-executor6/6143 is trying to acquire lock:
>>>  (nlk->cb_mutex){+.+.+.}, at: []
>>> __netlink_dump_start+0xf4/0x760 net/netlink/af_netlink.c:2187
>>>
>>> but task is already holding lock:
>>>  (crypto_alg_sem){+.}, at: []
>>> crypto_user_rcv_msg+0x136/0x4f0 crypto/crypto_user.c:507
>>>
>>> which lock already depends on the new lock.
>>>
>>>
>>> the existing dependency chain (in reverse order) is:
>>>
>>> -> #4 (crypto_alg_sem){+.}:
>>>validate_chain kernel/locking/lockdep.c:2267 [inline]
>>>__lock_acquire+0x2149/0x3430 kernel/locking/lockdep.c:3340
>>>lock_acquire+0x2a1/0x630 kernel/locking/lockdep.c:3755
>>>down_read+0x9b/0x150 kernel/locking/rwsem.c:23
>>>crypto_alg_lookup+0x23/0x50 crypto/api.c:199
>>>crypto_larval_lookup.part.10+0x9a/0x3b0 crypto/api.c:217
>>>crypto_larval_lookup crypto/api.c:211 [inline]
>>>crypto_alg_mod_lookup+0x77/0x1b0 crypto/api.c:270
>>>crypto_alloc_base+0x50/0x1e0 crypto/api.c:416
>>>crypto_alloc_cipher include/linux/crypto.h:1407 [inline]
>>>tcp_fastopen_reset_cipher+0xc2/0x2e0 net/ipv4/tcp_fastopen.c:48
>>>tcp_fastopen_init_key_once+0x114/0x120 net/ipv4/tcp_fastopen.c:29
>>>do_tcp_setsockopt.isra.36+0x140a/0x20a0 net/ipv4/tcp.c:2684
>>>tcp_setsockopt+0xb0/0xd0 net/ipv4/tcp.c:2733
>>>sock_common_setsockopt+0x95/0xd0 net/core/sock.c:2731
>>>SYSC_setsockopt net/socket.c:1786 [inline]
>>>SyS_setsockopt+0x25c/0x390 net/socket.c:1765
>>>entry_SYSCALL_64_fastpath+0x1f/0xc2
>>>
>>> -> #3 (sk_lock-AF_INET){+.+.+.}:
>>>validate_chain kernel/locking/lockdep.c:2267 [inline]
>>>__lock_acquire+0x2149/0x3430 kernel/locking/lockdep.c:3340
>>>lock_acquire+0x2a1/0x630 kernel/locking/lockdep.c:3755
>>>lock_sock_nested+0xcb/0x120 net/core/sock.c:2536
>>>lock_sock include/net/sock.h:1460 [inline]
>>>rds_tcp_listen_stop+0x57/0x140 net/rds/tcp_listen.c:284
>>>rds_tcp_kill_sock net/rds/tcp.c:529 [inline]
>>>rds_tcp_dev_event+0x383/0xc50 net/rds/tcp.c:568
>>>notifier_call_chain+0x1b5/0x2b0 kernel/notifier.c:93
>>>__raw_notifier_call_chain kernel/notifier.c:394 [inline]
>>>raw_notifier_call_chain+0x2d/0x40 kernel/notifier.c:401
>>>call_netdevice_notifiers_info+0x51/0x90 net/core/dev.c:1646
>>>call_netdevice_notifiers net/core/dev.c:1662 [inline]
>>>netdev_run_todo+0x3b2/0xa30 net/core/dev.c:7530
>>>rtnl_unlock+0xe/0x10 net/core/rtnetlink.c:104
>>>default_device_exit_batch+0x504/0x620 net/core/dev.c:8334
>>>ops_exit_list.isra.6+0x100/0x150 net/core/net_namespace.c:144
>>>cleanup_net+0x551/0xa90 net/core/net_namespace.c:463
>>>process_one_work+0xbd0/0x1c10 kernel/workqueue.c:2096
>>>worker_thread+0x223/0x1990 kernel/workqueue.c:2230
>>>kthread+0x326/0x3f0 kernel/kthread.c:229
>>>ret_from_fork+0x31/0x40 arch/x86/entry/entry_64.S:430
>>>
>>> -> #2 (rtnl_mutex){+.+.+.}:
>>>validate_chain kernel/locking/lockdep.c:2267 [inline]
>>>__lock_acquire+0x2149/0x3430 kernel/locking/lockdep.c:3340
>>>lock_acquire+0x2a1/0x630 kernel/locking/lockdep.c:3755
>>>__mutex_lock_common kernel/locking/mutex.c:756 [inline]
>>>__mutex_lock+0x172/0x1730 kernel/locking/mutex.c:893
>>>mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:908
>>>rtnl_lock+0x17/0x20 net/core/rtnetlink.c:70
>>>tipc_nl_bearer_dump+0x3ef/0x720 net/tipc/bearer.c:774
>>>genl_lock_dumpit+0x68/0x90 net/netlink/genetlink.c:479
>>>netlink_dump+0x54d/0

crypto: deadlock between crypto_alg_sem/rtnl_mutex/genl_mutex

2017-03-05 Thread Dmitry Vyukov

Hello,

I am getting the following deadlock reports while running syzkaller
fuzzer on net-next/8d70eeb84ab277377c017af6a21d0a337025dede:

==
[ INFO: possible circular locking dependency detected ]
4.10.0+ #5 Not tainted
---
syz-executor6/6143 is trying to acquire lock:
 (nlk->cb_mutex){+.+.+.}, at: []
__netlink_dump_start+0xf4/0x760 net/netlink/af_netlink.c:2187

but task is already holding lock:
 (crypto_alg_sem){+.}, at: []
crypto_user_rcv_msg+0x136/0x4f0 crypto/crypto_user.c:507

which lock already depends on the new lock.


the existing dependency chain (in reverse order) is:

-> #4 (crypto_alg_sem){+.}:
   validate_chain kernel/locking/lockdep.c:2267 [inline]
   __lock_acquire+0x2149/0x3430 kernel/locking/lockdep.c:3340
   lock_acquire+0x2a1/0x630 kernel/locking/lockdep.c:3755
   down_read+0x9b/0x150 kernel/locking/rwsem.c:23
   crypto_alg_lookup+0x23/0x50 crypto/api.c:199
   crypto_larval_lookup.part.10+0x9a/0x3b0 crypto/api.c:217
   crypto_larval_lookup crypto/api.c:211 [inline]
   crypto_alg_mod_lookup+0x77/0x1b0 crypto/api.c:270
   crypto_alloc_base+0x50/0x1e0 crypto/api.c:416
   crypto_alloc_cipher include/linux/crypto.h:1407 [inline]
   tcp_fastopen_reset_cipher+0xc2/0x2e0 net/ipv4/tcp_fastopen.c:48
   tcp_fastopen_init_key_once+0x114/0x120 net/ipv4/tcp_fastopen.c:29
   do_tcp_setsockopt.isra.36+0x140a/0x20a0 net/ipv4/tcp.c:2684
   tcp_setsockopt+0xb0/0xd0 net/ipv4/tcp.c:2733
   sock_common_setsockopt+0x95/0xd0 net/core/sock.c:2731
   SYSC_setsockopt net/socket.c:1786 [inline]
   SyS_setsockopt+0x25c/0x390 net/socket.c:1765
   entry_SYSCALL_64_fastpath+0x1f/0xc2

-> #3 (sk_lock-AF_INET){+.+.+.}:
   validate_chain kernel/locking/lockdep.c:2267 [inline]
   __lock_acquire+0x2149/0x3430 kernel/locking/lockdep.c:3340
   lock_acquire+0x2a1/0x630 kernel/locking/lockdep.c:3755
   lock_sock_nested+0xcb/0x120 net/core/sock.c:2536
   lock_sock include/net/sock.h:1460 [inline]
   rds_tcp_listen_stop+0x57/0x140 net/rds/tcp_listen.c:284
   rds_tcp_kill_sock net/rds/tcp.c:529 [inline]
   rds_tcp_dev_event+0x383/0xc50 net/rds/tcp.c:568
   notifier_call_chain+0x1b5/0x2b0 kernel/notifier.c:93
   __raw_notifier_call_chain kernel/notifier.c:394 [inline]
   raw_notifier_call_chain+0x2d/0x40 kernel/notifier.c:401
   call_netdevice_notifiers_info+0x51/0x90 net/core/dev.c:1646
   call_netdevice_notifiers net/core/dev.c:1662 [inline]
   netdev_run_todo+0x3b2/0xa30 net/core/dev.c:7530
   rtnl_unlock+0xe/0x10 net/core/rtnetlink.c:104
   default_device_exit_batch+0x504/0x620 net/core/dev.c:8334
   ops_exit_list.isra.6+0x100/0x150 net/core/net_namespace.c:144
   cleanup_net+0x551/0xa90 net/core/net_namespace.c:463
   process_one_work+0xbd0/0x1c10 kernel/workqueue.c:2096
   worker_thread+0x223/0x1990 kernel/workqueue.c:2230
   kthread+0x326/0x3f0 kernel/kthread.c:229
   ret_from_fork+0x31/0x40 arch/x86/entry/entry_64.S:430

-> #2 (rtnl_mutex){+.+.+.}:
   validate_chain kernel/locking/lockdep.c:2267 [inline]
   __lock_acquire+0x2149/0x3430 kernel/locking/lockdep.c:3340
   lock_acquire+0x2a1/0x630 kernel/locking/lockdep.c:3755
   __mutex_lock_common kernel/locking/mutex.c:756 [inline]
   __mutex_lock+0x172/0x1730 kernel/locking/mutex.c:893
   mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:908
   rtnl_lock+0x17/0x20 net/core/rtnetlink.c:70
   tipc_nl_bearer_dump+0x3ef/0x720 net/tipc/bearer.c:774
   genl_lock_dumpit+0x68/0x90 net/netlink/genetlink.c:479
   netlink_dump+0x54d/0xd40 net/netlink/af_netlink.c:2127
   __netlink_dump_start+0x4e5/0x760 net/netlink/af_netlink.c:2217
   genl_family_rcv_msg+0xd9d/0x1040 net/netlink/genetlink.c:546
   genl_rcv_msg+0xa6/0x140 net/netlink/genetlink.c:620
   netlink_rcv_skb+0x2ab/0x390 net/netlink/af_netlink.c:2298
   genl_rcv+0x28/0x40 net/netlink/genetlink.c:631
   netlink_unicast_kernel net/netlink/af_netlink.c:1231 [inline]
   netlink_unicast+0x514/0x730 net/netlink/af_netlink.c:1257
   netlink_sendmsg+0xa9f/0xe50 net/netlink/af_netlink.c:1803
   sock_sendmsg_nosec net/socket.c:633 [inline]
   sock_sendmsg+0xca/0x110 net/socket.c:643
   sock_write_iter+0x326/0x600 net/socket.c:846
   call_write_iter include/linux/fs.h:1733 [inline]
   new_sync_write fs/read_write.c:497 [inline]
   __vfs_write+0x483/0x740 fs/read_write.c:510
   vfs_write+0x187/0x530 fs/read_write.c:558
   SYSC_write fs/read_write.c:605 [inline]
   SyS_write+0xfb/0x230 fs/read_write.c:597
   entry_SYSCALL_64_fastpath+0x1f/0xc2

-> #1 (genl_mutex){+.+.+.}:
   validate_chain kernel/locking/lockdep.c:2267 [inline]
   __lock_acquire+0x2149/0x3430 kernel/locking/lockdep.c:3340
   lock_acquire+0x2a1/0x630 kernel/locking/lockdep.c:3755

Re: crypto: deadlock between crypto_alg_sem/rtnl_mutex/genl_mutex

2017-03-05 Thread Dmitry Vyukov

On Sun, Mar 5, 2017 at 4:08 PM, Dmitry Vyukov <dvyu...@google.com> wrote:
> Hello,
>
> I am getting the following deadlock reports while running syzkaller
> fuzzer on net-next/8d70eeb84ab277377c017af6a21d0a337025dede:
>
> ==
> [ INFO: possible circular locking dependency detected ]
> 4.10.0+ #5 Not tainted
> ---
> syz-executor6/6143 is trying to acquire lock:
>  (nlk->cb_mutex){+.+.+.}, at: []
> __netlink_dump_start+0xf4/0x760 net/netlink/af_netlink.c:2187
>
> but task is already holding lock:
>  (crypto_alg_sem){+.}, at: []
> crypto_user_rcv_msg+0x136/0x4f0 crypto/crypto_user.c:507
>
> which lock already depends on the new lock.
>
>
> the existing dependency chain (in reverse order) is:
>
> -> #4 (crypto_alg_sem){+.}:
>validate_chain kernel/locking/lockdep.c:2267 [inline]
>__lock_acquire+0x2149/0x3430 kernel/locking/lockdep.c:3340
>lock_acquire+0x2a1/0x630 kernel/locking/lockdep.c:3755
>down_read+0x9b/0x150 kernel/locking/rwsem.c:23
>crypto_alg_lookup+0x23/0x50 crypto/api.c:199
>crypto_larval_lookup.part.10+0x9a/0x3b0 crypto/api.c:217
>crypto_larval_lookup crypto/api.c:211 [inline]
>crypto_alg_mod_lookup+0x77/0x1b0 crypto/api.c:270
>crypto_alloc_base+0x50/0x1e0 crypto/api.c:416
>crypto_alloc_cipher include/linux/crypto.h:1407 [inline]
>tcp_fastopen_reset_cipher+0xc2/0x2e0 net/ipv4/tcp_fastopen.c:48
>tcp_fastopen_init_key_once+0x114/0x120 net/ipv4/tcp_fastopen.c:29
>do_tcp_setsockopt.isra.36+0x140a/0x20a0 net/ipv4/tcp.c:2684
>tcp_setsockopt+0xb0/0xd0 net/ipv4/tcp.c:2733
>sock_common_setsockopt+0x95/0xd0 net/core/sock.c:2731
>SYSC_setsockopt net/socket.c:1786 [inline]
>SyS_setsockopt+0x25c/0x390 net/socket.c:1765
>entry_SYSCALL_64_fastpath+0x1f/0xc2
>
> -> #3 (sk_lock-AF_INET){+.+.+.}:
>validate_chain kernel/locking/lockdep.c:2267 [inline]
>__lock_acquire+0x2149/0x3430 kernel/locking/lockdep.c:3340
>lock_acquire+0x2a1/0x630 kernel/locking/lockdep.c:3755
>lock_sock_nested+0xcb/0x120 net/core/sock.c:2536
>lock_sock include/net/sock.h:1460 [inline]
>rds_tcp_listen_stop+0x57/0x140 net/rds/tcp_listen.c:284
>rds_tcp_kill_sock net/rds/tcp.c:529 [inline]
>rds_tcp_dev_event+0x383/0xc50 net/rds/tcp.c:568
>notifier_call_chain+0x1b5/0x2b0 kernel/notifier.c:93
>__raw_notifier_call_chain kernel/notifier.c:394 [inline]
>raw_notifier_call_chain+0x2d/0x40 kernel/notifier.c:401
>call_netdevice_notifiers_info+0x51/0x90 net/core/dev.c:1646
>call_netdevice_notifiers net/core/dev.c:1662 [inline]
>netdev_run_todo+0x3b2/0xa30 net/core/dev.c:7530
>rtnl_unlock+0xe/0x10 net/core/rtnetlink.c:104
>default_device_exit_batch+0x504/0x620 net/core/dev.c:8334
>ops_exit_list.isra.6+0x100/0x150 net/core/net_namespace.c:144
>cleanup_net+0x551/0xa90 net/core/net_namespace.c:463
>process_one_work+0xbd0/0x1c10 kernel/workqueue.c:2096
>worker_thread+0x223/0x1990 kernel/workqueue.c:2230
>kthread+0x326/0x3f0 kernel/kthread.c:229
>ret_from_fork+0x31/0x40 arch/x86/entry/entry_64.S:430
>
> -> #2 (rtnl_mutex){+.+.+.}:
>validate_chain kernel/locking/lockdep.c:2267 [inline]
>__lock_acquire+0x2149/0x3430 kernel/locking/lockdep.c:3340
>lock_acquire+0x2a1/0x630 kernel/locking/lockdep.c:3755
>__mutex_lock_common kernel/locking/mutex.c:756 [inline]
>__mutex_lock+0x172/0x1730 kernel/locking/mutex.c:893
>mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:908
>rtnl_lock+0x17/0x20 net/core/rtnetlink.c:70
>tipc_nl_bearer_dump+0x3ef/0x720 net/tipc/bearer.c:774
>genl_lock_dumpit+0x68/0x90 net/netlink/genetlink.c:479
>netlink_dump+0x54d/0xd40 net/netlink/af_netlink.c:2127
>__netlink_dump_start+0x4e5/0x760 net/netlink/af_netlink.c:2217
>genl_family_rcv_msg+0xd9d/0x1040 net/netlink/genetlink.c:546
>genl_rcv_msg+0xa6/0x140 net/netlink/genetlink.c:620
>netlink_rcv_skb+0x2ab/0x390 net/netlink/af_netlink.c:2298
>genl_rcv+0x28/0x40 net/netlink/genetlink.c:631
>netlink_unicast_kernel net/netlink/af_netlink.c:1231 [inline]
>netlink_unicast+0x514/0x730 net/netlink/af_netlink.c:1257
>netlink_sendmsg+0xa9f/0xe50 net/netlink/af_netlink.c:1803
>sock_sendmsg_nosec net/socket.c:633 [inline]
>sock_sendmsg+0xca/0x110 net/socket.c:643
>sock_write_iter+0x326/0x600 net/socket.c:846
>call_write_iter inc

Re: crypto: deadlock between crypto_alg_sem/rtnl_mutex/genl_mutex

2017-03-06 Thread Dmitry Vyukov

On Sun, Mar 5, 2017 at 6:36 PM, Dmitry Vyukov <dvyu...@google.com> wrote:
> On Sun, Mar 5, 2017 at 4:08 PM, Dmitry Vyukov <dvyu...@google.com> wrote:
>> Hello,
>>
>> I am getting the following deadlock reports while running syzkaller
>> fuzzer on net-next/8d70eeb84ab277377c017af6a21d0a337025dede:
>>
>> ==
>> [ INFO: possible circular locking dependency detected ]
>> 4.10.0+ #5 Not tainted
>> ---
>> syz-executor6/6143 is trying to acquire lock:
>>  (nlk->cb_mutex){+.+.+.}, at: []
>> __netlink_dump_start+0xf4/0x760 net/netlink/af_netlink.c:2187
>>
>> but task is already holding lock:
>>  (crypto_alg_sem){+.}, at: []
>> crypto_user_rcv_msg+0x136/0x4f0 crypto/crypto_user.c:507
>>
>> which lock already depends on the new lock.
>>
>>
>> the existing dependency chain (in reverse order) is:
>>
>> -> #4 (crypto_alg_sem){+.}:
>>validate_chain kernel/locking/lockdep.c:2267 [inline]
>>__lock_acquire+0x2149/0x3430 kernel/locking/lockdep.c:3340
>>lock_acquire+0x2a1/0x630 kernel/locking/lockdep.c:3755
>>down_read+0x9b/0x150 kernel/locking/rwsem.c:23
>>crypto_alg_lookup+0x23/0x50 crypto/api.c:199
>>crypto_larval_lookup.part.10+0x9a/0x3b0 crypto/api.c:217
>>crypto_larval_lookup crypto/api.c:211 [inline]
>>crypto_alg_mod_lookup+0x77/0x1b0 crypto/api.c:270
>>crypto_alloc_base+0x50/0x1e0 crypto/api.c:416
>>crypto_alloc_cipher include/linux/crypto.h:1407 [inline]
>>tcp_fastopen_reset_cipher+0xc2/0x2e0 net/ipv4/tcp_fastopen.c:48
>>tcp_fastopen_init_key_once+0x114/0x120 net/ipv4/tcp_fastopen.c:29
>>do_tcp_setsockopt.isra.36+0x140a/0x20a0 net/ipv4/tcp.c:2684
>>tcp_setsockopt+0xb0/0xd0 net/ipv4/tcp.c:2733
>>sock_common_setsockopt+0x95/0xd0 net/core/sock.c:2731
>>SYSC_setsockopt net/socket.c:1786 [inline]
>>SyS_setsockopt+0x25c/0x390 net/socket.c:1765
>>entry_SYSCALL_64_fastpath+0x1f/0xc2
>>
>> -> #3 (sk_lock-AF_INET){+.+.+.}:
>>validate_chain kernel/locking/lockdep.c:2267 [inline]
>>__lock_acquire+0x2149/0x3430 kernel/locking/lockdep.c:3340
>>lock_acquire+0x2a1/0x630 kernel/locking/lockdep.c:3755
>>lock_sock_nested+0xcb/0x120 net/core/sock.c:2536
>>lock_sock include/net/sock.h:1460 [inline]
>>rds_tcp_listen_stop+0x57/0x140 net/rds/tcp_listen.c:284
>>rds_tcp_kill_sock net/rds/tcp.c:529 [inline]
>>rds_tcp_dev_event+0x383/0xc50 net/rds/tcp.c:568
>>notifier_call_chain+0x1b5/0x2b0 kernel/notifier.c:93
>>__raw_notifier_call_chain kernel/notifier.c:394 [inline]
>>raw_notifier_call_chain+0x2d/0x40 kernel/notifier.c:401
>>call_netdevice_notifiers_info+0x51/0x90 net/core/dev.c:1646
>>call_netdevice_notifiers net/core/dev.c:1662 [inline]
>>netdev_run_todo+0x3b2/0xa30 net/core/dev.c:7530
>>rtnl_unlock+0xe/0x10 net/core/rtnetlink.c:104
>>default_device_exit_batch+0x504/0x620 net/core/dev.c:8334
>>ops_exit_list.isra.6+0x100/0x150 net/core/net_namespace.c:144
>>cleanup_net+0x551/0xa90 net/core/net_namespace.c:463
>>process_one_work+0xbd0/0x1c10 kernel/workqueue.c:2096
>>worker_thread+0x223/0x1990 kernel/workqueue.c:2230
>>kthread+0x326/0x3f0 kernel/kthread.c:229
>>ret_from_fork+0x31/0x40 arch/x86/entry/entry_64.S:430
>>
>> -> #2 (rtnl_mutex){+.+.+.}:
>>validate_chain kernel/locking/lockdep.c:2267 [inline]
>>__lock_acquire+0x2149/0x3430 kernel/locking/lockdep.c:3340
>>lock_acquire+0x2a1/0x630 kernel/locking/lockdep.c:3755
>>__mutex_lock_common kernel/locking/mutex.c:756 [inline]
>>__mutex_lock+0x172/0x1730 kernel/locking/mutex.c:893
>>mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:908
>>rtnl_lock+0x17/0x20 net/core/rtnetlink.c:70
>>tipc_nl_bearer_dump+0x3ef/0x720 net/tipc/bearer.c:774
>>genl_lock_dumpit+0x68/0x90 net/netlink/genetlink.c:479
>>netlink_dump+0x54d/0xd40 net/netlink/af_netlink.c:2127
>>__netlink_dump_start+0x4e5/0x760 net/netlink/af_netlink.c:2217
>>genl_family_rcv_msg+0xd9d/0x1040 net/netlink/genetlink.c:546
>>genl_rcv_msg+0xa6/0x140 net/netlink/genetlink.c:620
>>netlink_rcv_skb+0x2ab/0x390 net/netlink/af_netlink.c:2298
>>genl_rcv+0x28/0x40 net/netlink/genetlink.c:631

Re: x509 parsing bug + fuzzing crypto in the userspace

2017-11-28 Thread Dmitry Vyukov

On Fri, Nov 24, 2017 at 5:31 PM, Stephan Mueller <smuel...@chronox.de> wrote:
> Am Freitag, 24. November 2017, 17:25:55 CET schrieb Dmitry Vyukov:
>
> Hi Dmitry,
>
>> Eric also pointed me to grep. But I can't say the code is intuitive.
>> I've spent way more time than I expected to just get a list of all
>> algorithms with their types. Say, in some cases algorithm descriptions
>> do not specify their type, it's somehow plumbed somewhere later.
>> Getting the list at runtime allowed me to get relevant ones with
>> proper types.
>
> Hehe, getting the list of ciphers in the script I sent the other day took me a
> long time.
>
> Maybe you leave it for now and come back later for a good grepping code to
> search the sources.


First bugs uncovered by the change started popping up:
https://groups.google.com/forum/#!forum/syzkaller-bugs
More coming!

Re: x509 parsing bug + fuzzing crypto in the userspace

2017-11-24 Thread Dmitry Vyukov

On Thu, Nov 23, 2017 at 1:35 PM, Stephan Mueller <smuel...@chronox.de> wrote:
> Am Donnerstag, 23. November 2017, 12:34:54 CET schrieb Dmitry Vyukov:
>
> Hi Dmitry,
>
>> Btw, I've started doing some minimal improvements, did not yet sorted
>> out alg types/names, and fuzzer started scratching surface:
>>
>> WARNING: kernel stack regs has bad 'bp' value 77 Nov 23 2017 12:29:36 CET
>> general protection fault in af_alg_free_areq_sgls 54 Nov 23 2017 12:23:30
>> CET general protection fault in crypto_chacha20_crypt 100 Nov 23 2017
>> 12:29:48 CET suspicious RCU usage at ./include/trace/events/kmem.h:LINE 88
>> Nov 23 2017 12:29:15 CET
>
> This all looks strange. Where would RCU come into play with
> af_alg_free_areq_sgls?
>
> Do you have a reproducer?
>>
>> This strongly suggests that we need to dig deeper.
>
> Absolutely. That is why I started my fuzzer that turned up already quite some
> issues.

I've cooked syzkaller change that teaches it to generate more
algorithm names. Probably not idea, but much better than was before:
https://github.com/google/syzkaller/blob/ddf7b3e0655cf6dfeacfe509e477c1486d2cc7db/sys/linux/alg.go
(if you see any obvious issues there, feedback is welcome, I still did
not figure out completely difference between e.g. HASH/AHASH,
BLKCIPHER/ABLKCIPHER as most of them seem to be interchangable; this
was mostly based on try and trial approach).

All bugs with details will soon be reported by syzbot
(https://goo.gl/tpsmEJ) to kernel mailing lists with all details.

Stephan, thanks for your help!

Re: x509 parsing bug + fuzzing crypto in the userspace

2017-11-24 Thread Dmitry Vyukov

On Fri, Nov 24, 2017 at 4:03 PM, Stephan Mueller <smuel...@chronox.de> wrote:
> Am Freitag, 24. November 2017, 14:49:49 CET schrieb Dmitry Vyukov:
>
> Hi Dmitry,
>
>> I've cooked syzkaller change that teaches it to generate more
>> algorithm names. Probably not idea, but much better than was before:
>> https://github.com/google/syzkaller/blob/ddf7b3e0655cf6dfeacfe509e477c1486d2
>> cc7db/sys/linux/alg.go
>
> I am not as fluent in GO, so I may miss a point here:
>
> {"authencesn", []int{ALG_HASH, ALG_BLKCIPHER}},
> {"authenc", []int{ALG_HASH, ALG_BLKCIPHER}},
>
> These both are templates to form an AEAD cipher. They allow you to specify the
> block cipher and the hash type.


Most of this works the way you describe. This is effectively production grammar.
authencesn is a template produces AEAD and consumes 2 args: ALG_HASH
and ALG_BLKCIPHER.



> {"rfc7539esp", []int{ALG_BLKCIPHER, ALG_HASH}},
> {"rfc7539", []int{ALG_BLKCIPHER, ALG_HASH}},
> {"rfc4543", []int{ALG_AEAD}},
> {"rfc4106", []int{ALG_AEAD}},
>
> These are no ciphers per se, but simply formatting mechanisms. For example, to
> make use of rfc4106, you must split the IV: the first four bytes need to be
> appended to the key and the trailing 8 bytes are used as the IV. Any other
> formatting should cause an error. Besides, these implementations should only
> work with some AEAD ciphers like GCM.



So rfc4543 consumes AEAD and itself is a AEAD (can be passed whenever
AEAD is requried), right? If yes, then it works the way you described
(minus the part that is works only with _some_ AEAD ciphers, fuzzer
will try to blindly combine it with all of them).

rfc7539 consumes 2 args, not 1, right? I figured out that it consumes
BLKCIPHER and HASH.



>
>
> {"pcrypt", []int{ALG_AEAD}},
> {"rfc4309", []int{ALG_AEAD}},
> {"gcm", []int{ALG_CIPHER}},
> {"gcm_base", []int{ALG_BLKCIPHER, ALG_HASH}},
> {"ccm", []int{ALG_CIPHER}},
> {"ccm_base", []int{ALG_BLKCIPHER, ALG_HASH}},
>
>
>
>
> {"echainiv", []int{ALG_AEAD}},
> {"seqiv", []int{ALG_AEAD}},
>
> These are IV generators and should be used with an AEAD cipher to internally
> generate IVs. Note, the use of IV generators have changed with 4.2 (see my
> script I sent you as part of this thread).
>
>
>
> {"gcm(aes)", nil},
>
> gcm() is also a template that can consume any block cipher, including aes-
> generic, serpent, ...
>
> {"gcm_base(ctr(aes-aesni),ghash-generic)", nil},
>
> Again, ctr() is a template that can consume other block ciphers.
>
> {"generic-gcm-aesni", nil},
>
> Does this exist?

I can create it:

strcpy(addr.salg_type, "aead");
strcpy(addr.salg_name, "generic-gcm-aesni");

bind(3, {sa_family=0x26 /* AF_??? */,
sa_data="aead\0\0\0\0\0\0\0\0\0\0"}, 88) = 0



>
> {"rfc4106(gcm(aes))", nil},
>
> rfc... is a template that can consume AEAD ciphers.
>
> {"rfc4106-gcm-aesni", nil},
> {"__gcm-aes-aesni", nil},
> {"__driver-gcm-aes-aesni", nil},
>
> Please specifically test that __driver_... names should NEVER be allocatable
> from user space.
>
>
>
> // algorithms:
> {"cbc(aes)", nil},
>
> cbc() is a template
>
>
> {"cbc(aes-aesni)", nil},
> {"chacha20", nil},
> {"chacha20-simd", nil},
> {"pcbc(aes)", nil},
> {"pcbc-aes-aesni", nil},
> {"fpu(pcbc(__aes))", nil},
> {"fpu(pcbc(__aes-aesni))", nil},
> {"pcbc(__aes)", nil},
> {"pcbc(__aes-aesni)", nil},
>
> They are all templates.
>
> {"xts(aes)", nil},
>
> xts() is a template.
>
> Note, starting with 4.9, you must use xts(ecb(aes)).

"xts(aes)" also works on upstream (4.15):

strcpy(addr.salg_type, "skcipher");
strcpy(addr.salg_name, "xts(aes)");

bind(3, {sa_family=0x26 /* AF_??? */, sa_data="skcipher\0\0\0\0\0\0"}, 88) = 0



> {"xts-aes-aesni", nil},
> {"ctr(aes)", nil},
>
> ctr() is a template
>
>
>
> In general, I would rather su

Re: x509 parsing bug + fuzzing crypto in the userspace

2017-11-24 Thread Dmitry Vyukov

On Fri, Nov 24, 2017 at 5:19 PM, Stephan Mueller <smuel...@chronox.de> wrote:
> Am Freitag, 24. November 2017, 17:10:59 CET schrieb Dmitry Vyukov:
>
> Hi Dmitry,
>
>> That's more-or-less what I did. Here:
>>
>> var allAlgs = map[int][]algDesc{
>>   ALG_AEAD: []algDesc{
>> // templates:
>> {"authencesn", []int{ALG_HASH, ALG_BLKCIPHER}},
>> {"gcm", []int{ALG_CIPHER}},
>>
>> ALG_AEAD means that all of these names produce ALG_AEAD as the result.
>> Names that has arguments after them (authencesn, gcm) are templates,
>> and the list of arguments denote number and types of arguments for the
>> template.
>>
>> So here authencesn is a template that produces AEAD and has 2
>> arguments: first is ALG_HASH, second is ALG_BLKCIPHER.
>> Similarly, gsm is template that produces AEAD and has 1 argument of type
>> CIPHER.
>>
>> ...
>> // algorithms:
>> {"gcm(aes)", nil},
>> {"__gcm-aes-aesni", nil},
>>
>> If second value is nil, that's a complete algorithm (also of type
>> AEAD). I pulled in all registered implementations, so the contain the
>> specialized implementations like "gcm(aes)", but note that gsm is also
>> described as template above so fuzzer can use other inner ALG_CIPHER
>> algorithms with gsm.
>
> Thanks for the clarification -- as said, I am not very fluent in GO yet. :-)
>>
>> > Start another test where you arbitrarily mix-n-match templates and ciphers
>> > or even bogus names, NULL, etc.
>> >
>> >
>> > One word of warning: some cipher names may look like templates, but they
>> > are not: gcm(aes) in arch/x86/crypto/ is a complete cipher and not
>> > consisting of templates. Thus, I would always use the driver names
>> > (gcm-aes-aesni for the given example) to ensure you test exactly the
>> > cipher you had in mind.
>> I've pulled all registered algs from kernel with the following code:
>>
>> void crypto_dumb(void)
>> {
>> struct crypto_alg *alg;
>> const char *type;
>>
>> down_write(_alg_sem);
>> list_for_each_entry(alg, _alg_list, cra_list) {
>> pr_err("name=%s driver=%s async=%d type=%d\n",
>> alg->cra_name, alg->cra_driver_name,
>> !!(alg->cra_flags & CRYPTO_ALG_ASYNC),
>> alg->cra_flags & CRYPTO_ALG_TYPE_MASK);
>> }
>> up_write(_alg_sem);
>> }
>
> I would not obtain the list from a running kernel. Many ciphers are
> implemented in modules that are loaded on demand (or sometimes even manually).
> This list therefore is not complete per definition. Thus, I would rather grep
> through the code and search for cra_driver_name.
>
> And once you get to templates, it is even more imperative to grep the code:
> Only full ciphers are listed in the given list. A template itself is never a
> cipher and will not show up there. Only once a template is combined into a
> full cipher, it will be registered in the list at runtime.
>
> Note, the traversed list is what forms /proc/crypto.
>
> Thus, after a boot, you will not see, say, rfc4106(gcm(aes)) in /proc/crypto
> (or that list). But after one allocation of that cipher, it suddenly pops up
> in /proc/crypto or that list.


I've enabled ALL crypto configs as not-modules. So I should have all
of them there (at least all x86 ones).
I've also dumped the templates the same way:

+void crypto_template_dumb(void)
+{
+   struct crypto_template *tmpl;
+
+   down_read(_alg_sem);
+   list_for_each_entry(tmpl, _template_list, list) {
+   pr_err("name=%s\n", tmpl->name);
+   }
+   up_read(_alg_sem);
+}


Eric also pointed me to grep. But I can't say the code is intuitive.
I've spent way more time than I expected to just get a list of all
algorithms with their types. Say, in some cases algorithm descriptions
do not specify their type, it's somehow plumbed somewhere later.
Getting the list at runtime allowed me to get relevant ones with
proper types.






>> so I've got these "gcm(aes)" as well. Which is why you see
>> {"gcm(aes)", nil} in algorithms section.
>>
>> However, the name was actually "__gcm-aes-aesni", not "gcm-aes-aesni".
>> But kernel does not let me allocate neither of them (EINVAL in both
>> cases).
>
> Very good. Please leave such test, because they must not be allocatable.
>
> Ciao
> Stephan

Re: x509 parsing bug + fuzzing crypto in the userspace

2017-11-24 Thread Dmitry Vyukov

On Fri, Nov 24, 2017 at 3:36 PM, Stephan Mueller <smuel...@chronox.de> wrote:
> Am Freitag, 24. November 2017, 14:49:49 CET schrieb Dmitry Vyukov:
>
> Hi Dmitry,
>
>> On Thu, Nov 23, 2017 at 1:35 PM, Stephan Mueller <smuel...@chronox.de>
> wrote:
>> > Am Donnerstag, 23. November 2017, 12:34:54 CET schrieb Dmitry Vyukov:
>> >
>> > Hi Dmitry,
>> >
>> >> Btw, I've started doing some minimal improvements, did not yet sorted
>> >> out alg types/names, and fuzzer started scratching surface:
>> >>
>> >> WARNING: kernel stack regs has bad 'bp' value 77 Nov 23 2017 12:29:36 CET
>> >> general protection fault in af_alg_free_areq_sgls 54 Nov 23 2017 12:23:30
>> >> CET general protection fault in crypto_chacha20_crypt 100 Nov 23 2017
>> >> 12:29:48 CET suspicious RCU usage at ./include/trace/events/kmem.h:LINE
>> >> 88
>> >> Nov 23 2017 12:29:15 CET
>> >
>> > This all looks strange. Where would RCU come into play with
>> > af_alg_free_areq_sgls?
>> >
>> > Do you have a reproducer?
>> >
>> >> This strongly suggests that we need to dig deeper.
>> >
>> > Absolutely. That is why I started my fuzzer that turned up already quite
>> > some issues.
>>
>> I've cooked syzkaller change that teaches it to generate more
>> algorithm names. Probably not idea, but much better than was before:
>> https://github.com/google/syzkaller/blob/ddf7b3e0655cf6dfeacfe509e477c1486d2
>> cc7db/sys/linux/alg.go (if you see any obvious issues there, feedback is
>> welcome,
>
> I will peek into that code shortly.
>
>> I still did not figure out completely difference between e.g.
>> HASH/AHASH,
>
> AHASH is the asynchronous hash. I.e. the implementation can sleep.
>
> HASH == SHASH and is the synchronous hash. I.e. that implementation will never
> sleep.
>
> An SHASH can be turned into an AHASH by using cryptd().
>
> An AHASH can never be turned into an SHASH.
>
> To use SHASH implementations, you use the *_shash API calls. This API does not
> require a request structure.
>
> To use AHASH implementations, you use the *_ahash API calls. This API requires
> the use of ahash_request_* calls. By transparently employing cryptd(), the
> kernel allows the use of SHASH implementations with the AHASH API.
>
> Currently there is only one real AHASH implementation outside specific
> hardware drivers: sha1_mb and sha2*_mb found in arch/x86/crypto/. This
> implementation can only be used with the AHASH API. All (other) SHASH
> implementations can be used with either the shash or the ahash API, because
> when using it as AHASH, the kernel automatically uses the cryptd() under the
> hood.


I am interested solely in user-space API because that's what fuzzer
uses. *_shash, ahash_request_* are of little help.
Your last sentence above means that there is _no_ difference between
HASH and AHASH from user-space?
I thrown all HASH/AHASH algs into a single bucket here:
https://github.com/google/syzkaller/blob/ddf7b3e0655cf6dfeacfe509e477c1486d2cc7db/sys/linux/alg.go
And similarly for BLKCIPHER/ABLKCIPHER.


Few additional questions:

1. just to double check: compress algs are not accessible from
user-space, right?

2. There is some setup for algorithms (ALG_SET_KEY,
ALG_SET_AEAD_AUTHSIZE setsockopts and ALG_SET_IV, ALG_SET_OP,
ALG_SET_AEAD_ASSOCLEN control messages are the ones that was able to
find). Now if I chain something complex like
gcm_base(ctr(aes-aesni),ghash-generic) (I don't know if algorithms
there require setup or not, but let's assume they do).  How do I setup
inner algorithms parameters (e.g. aes-aesni in this case)? Is there a
way to call setsockopt effectively on a particular inner alg? Or pass
control messages to an inner alg? Maybe I am asking non-sense, but
that's what comes to my mind looking at the api.




>
> Dto. BLKCIPHER is the synchronous and ABLKCIPHER is the asynchronous
> implementation. The same logic applies here as discussed for the SHASH/AHASH.
>
> There used to be a blkcipher and ablkcipher API just like the shash/ahash
> which follows exactly the same logic.
>
> About a year ago, the skcipher API was introduced which tries to get rid of
> the blkcipher API and supersedes the ablkcipher API. I.e. per default, the
> skcipher is asynchronous in nature. Yet, you can make it synchronous by
> supplying a specific flag/mask combo during crypto_skcipher_alloc.
>
> Again, BLKCIPHERS are turned into async implementations using the cryptd
> transparently by the kernel.
>
> PS: AF_ALG does *not* allow choosing the API discussed above. It always uses
> the async API (which implies that *all* cipher implementat

Re: x509 parsing bug + fuzzing crypto in the userspace

2017-11-24 Thread Dmitry Vyukov

On Fri, Nov 24, 2017 at 4:03 PM, Stephan Mueller <smuel...@chronox.de> wrote:
> Am Freitag, 24. November 2017, 14:49:49 CET schrieb Dmitry Vyukov:
>
> Hi Dmitry,
>
>> I've cooked syzkaller change that teaches it to generate more
>> algorithm names. Probably not idea, but much better than was before:
>> https://github.com/google/syzkaller/blob/ddf7b3e0655cf6dfeacfe509e477c1486d2
>> cc7db/sys/linux/alg.go
>
> I am not as fluent in GO, so I may miss a point here:
>
> {"authencesn", []int{ALG_HASH, ALG_BLKCIPHER}},
> {"authenc", []int{ALG_HASH, ALG_BLKCIPHER}},
>
> These both are templates to form an AEAD cipher. They allow you to specify the
> block cipher and the hash type.
>
>
> {"rfc7539esp", []int{ALG_BLKCIPHER, ALG_HASH}},
> {"rfc7539", []int{ALG_BLKCIPHER, ALG_HASH}},
> {"rfc4543", []int{ALG_AEAD}},
> {"rfc4106", []int{ALG_AEAD}},
>
> These are no ciphers per se, but simply formatting mechanisms. For example, to
> make use of rfc4106, you must split the IV: the first four bytes need to be
> appended to the key and the trailing 8 bytes are used as the IV. Any other
> formatting should cause an error. Besides, these implementations should only
> work with some AEAD ciphers like GCM.
>
>
> {"pcrypt", []int{ALG_AEAD}},
> {"rfc4309", []int{ALG_AEAD}},
> {"gcm", []int{ALG_CIPHER}},
> {"gcm_base", []int{ALG_BLKCIPHER, ALG_HASH}},
> {"ccm", []int{ALG_CIPHER}},
> {"ccm_base", []int{ALG_BLKCIPHER, ALG_HASH}},
>
>
>
>
> {"echainiv", []int{ALG_AEAD}},
> {"seqiv", []int{ALG_AEAD}},
>
> These are IV generators and should be used with an AEAD cipher to internally
> generate IVs. Note, the use of IV generators have changed with 4.2 (see my
> script I sent you as part of this thread).
>
>
>
> {"gcm(aes)", nil},
>
> gcm() is also a template that can consume any block cipher, including aes-
> generic, serpent, ...
>
> {"gcm_base(ctr(aes-aesni),ghash-generic)", nil},
>
> Again, ctr() is a template that can consume other block ciphers.
>
> {"generic-gcm-aesni", nil},
>
> Does this exist?
>
> {"rfc4106(gcm(aes))", nil},
>
> rfc... is a template that can consume AEAD ciphers.
>
> {"rfc4106-gcm-aesni", nil},
> {"__gcm-aes-aesni", nil},
> {"__driver-gcm-aes-aesni", nil},
>
> Please specifically test that __driver_... names should NEVER be allocatable
> from user space.
>
>
>
> // algorithms:
> {"cbc(aes)", nil},
>
> cbc() is a template
>
>
> {"cbc(aes-aesni)", nil},
> {"chacha20", nil},
> {"chacha20-simd", nil},
> {"pcbc(aes)", nil},
> {"pcbc-aes-aesni", nil},
> {"fpu(pcbc(__aes))", nil},
> {"fpu(pcbc(__aes-aesni))", nil},
> {"pcbc(__aes)", nil},
> {"pcbc(__aes-aesni)", nil},
>
> They are all templates.
>
> {"xts(aes)", nil},
>
> xts() is a template.
>
> Note, starting with 4.9, you must use xts(ecb(aes)).
>
> {"xts-aes-aesni", nil},
> {"ctr(aes)", nil},
>
> ctr() is a template
>
>
>
> In general, I would rather suggest:
>
> 1. identify all templates and mark them what they can consume (other
> templates, AEAD, skcipher, hash, ...)
>
> 2. identify all ciphers (aes, aes-generic, aes-aesni, ...) and identify their
> types (skcipher, hash)
>
> 3. mix-n-match templates with the ciphers according to what templates can
> consume


That's more-or-less what I did. Here:

var allAlgs = map[int][]algDesc{
  ALG_AEAD: []algDesc{
// templates:
{"authencesn", []int{ALG_HASH, ALG_BLKCIPHER}},
{"gcm", []int{ALG_CIPHER}},

ALG_AEAD means that all of these names produce ALG_AEAD as the result.
Names that has arguments after them (authencesn, gcm) are templates,
and the list of arguments denote number and types of arguments for the
template.

So here authencesn is a template that produces AEAD and has 2
arguments: first is ALG_HASH, second is ALG_BLKCIPHER.
Similarly, gsm is template that produces AEAD and has 1 argument of

Re: x509 parsing bug + fuzzing crypto in the userspace

2017-11-24 Thread Dmitry Vyukov

On Fri, Nov 24, 2017 at 4:13 PM, Stephan Mueller <smuel...@chronox.de> wrote:
> Am Freitag, 24. November 2017, 15:55:59 CET schrieb Dmitry Vyukov:
>
> Hi Dmitry,
>
>> On Fri, Nov 24, 2017 at 3:36 PM, Stephan Mueller <smuel...@chronox.de>
> wrote:
>> > Am Freitag, 24. November 2017, 14:49:49 CET schrieb Dmitry Vyukov:
>> >
>> > Hi Dmitry,
>> >
>> >> On Thu, Nov 23, 2017 at 1:35 PM, Stephan Mueller <smuel...@chronox.de>
>> >
>> > wrote:
>> >> > Am Donnerstag, 23. November 2017, 12:34:54 CET schrieb Dmitry Vyukov:
>> >> >
>> >> > Hi Dmitry,
>> >> >
>> >> >> Btw, I've started doing some minimal improvements, did not yet sorted
>> >> >> out alg types/names, and fuzzer started scratching surface:
>> >> >>
>> >> >> WARNING: kernel stack regs has bad 'bp' value 77 Nov 23 2017 12:29:36
>> >> >> CET
>> >> >> general protection fault in af_alg_free_areq_sgls 54 Nov 23 2017
>> >> >> 12:23:30
>> >> >> CET general protection fault in crypto_chacha20_crypt 100 Nov 23 2017
>> >> >> 12:29:48 CET suspicious RCU usage at
>> >> >> ./include/trace/events/kmem.h:LINE
>> >> >> 88
>> >> >> Nov 23 2017 12:29:15 CET
>> >> >
>> >> > This all looks strange. Where would RCU come into play with
>> >> > af_alg_free_areq_sgls?
>> >> >
>> >> > Do you have a reproducer?
>> >> >
>> >> >> This strongly suggests that we need to dig deeper.
>> >> >
>> >> > Absolutely. That is why I started my fuzzer that turned up already
>> >> > quite
>> >> > some issues.
>> >>
>> >> I've cooked syzkaller change that teaches it to generate more
>> >> algorithm names. Probably not idea, but much better than was before:
>> >> https://github.com/google/syzkaller/blob/ddf7b3e0655cf6dfeacfe509e477c148
>> >> 6d2 cc7db/sys/linux/alg.go (if you see any obvious issues there, feedback
>> >> is welcome,
>> >
>> > I will peek into that code shortly.
>> >
>> >> I still did not figure out completely difference between e.g.
>> >> HASH/AHASH,
>> >
>> > AHASH is the asynchronous hash. I.e. the implementation can sleep.
>> >
>> > HASH == SHASH and is the synchronous hash. I.e. that implementation will
>> > never sleep.
>> >
>> > An SHASH can be turned into an AHASH by using cryptd().
>> >
>> > An AHASH can never be turned into an SHASH.
>> >
>> > To use SHASH implementations, you use the *_shash API calls. This API does
>> > not require a request structure.
>> >
>> > To use AHASH implementations, you use the *_ahash API calls. This API
>> > requires the use of ahash_request_* calls. By transparently employing
>> > cryptd(), the kernel allows the use of SHASH implementations with the
>> > AHASH API.
>> >
>> > Currently there is only one real AHASH implementation outside specific
>> > hardware drivers: sha1_mb and sha2*_mb found in arch/x86/crypto/. This
>> > implementation can only be used with the AHASH API. All (other) SHASH
>> > implementations can be used with either the shash or the ahash API,
>> > because
>> > when using it as AHASH, the kernel automatically uses the cryptd() under
>> > the hood.
>>
>> I am interested solely in user-space API because that's what fuzzer
>> uses. *_shash, ahash_request_* are of little help.
>> Your last sentence above means that there is _no_ difference between
>> HASH and AHASH from user-space?
>
> Correct.
>
>> I thrown all HASH/AHASH algs into a single bucket here:
>> https://github.com/google/syzkaller/blob/ddf7b3e0655cf6dfeacfe509e477c1486d2
>> cc7db/sys/linux/alg.go And similarly for BLKCIPHER/ABLKCIPHER.
>
> This approach is correct.
>>
>>
>> Few additional questions:
>>
>> 1. just to double check: compress algs are not accessible from
>> user-space, right?
>
> Right, because there is no algif_acomp (yet).
>>
>> 2. There is some setup for algorithms (ALG_SET_KEY,
>> ALG_SET_AEAD_AUTHSIZE setsockopts and ALG_SET_IV, ALG_SET_OP,
>> ALG_SET_AEAD_ASSOCLEN control messages are the ones that was able to
>> find).
>
> ... and do not forget that you need to call the setup calls *before* the
> accept() call for an opera

Re: x509 parsing bug + fuzzing crypto in the userspace

2017-11-23 Thread Dmitry Vyukov

On Thu, Nov 23, 2017 at 10:32 AM, Dmitry Vyukov <dvyu...@google.com> wrote:
> On Wed, Nov 22, 2017 at 6:08 PM, Stephan Mueller <smuel...@chronox.de> wrote:
>> Am Mittwoch, 22. November 2017, 11:44:51 CET schrieb Dmitry Vyukov:
>>
>> Hi Dmitry,
>>
>>>
>>> Thanks! I think we can incorporate this into syzkaller.
>>>
>>> One question: what's the relation between alg names and type ("aead",
>>> "hash", "rng", "skcipher")?
>>
>> If you refer to AF_ALG, then the following applies:
>>
>> AF_ALG type of aead -> kernel crypto API: crypto_aead_*, aead_request_*
>>
>> AF_ALG type of skcipher -> kernel crypto API: crypto_skcipher_*,
>> skcipher_request_*
>>
>> AF_ALG type of hash -> kernel crypto API: crypto_ahash_*, ahash_request_*
>>
>> AF_ALG type of rng -> kernel crypto API: crypto_rng_*
>>
>>
>>> I remember I already looked at it before
>>> and could not figure it out. Are all algorithms and templates
>>> partitioned between types? Or they are orthogonal?
>>
>> If you refer to the cipher names, there are two types: templates and cipher
>> implementations. See [1] for details. [2] gives you some ideas of the
>> interrelationships between the templates and the ciphers.
>>
>> The relationship between the names and the AF_ALG names mentioned above is
>> defined with the .cra_flags in the cipher specification of each cipher
>> implementation. See [3] for details.
>>
>> Note, I started some fuzzing work in my libkcapi test application. See [4] 
>> for
>> the implementation.
>>
>> [1] 
>> http://www.chronox.de/crypto-API/crypto/architecture.html#crypto-api-cipher-references-and-priority
>>
>> [2] 
>> http://www.chronox.de/crypto-API/crypto/architecture.html#internal-structure-of-kernel-crypto-api
>>
>> [3] 
>> http://www.chronox.de/crypto-API/crypto/architecture.html#cipher-allocation-type-and-masks
>>
>> [4] https://github.com/smuellerDD/libkcapi/blob/master/test/kcapi-main.c line
>> 522
>
>
> I've read the links and starring at the code, but still can't get it.
> The question is about textual type names in sockaddr.
> .cra_flags does not specify textual names.
> [3] again talks about int flags rather than textual names.
>
> I see they are used here:
> http://www.chronox.de/crypto-API/crypto/userspace-if.html#aead-cipher-api
>
> but it merely says:
>
> .salg_type = "aead", /* this selects the symmetric cipher */
> .salg_name = "gcm(aes)" /* this is the cipher name */
>
> and does not explain why it is must be "aead" for  "gcm(aes)", nor why
> would "skcipher" fail for  "gcm(aes)" (would it?).
>
> These integer flags in sockaddr_alg.feat/mask seem to be better always
> be 0 (because they can only fail an otherwise passing bind, right?).
> But the textual type seems to matter, because bind first looks at type
> and then everything else happens as callbacks on type.
>
> I've found these guys:
>
> tatic const struct crypto_type crypto_skcipher_type2 = {
> .extsize = crypto_skcipher_extsize,
> .init_tfm = crypto_skcipher_init_tfm,
> .free = crypto_skcipher_free_instance,
> #ifdef CONFIG_PROC_FS
> .show = crypto_skcipher_show,
> #endif
> .report = crypto_skcipher_report,
> .maskclear = ~CRYPTO_ALG_TYPE_MASK,
> .maskset = CRYPTO_ALG_TYPE_BLKCIPHER_MASK,
> .type = CRYPTO_ALG_TYPE_SKCIPHER,
> .tfmsize = offsetof(struct crypto_skcipher, base),
> };
>
> But it still does not make sense to me.
>  CRYPTO_ALG_TYPE_SKCIPHER const is not mentioned in any actual algorithms.
> and CRYPTO_ALG_TYPE_BLKCIPHER_MASK is 0xc, which selects
> CRYPTO_ALG_TYPE_BLKCIPHER, CRYPTO_ALG_TYPE_KPP and
> CRYPTO_ALG_TYPE_RNG. And it looks like a random subset of
> constants


Also, there seems to be only 4 types ("aead", "hash", "rng",
"skcipher"), but more algorithm types. For example, how do I get
access to ACOMPRESS/SCOMPRESS?

Re: x509 parsing bug + fuzzing crypto in the userspace

2017-11-23 Thread Dmitry Vyukov

On Thu, Nov 23, 2017 at 12:10 PM, Stephan Mueller <smuel...@chronox.de> wrote:
> Am Donnerstag, 23. November 2017, 10:37:35 CET schrieb Dmitry Vyukov:
>
> Hi Dmitry,
>
>> >> I've read the links and starring at the code, but still can't get it.
>> >> The question is about textual type names in sockaddr.
>> >> .cra_flags does not specify textual names.
>> >> [3] again talks about int flags rather than textual names.
>> >>
>> >> I see they are used here:
>> >> http://www.chronox.de/crypto-API/crypto/userspace-if.html#aead-cipher-api
>> >>
>> >> but it merely says:
>> >> .salg_type = "aead", /* this selects the symmetric cipher */
>> >> .salg_name = "gcm(aes)" /* this is the cipher name */
>> >>
>> >> and does not explain why it is must be "aead" for  "gcm(aes)", nor why
>> >> would "skcipher" fail for  "gcm(aes)" (would it?).
>> >>
>> >> These integer flags in sockaddr_alg.feat/mask seem to be better always
>> >> be 0 (because they can only fail an otherwise passing bind, right?).
>> >> But the textual type seems to matter, because bind first looks at type
>> >> and then everything else happens as callbacks on type.
>> >>
>> >> I've found these guys:
>> >>
>> >> tatic const struct crypto_type crypto_skcipher_type2 = {
>> >> .extsize = crypto_skcipher_extsize,
>> >> .init_tfm = crypto_skcipher_init_tfm,
>> >> .free = crypto_skcipher_free_instance,
>> >> #ifdef CONFIG_PROC_FS
>> >> .show = crypto_skcipher_show,
>> >> #endif
>> >> .report = crypto_skcipher_report,
>> >> .maskclear = ~CRYPTO_ALG_TYPE_MASK,
>> >> .maskset = CRYPTO_ALG_TYPE_BLKCIPHER_MASK,
>> >> .type = CRYPTO_ALG_TYPE_SKCIPHER,
>> >> .tfmsize = offsetof(struct crypto_skcipher, base),
>> >> };
>> >>
>> >> But it still does not make sense to me.
>> >>
>> >>  CRYPTO_ALG_TYPE_SKCIPHER const is not mentioned in any actual
>> >>  algorithms.
>> >>
>> >> and CRYPTO_ALG_TYPE_BLKCIPHER_MASK is 0xc, which selects
>> >> CRYPTO_ALG_TYPE_BLKCIPHER, CRYPTO_ALG_TYPE_KPP and
>> >> CRYPTO_ALG_TYPE_RNG. And it looks like a random subset of
>> >> constants
>> >
>> > Also, there seems to be only 4 types ("aead", "hash", "rng",
>> > "skcipher"), but more algorithm types. For example, how do I get
>> > access to ACOMPRESS/SCOMPRESS?
>>
>> Looking at /proc/crypto confuses me even more:
>>
>> $ cat /proc/crypto  | grep type | sort | uniq
>> type : ablkcipher
>> type : aead
>> type : ahash
>> type : akcipher
>> type : blkcipher
>> type : cipher
>> type : compression
>> type : givcipher
>> type : rng
>> type : shash
>>
>> Now, there are 10 types. They partially intersect with the other
>> textual types (e.g. "aead"). But, say "skcipher" is not present in
>> /proc/crypto at all.
>
> The types that a cipher can implement is given in include/linux/crypto.h:
>
> /*
>  * Algorithm masks and types.
>  */
> #define CRYPTO_ALG_TYPE_MASK0x000f
> #define CRYPTO_ALG_TYPE_CIPHER  0x0001
> ...
>
> These types are resolved when the various crypto_alloc_* functions are
> invoked. For example
>
> static const struct crypto_type crypto_skcipher_type2 = {
> ...
> .type = CRYPTO_ALG_TYPE_SKCIPHER,
> ...
> };
>
> struct crypto_skcipher *crypto_alloc_skcipher(const char *alg_name,
>   u32 type, u32 mask)
> {
> return crypto_alloc_tfm(alg_name, _skcipher_type2, type, mask);
> }
>
> Thus, when you use crypto_alloc_skcipher, it will only "find" cipher
> implementations that are of type SKCIPHER (or ABLKCIPHER as both values are
> identical). I.e. even when you try to call crypto_alloc_skcipher("sha256"),
> the lookup code will not find it as the sha256 implementation is of type AHASH
> (or SHASH) and not SKCIPHER.
>
> The name you see in /proc/crypto are given with the respective report function
> call (e.g. crypto_skcipher_report for the aforementioned example). These names
> are just informative and not relevant at all for anything.
>
>
> When you come to the AF_ALG interface, th

Re: x509 parsing bug + fuzzing crypto in the userspace

2017-11-23 Thread Dmitry Vyukov

On Thu, Nov 23, 2017 at 10:35 AM, Dmitry Vyukov <dvyu...@google.com> wrote:
> On Thu, Nov 23, 2017 at 10:32 AM, Dmitry Vyukov <dvyu...@google.com> wrote:
>> On Wed, Nov 22, 2017 at 6:08 PM, Stephan Mueller <smuel...@chronox.de> wrote:
>>> Am Mittwoch, 22. November 2017, 11:44:51 CET schrieb Dmitry Vyukov:
>>>
>>> Hi Dmitry,
>>>
>>>>
>>>> Thanks! I think we can incorporate this into syzkaller.
>>>>
>>>> One question: what's the relation between alg names and type ("aead",
>>>> "hash", "rng", "skcipher")?
>>>
>>> If you refer to AF_ALG, then the following applies:
>>>
>>> AF_ALG type of aead -> kernel crypto API: crypto_aead_*, aead_request_*
>>>
>>> AF_ALG type of skcipher -> kernel crypto API: crypto_skcipher_*,
>>> skcipher_request_*
>>>
>>> AF_ALG type of hash -> kernel crypto API: crypto_ahash_*, ahash_request_*
>>>
>>> AF_ALG type of rng -> kernel crypto API: crypto_rng_*
>>>
>>>
>>>> I remember I already looked at it before
>>>> and could not figure it out. Are all algorithms and templates
>>>> partitioned between types? Or they are orthogonal?
>>>
>>> If you refer to the cipher names, there are two types: templates and cipher
>>> implementations. See [1] for details. [2] gives you some ideas of the
>>> interrelationships between the templates and the ciphers.
>>>
>>> The relationship between the names and the AF_ALG names mentioned above is
>>> defined with the .cra_flags in the cipher specification of each cipher
>>> implementation. See [3] for details.
>>>
>>> Note, I started some fuzzing work in my libkcapi test application. See [4] 
>>> for
>>> the implementation.
>>>
>>> [1] 
>>> http://www.chronox.de/crypto-API/crypto/architecture.html#crypto-api-cipher-references-and-priority
>>>
>>> [2] 
>>> http://www.chronox.de/crypto-API/crypto/architecture.html#internal-structure-of-kernel-crypto-api
>>>
>>> [3] 
>>> http://www.chronox.de/crypto-API/crypto/architecture.html#cipher-allocation-type-and-masks
>>>
>>> [4] https://github.com/smuellerDD/libkcapi/blob/master/test/kcapi-main.c 
>>> line
>>> 522
>>
>>
>> I've read the links and starring at the code, but still can't get it.
>> The question is about textual type names in sockaddr.
>> .cra_flags does not specify textual names.
>> [3] again talks about int flags rather than textual names.
>>
>> I see they are used here:
>> http://www.chronox.de/crypto-API/crypto/userspace-if.html#aead-cipher-api
>>
>> but it merely says:
>>
>> .salg_type = "aead", /* this selects the symmetric cipher */
>> .salg_name = "gcm(aes)" /* this is the cipher name */
>>
>> and does not explain why it is must be "aead" for  "gcm(aes)", nor why
>> would "skcipher" fail for  "gcm(aes)" (would it?).
>>
>> These integer flags in sockaddr_alg.feat/mask seem to be better always
>> be 0 (because they can only fail an otherwise passing bind, right?).
>> But the textual type seems to matter, because bind first looks at type
>> and then everything else happens as callbacks on type.
>>
>> I've found these guys:
>>
>> tatic const struct crypto_type crypto_skcipher_type2 = {
>> .extsize = crypto_skcipher_extsize,
>> .init_tfm = crypto_skcipher_init_tfm,
>> .free = crypto_skcipher_free_instance,
>> #ifdef CONFIG_PROC_FS
>> .show = crypto_skcipher_show,
>> #endif
>> .report = crypto_skcipher_report,
>> .maskclear = ~CRYPTO_ALG_TYPE_MASK,
>> .maskset = CRYPTO_ALG_TYPE_BLKCIPHER_MASK,
>> .type = CRYPTO_ALG_TYPE_SKCIPHER,
>> .tfmsize = offsetof(struct crypto_skcipher, base),
>> };
>>
>> But it still does not make sense to me.
>>  CRYPTO_ALG_TYPE_SKCIPHER const is not mentioned in any actual algorithms.
>> and CRYPTO_ALG_TYPE_BLKCIPHER_MASK is 0xc, which selects
>> CRYPTO_ALG_TYPE_BLKCIPHER, CRYPTO_ALG_TYPE_KPP and
>> CRYPTO_ALG_TYPE_RNG. And it looks like a random subset of
>> constants
>
>
> Also, there seems to be only 4 types ("aead", "hash", "rng",
> "skcipher"), but more algorithm types. For example, how do I get
> access to ACOMPRESS/SCOMPRESS?

Looking at /proc/crypto confuses me even more:

$ cat /proc/crypto  | grep type | sort | uniq
type : ablkcipher
type : aead
type : ahash
type : akcipher
type : blkcipher
type : cipher
type : compression
type : givcipher
type : rng
type : shash

Now, there are 10 types. They partially intersect with the other
textual types (e.g. "aead"). But, say "skcipher" is not present in
/proc/crypto at all.

Re: x509 parsing bug + fuzzing crypto in the userspace

2017-11-23 Thread Dmitry Vyukov

On Thu, Nov 23, 2017 at 12:27 PM, Dmitry Vyukov <dvyu...@google.com> wrote:
>>
>> Hi Dmitry,
>>
>>> >> I've read the links and starring at the code, but still can't get it.
>>> >> The question is about textual type names in sockaddr.
>>> >> .cra_flags does not specify textual names.
>>> >> [3] again talks about int flags rather than textual names.
>>> >>
>>> >> I see they are used here:
>>> >> http://www.chronox.de/crypto-API/crypto/userspace-if.html#aead-cipher-api
>>> >>
>>> >> but it merely says:
>>> >> .salg_type = "aead", /* this selects the symmetric cipher */
>>> >> .salg_name = "gcm(aes)" /* this is the cipher name */
>>> >>
>>> >> and does not explain why it is must be "aead" for  "gcm(aes)", nor why
>>> >> would "skcipher" fail for  "gcm(aes)" (would it?).
>>> >>
>>> >> These integer flags in sockaddr_alg.feat/mask seem to be better always
>>> >> be 0 (because they can only fail an otherwise passing bind, right?).
>>> >> But the textual type seems to matter, because bind first looks at type
>>> >> and then everything else happens as callbacks on type.
>>> >>
>>> >> I've found these guys:
>>> >>
>>> >> tatic const struct crypto_type crypto_skcipher_type2 = {
>>> >> .extsize = crypto_skcipher_extsize,
>>> >> .init_tfm = crypto_skcipher_init_tfm,
>>> >> .free = crypto_skcipher_free_instance,
>>> >> #ifdef CONFIG_PROC_FS
>>> >> .show = crypto_skcipher_show,
>>> >> #endif
>>> >> .report = crypto_skcipher_report,
>>> >> .maskclear = ~CRYPTO_ALG_TYPE_MASK,
>>> >> .maskset = CRYPTO_ALG_TYPE_BLKCIPHER_MASK,
>>> >> .type = CRYPTO_ALG_TYPE_SKCIPHER,
>>> >> .tfmsize = offsetof(struct crypto_skcipher, base),
>>> >> };
>>> >>
>>> >> But it still does not make sense to me.
>>> >>
>>> >>  CRYPTO_ALG_TYPE_SKCIPHER const is not mentioned in any actual
>>> >>  algorithms.
>>> >>
>>> >> and CRYPTO_ALG_TYPE_BLKCIPHER_MASK is 0xc, which selects
>>> >> CRYPTO_ALG_TYPE_BLKCIPHER, CRYPTO_ALG_TYPE_KPP and
>>> >> CRYPTO_ALG_TYPE_RNG. And it looks like a random subset of
>>> >> constants
>>> >
>>> > Also, there seems to be only 4 types ("aead", "hash", "rng",
>>> > "skcipher"), but more algorithm types. For example, how do I get
>>> > access to ACOMPRESS/SCOMPRESS?
>>>
>>> Looking at /proc/crypto confuses me even more:
>>>
>>> $ cat /proc/crypto  | grep type | sort | uniq
>>> type : ablkcipher
>>> type : aead
>>> type : ahash
>>> type : akcipher
>>> type : blkcipher
>>> type : cipher
>>> type : compression
>>> type : givcipher
>>> type : rng
>>> type : shash
>>>
>>> Now, there are 10 types. They partially intersect with the other
>>> textual types (e.g. "aead"). But, say "skcipher" is not present in
>>> /proc/crypto at all.
>>
>> The types that a cipher can implement is given in include/linux/crypto.h:
>>
>> /*
>>  * Algorithm masks and types.
>>  */
>> #define CRYPTO_ALG_TYPE_MASK0x000f
>> #define CRYPTO_ALG_TYPE_CIPHER  0x0001
>> ...
>>
>> These types are resolved when the various crypto_alloc_* functions are
>> invoked. For example
>>
>> static const struct crypto_type crypto_skcipher_type2 = {
>> ...
>> .type = CRYPTO_ALG_TYPE_SKCIPHER,
>> ...
>> };
>>
>> struct crypto_skcipher *crypto_alloc_skcipher(const char *alg_name,
>>   u32 type, u32 mask)
>> {
>> return crypto_alloc_tfm(alg_name, _skcipher_type2, type, 
>> mask);
>> }
>>
>> Thus, when you use crypto_alloc_skcipher, it will only "find" cipher
>> implementations that are of type SKCIPHER (or ABLKCIPHER as both values are
>> identical). I.e. even when you try to call crypto_alloc_skcipher("sha256"),
>> the lookup code will not find it as the sha256 implementation is of type 
>>

Re: x509 parsing bug + fuzzing crypto in the userspace

2017-11-23 Thread Dmitry Vyukov

On Wed, Nov 22, 2017 at 6:08 PM, Stephan Mueller <smuel...@chronox.de> wrote:
> Am Mittwoch, 22. November 2017, 11:44:51 CET schrieb Dmitry Vyukov:
>
> Hi Dmitry,
>
>>
>> Thanks! I think we can incorporate this into syzkaller.
>>
>> One question: what's the relation between alg names and type ("aead",
>> "hash", "rng", "skcipher")?
>
> If you refer to AF_ALG, then the following applies:
>
> AF_ALG type of aead -> kernel crypto API: crypto_aead_*, aead_request_*
>
> AF_ALG type of skcipher -> kernel crypto API: crypto_skcipher_*,
> skcipher_request_*
>
> AF_ALG type of hash -> kernel crypto API: crypto_ahash_*, ahash_request_*
>
> AF_ALG type of rng -> kernel crypto API: crypto_rng_*
>
>
>> I remember I already looked at it before
>> and could not figure it out. Are all algorithms and templates
>> partitioned between types? Or they are orthogonal?
>
> If you refer to the cipher names, there are two types: templates and cipher
> implementations. See [1] for details. [2] gives you some ideas of the
> interrelationships between the templates and the ciphers.
>
> The relationship between the names and the AF_ALG names mentioned above is
> defined with the .cra_flags in the cipher specification of each cipher
> implementation. See [3] for details.
>
> Note, I started some fuzzing work in my libkcapi test application. See [4] for
> the implementation.
>
> [1] 
> http://www.chronox.de/crypto-API/crypto/architecture.html#crypto-api-cipher-references-and-priority
>
> [2] 
> http://www.chronox.de/crypto-API/crypto/architecture.html#internal-structure-of-kernel-crypto-api
>
> [3] 
> http://www.chronox.de/crypto-API/crypto/architecture.html#cipher-allocation-type-and-masks
>
> [4] https://github.com/smuellerDD/libkcapi/blob/master/test/kcapi-main.c line
> 522


I've read the links and starring at the code, but still can't get it.
The question is about textual type names in sockaddr.
.cra_flags does not specify textual names.
[3] again talks about int flags rather than textual names.

I see they are used here:
http://www.chronox.de/crypto-API/crypto/userspace-if.html#aead-cipher-api

but it merely says:

.salg_type = "aead", /* this selects the symmetric cipher */
.salg_name = "gcm(aes)" /* this is the cipher name */

and does not explain why it is must be "aead" for  "gcm(aes)", nor why
would "skcipher" fail for  "gcm(aes)" (would it?).

These integer flags in sockaddr_alg.feat/mask seem to be better always
be 0 (because they can only fail an otherwise passing bind, right?).
But the textual type seems to matter, because bind first looks at type
and then everything else happens as callbacks on type.

I've found these guys:

tatic const struct crypto_type crypto_skcipher_type2 = {
.extsize = crypto_skcipher_extsize,
.init_tfm = crypto_skcipher_init_tfm,
.free = crypto_skcipher_free_instance,
#ifdef CONFIG_PROC_FS
.show = crypto_skcipher_show,
#endif
.report = crypto_skcipher_report,
.maskclear = ~CRYPTO_ALG_TYPE_MASK,
.maskset = CRYPTO_ALG_TYPE_BLKCIPHER_MASK,
.type = CRYPTO_ALG_TYPE_SKCIPHER,
.tfmsize = offsetof(struct crypto_skcipher, base),
};

But it still does not make sense to me.
 CRYPTO_ALG_TYPE_SKCIPHER const is not mentioned in any actual algorithms.
and CRYPTO_ALG_TYPE_BLKCIPHER_MASK is 0xc, which selects
CRYPTO_ALG_TYPE_BLKCIPHER, CRYPTO_ALG_TYPE_KPP and
CRYPTO_ALG_TYPE_RNG. And it looks like a random subset of
constants

Re: x509 parsing bug + fuzzing crypto in the userspace

2017-11-22 Thread Dmitry Vyukov

On Wed, Nov 22, 2017 at 5:54 PM, Stephan Mueller  wrote:
> Am Dienstag, 21. November 2017, 21:46:28 CET schrieb Eric Biggers:
>
> Hi Eric,
>
>>
>> (There is probably more to improve for AF_ALG besides the algorithm names;
>> this is just what I happened to notice for now.)
>
> Just grepping may not cover all possibilities. Attached is a script that I use
> to invoke an array different tests for different cipher implementations. For
> now, it only covers C, ASM and CPU-based cipher implementations.

Hi Stephan,

I see it has lots of names hardcoded. Is it possible to extract
up-to-date list from kernel? Maybe at runtime from running kernel?

What's the max number of arguments for a template? I see there is at least 2:
  rfc4106(gcm_base(ctr(aes-aesni),ghash-clmulni))
can there be more?

Do you know answer to this question by any chance?
what's the relation between alg names and type ("aead", "hash", "rng",
"skcipher")? I remember I already looked at it before and could not
figure it out. Are all algorithms and templates partitioned between
types? Or they are orthogonal?

Thanks

Re: BUG: unable to handle kernel NULL pointer dereference in kfree

2017-11-29 Thread Dmitry Vyukov

On Wed, Nov 29, 2017 at 11:24 AM, syzbot

wrote:
> Hello,
>
> syzkaller hit the following crash on
> 43570f0383d6d5879ae585e6c3cf027ba321546f
> git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/master
> compiler: gcc (GCC) 7.1.1 20170620
> .config is attached
> Raw console output is attached.
>
> Unfortunately, I don't have any reproducer for this bug yet.
>
>
> netlink: 3 bytes leftover after parsing attributes in process
> `syz-executor3'.
> device gre0 entered promiscuous mode
> BUG: unable to handle kernel NULL pointer dereference at 0074
> IP: virt_to_cache mm/slab.c:400 [inline]
> IP: kfree+0xb2/0x250 mm/slab.c:3802
> PGD 1d369e067 P4D 1d369e067 PUD 1c8da0067 PMD 0
> Oops:  [#1] SMP KASAN
> Dumping ftrace buffer:
>(ftrace buffer empty)
> Modules linked in:
> CPU: 0 PID: 8031 Comm: syz-executor5 Not tainted 4.15.0-rc1+ #199
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
> Google 01/01/2011
> task: 8801d33f8540 task.stack: 8801d3b58000
> RIP: 0010:virt_to_cache mm/slab.c:400 [inline]
> RIP: 0010:kfree+0xb2/0x250 mm/slab.c:3802
> RSP: 0018:8801d3b5f780 EFLAGS: 00010046
> RAX:  RBX: 8801d3b5f948 RCX: 
> RDX: ea00074ed7c0 RSI:  RDI: 8801d3b5f948
> RBP: 8801d3b5f7a0 R08: ed003a2c3b7c R09: 
> R10: 0001 R11: ed003a2c3b7b R12: 0286
> R13:  R14: 8801d3b5f948 R15: 8801d3b5f8b0
> FS:  7f7f80409700() GS:8801db40() knlGS:
> CS:  0010 DS:  ES:  CR0: 80050033
> CR2: 0074 CR3: 0001c97a2000 CR4: 001426f0
> DR0: 20001000 DR1:  DR2: 
> DR3:  DR6: 0ff0 DR7: 0600
> Call Trace:
>  blkcipher_walk_done+0x72b/0xde0 crypto/blkcipher.c:139
>  encrypt+0x50a/0xaf0 crypto/salsa20_generic.c:208
>  skcipher_crypt_blkcipher crypto/skcipher.c:622 [inline]
>  skcipher_decrypt_blkcipher+0x213/0x310 crypto/skcipher.c:640
>  crypto_skcipher_decrypt include/crypto/skcipher.h:463 [inline]
>  _skcipher_recvmsg crypto/algif_skcipher.c:144 [inline]
>  skcipher_recvmsg+0xa54/0xf20 crypto/algif_skcipher.c:165
>  sock_recvmsg_nosec net/socket.c:805 [inline]
>  sock_recvmsg+0xc9/0x110 net/socket.c:812
>  ___sys_recvmsg+0x29b/0x630 net/socket.c:2207
>  __sys_recvmsg+0xe2/0x210 net/socket.c:2252
>  SYSC_recvmsg net/socket.c:2264 [inline]
>  SyS_recvmsg+0x2d/0x50 net/socket.c:2259
>  entry_SYSCALL_64_fastpath+0x1f/0x96
> RIP: 0033:0x4529d9
> RSP: 002b:7f7f80408c58 EFLAGS: 0212 ORIG_RAX: 002f
> RAX: ffda RBX: 00758190 RCX: 004529d9
> RDX: 0002 RSI: 2022efc8 RDI: 0018
> RBP: 001a R08:  R09: 
> R10:  R11: 0212 R12: 006ed310
> R13:  R14: 7f7f804096d4 R15: 0002
> Code: c2 48 b8 00 00 00 00 00 ea ff ff 48 89 df 48 c1 ea 0c 48 c1 e2 06 48
> 01 c2 48 8b 42 20 48 8d 48 ff a8 01 48 0f 45 d1 4c 8b 6a 30 <49> 63 75 74 e8
> 65 75 af ff 48 89 de 4c 89 ef 4c 8b 75 08 e8 06
> RIP: virt_to_cache mm/slab.c:400 [inline] RSP: 8801d3b5f780
> RIP: kfree+0xb2/0x250 mm/slab.c:3802 RSP: 8801d3b5f780
> CR2: 0074
> ---[ end trace e41183aa3c5416f4 ]---


I think this was misattributed to mm. So +crypto maintainers, mm
maintainers to bcc.
Original attachments are here:
https://groups.google.com/forum/#!msg/syzkaller-bugs/bQmKHgXOQyc/ODs0diBkAgAJ
I've also filed an issue to fix such misattribution in future:
https://github.com/google/syzkaller/issues/446



> ---
> This bug is generated by a dumb bot. It may contain errors.
> See https://goo.gl/tpsmEJ for details.
> Direct all questions to syzkal...@googlegroups.com.
> Please credit me with: Reported-by: syzbot 
>
> syzbot will keep track of this bug report.
> Once a fix for this bug is committed, please reply to this email with:
> #syz fix: exact-commit-title
> To mark this as a duplicate of another syzbot report, please reply with:
> #syz dup: exact-subject-of-another-report
> If it's a one-off invalid bug report, please reply with:
> #syz invalid
> Note: if the crash happens again, it will cause creation of a new bug
> report.
> Note: all commands must start from beginning of the line in the email body.

Re: INFO: task hung in lock_sock_nested

2017-12-12 Thread Dmitry Vyukov

On Sun, Dec 10, 2017 at 2:37 PM, syzbot

wrote:
> Hello,
>
> syzkaller hit the following crash on
> 51e18a453f5f59a40c721d4aeab082b4e2e9fac6
> git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git/master
> compiler: gcc (GCC) 7.1.1 20170620
> .config is attached
> Raw console output is attached.
>
> Unfortunately, I don't have any reproducer for this bug yet.
>
>
> RDS: rds_bind could not find a transport for 172.20.1.187, load rds_tcp or
> rds_rdma?
> INFO: task syz-executor2:19495 blocked for more than 120 seconds.
>   Not tainted 4.15.0-rc2+ #148
> "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> syz-executor2   D25200 19495   3406 0x0004
> Call Trace:
>  context_switch kernel/sched/core.c:2799 [inline]
>  __schedule+0x8eb/0x2060 kernel/sched/core.c:3375
>  schedule+0xf5/0x430 kernel/sched/core.c:3434
>  __lock_sock+0x1dc/0x2f0 net/core/sock.c:2240
>  lock_sock_nested+0xf3/0x110 net/core/sock.c:2764
>  lock_sock include/net/sock.h:1461 [inline]
>  af_alg_sendmsg+0x349/0x1080 crypto/af_alg.c:858
>  aead_sendmsg+0x103/0x150 crypto/algif_aead.c:76
>  sock_sendmsg_nosec net/socket.c:636 [inline]
>  sock_sendmsg+0xca/0x110 net/socket.c:646
>  ___sys_sendmsg+0x75b/0x8a0 net/socket.c:2026
>  __sys_sendmsg+0xe5/0x210 net/socket.c:2060
>  SYSC_sendmsg net/socket.c:2071 [inline]
>  SyS_sendmsg+0x2d/0x50 net/socket.c:2067
>  entry_SYSCALL_64_fastpath+0x1f/0x96
> RIP: 0033:0x452a39
> RSP: 002b:7f63f58cfc58 EFLAGS: 0212 ORIG_RAX: 002e
> RAX: ffda RBX: 00758020 RCX: 00452a39
> RDX: 0040 RSI: 2063 RDI: 0015
> RBP: 0001 R08:  R09: 
> R10:  R11: 0212 R12: 006ee0b8
> R13:  R14: 7f63f58d06d4 R15: 
>
> Showing all locks held in the system:
> 2 locks held by khungtaskd/663:
>  #0:  (rcu_read_lock){}, at: []
> check_hung_uninterruptible_tasks kernel/hung_task.c:175 [inline]
>  #0:  (rcu_read_lock){}, at: [] watchdog+0x1c5/0xd60
> kernel/hung_task.c:249
>  #1:  (tasklist_lock){.+.+}, at: []
> debug_show_all_locks+0xd3/0x400 kernel/locking/lockdep.c:4554
> 1 lock held by rsyslogd/3008:
>  #0:  (>f_pos_lock){+.+.}, at: []
> __fdget_pos+0x131/0x1a0 fs/file.c:770
> 2 locks held by getty/3130:
>  #0:  (>ldisc_sem){}, at: []
> ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
>  #1:  (>atomic_read_lock){+.+.}, at: [<2bd4e259>]
> n_tty_read+0x2f2/0x1a10 drivers/tty/n_tty.c:2131
> 2 locks held by getty/3131:
>  #0:  (>ldisc_sem){}, at: []
> ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
>  #1:  (>atomic_read_lock){+.+.}, at: [<2bd4e259>]
> n_tty_read+0x2f2/0x1a10 drivers/tty/n_tty.c:2131
> 2 locks held by getty/3132:
>  #0:  (>ldisc_sem){}, at: []
> ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
>  #1:  (>atomic_read_lock){+.+.}, at: [<2bd4e259>]
> n_tty_read+0x2f2/0x1a10 drivers/tty/n_tty.c:2131
> 2 locks held by getty/3133:
>  #0:  (>ldisc_sem){}, at: []
> ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
>  #1:  (>atomic_read_lock){+.+.}, at: [<2bd4e259>]
> n_tty_read+0x2f2/0x1a10 drivers/tty/n_tty.c:2131
> 2 locks held by getty/3134:
>  #0:  (>ldisc_sem){}, at: []
> ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
>  #1:  (>atomic_read_lock){+.+.}, at: [<2bd4e259>]
> n_tty_read+0x2f2/0x1a10 drivers/tty/n_tty.c:2131
> 2 locks held by getty/3135:
>  #0:  (>ldisc_sem){}, at: []
> ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
>  #1:  (>atomic_read_lock){+.+.}, at: [<2bd4e259>]
> n_tty_read+0x2f2/0x1a10 drivers/tty/n_tty.c:2131
> 2 locks held by getty/3136:
>  #0:  (>ldisc_sem){}, at: []
> ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
>  #1:  (>atomic_read_lock){+.+.}, at: [<2bd4e259>]
> n_tty_read+0x2f2/0x1a10 drivers/tty/n_tty.c:2131
> 1 lock held by syz-executor2/19506:
>  #0:  (sk_lock-AF_ALG){+.+.}, at: [] lock_sock
> include/net/sock.h:1461 [inline]
>  #0:  (sk_lock-AF_ALG){+.+.}, at: []
> aead_recvmsg+0xb3/0x1bc0 crypto/algif_aead.c:327
>
> =
>
> NMI backtrace for cpu 1
> CPU: 1 PID: 663 Comm: khungtaskd Not tainted 4.15.0-rc2+ #148
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
> Google 01/01/2011
> Call Trace:
>  __dump_stack lib/dump_stack.c:17 [inline]
>  dump_stack+0x194/0x257 lib/dump_stack.c:53
>  nmi_cpu_backtrace+0x1d2/0x210 lib/nmi_backtrace.c:103
>  nmi_trigger_cpumask_backtrace+0x122/0x180 lib/nmi_backtrace.c:62
>  arch_trigger_cpumask_backtrace+0x14/0x20

Re: INFO: task hung in aead_recvmsg

2017-12-12 Thread Dmitry Vyukov

On Sun, Dec 10, 2017 at 2:34 PM, syzbot

wrote:
> Hello,
>
> syzkaller hit the following crash on
> ad4dac17f9d563b9e34aab78a34293b10993e9b5
> git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/master
> compiler: gcc (GCC) 7.1.1 20170620
> .config is attached
> Raw console output is attached.
>
> Unfortunately, I don't have any reproducer for this bug yet.
>
>
> Use struct sctp_assoc_value instead
> INFO: task syz-executor6:7377 blocked for more than 120 seconds.
>   Not tainted 4.15.0-rc2-next-20171208+ #63
> "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> syz-executor6   D24416  7377   3393 0x0004
> Call Trace:
>  context_switch kernel/sched/core.c:2800 [inline]
>  __schedule+0x8eb/0x2060 kernel/sched/core.c:3376
>  schedule+0xf5/0x430 kernel/sched/core.c:3435
>  schedule_timeout+0x43a/0x560 kernel/time/timer.c:1776
>  do_wait_for_common kernel/sched/completion.c:91 [inline]
>  __wait_for_common kernel/sched/completion.c:112 [inline]
>  wait_for_common kernel/sched/completion.c:123 [inline]
>  wait_for_completion+0x44b/0x7b0 kernel/sched/completion.c:144
>  crypto_wait_req include/linux/crypto.h:496 [inline]
>  _aead_recvmsg crypto/algif_aead.c:308 [inline]
>  aead_recvmsg+0x1396/0x1bc0 crypto/algif_aead.c:329
>  aead_recvmsg_nokey+0x60/0x80 crypto/algif_aead.c:447
>  sock_recvmsg_nosec net/socket.c:809 [inline]
>  sock_recvmsg+0xc9/0x110 net/socket.c:816
>  ___sys_recvmsg+0x29b/0x630 net/socket.c:2185
>  __sys_recvmsg+0xe2/0x210 net/socket.c:2230
>  SYSC_recvmsg net/socket.c:2242 [inline]
>  SyS_recvmsg+0x2d/0x50 net/socket.c:2237
>  entry_SYSCALL_64_fastpath+0x1f/0x96
> RIP: 0033:0x452a39
> RSP: 002b:7f9dc7c93c58 EFLAGS: 0212 ORIG_RAX: 002f
> RAX: ffda RBX: 7f9dc7c94700 RCX: 00452a39
> RDX:  RSI: 20539fc8 RDI: 0025
> RBP:  R08:  R09: 
> R10:  R11: 0212 R12: 
> R13: 00a6f7ff R14: 7f9dc7c949c0 R15: 
>
> Showing all locks held in the system:
> 2 locks held by khungtaskd/671:
>  #0:  (rcu_read_lock){}, at: []
> check_hung_uninterruptible_tasks kernel/hung_task.c:175 [inline]
>  #0:  (rcu_read_lock){}, at: [] watchdog+0x1c5/0xd60
> kernel/hung_task.c:249
>  #1:  (tasklist_lock){.+.+}, at: []
> debug_show_all_locks+0xd3/0x400 kernel/locking/lockdep.c:4554
> 1 lock held by rsyslogd/2995:
>  #0:  (>f_pos_lock){+.+.}, at: [<34bb33fc>]
> __fdget_pos+0x131/0x1a0 fs/file.c:765
> 2 locks held by getty/3116:
>  #0:  (>ldisc_sem){}, at: [<8df66e53>]
> ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
>  #1:  (>atomic_read_lock){+.+.}, at: [<870ebf25>]
> n_tty_read+0x2f2/0x1a10 drivers/tty/n_tty.c:2131
> 2 locks held by getty/3117:
>  #0:  (>ldisc_sem){}, at: [<8df66e53>]
> ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
>  #1:  (>atomic_read_lock){+.+.}, at: [<870ebf25>]
> n_tty_read+0x2f2/0x1a10 drivers/tty/n_tty.c:2131
> 2 locks held by getty/3118:
>  #0:  (>ldisc_sem){}, at: [<8df66e53>]
> ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
>  #1:  (>atomic_read_lock){+.+.}, at: [<870ebf25>]
> n_tty_read+0x2f2/0x1a10 drivers/tty/n_tty.c:2131
> 2 locks held by getty/3119:
>  #0:  (>ldisc_sem){}, at: [<8df66e53>]
> ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
>  #1:  (>atomic_read_lock){+.+.}, at: [<870ebf25>]
> n_tty_read+0x2f2/0x1a10 drivers/tty/n_tty.c:2131
> 2 locks held by getty/3120:
>  #0:  (>ldisc_sem){}, at: [<8df66e53>]
> ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
>  #1:  (>atomic_read_lock){+.+.}, at: [<870ebf25>]
> n_tty_read+0x2f2/0x1a10 drivers/tty/n_tty.c:2131
> 2 locks held by getty/3121:
>  #0:  (>ldisc_sem){}, at: [<8df66e53>]
> ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
>  #1:  (>atomic_read_lock){+.+.}, at: [<870ebf25>]
> n_tty_read+0x2f2/0x1a10 drivers/tty/n_tty.c:2131
> 2 locks held by getty/3122:
>  #0:  (>ldisc_sem){}, at: [<8df66e53>]
> ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
>  #1:  (>atomic_read_lock){+.+.}, at: [<870ebf25>]
> n_tty_read+0x2f2/0x1a10 drivers/tty/n_tty.c:2131
> 1 lock held by syz-executor6/7377:
>  #0:  (sk_lock-AF_ALG){+.+.}, at: [<96d0e030>] lock_sock
> include/net/sock.h:1463 [inline]
>  #0:  (sk_lock-AF_ALG){+.+.}, at: [<96d0e030>]
> aead_recvmsg+0xb3/0x1bc0 crypto/algif_aead.c:327
> 1 lock held by syz-executor6/7391:
>  #0:  (sk_lock-AF_ALG){+.+.}, at: [<96d0e030>] lock_sock
> include/net/sock.h:1463 [inline]
>  #0:  (sk_lock-AF_ALG){+.+.}, at: [<96d0e030>]
> aead_recvmsg+0xb3/0x1bc0 crypto/algif_aead.c:327
>
> =
>

Re: KASAN: use-after-free Read in crypto_aead_free_instance

2017-12-20 Thread Dmitry Vyukov

On Wed, Dec 20, 2017 at 10:17 AM, Stephan Müller  wrote:
> Am Mittwoch, 20. Dezember 2017, 08:48:01 CET schrieb syzbot:
>
> Hi,
>
>> Hello,
>>
>> syzkaller hit the following crash on
>> 032b4cc8ff84490c4bc7c4ef8c91e6d83a637538
>> git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/master
>> compiler: gcc (GCC) 7.1.1 20170620
>> .config is attached
>> Raw console output is attached.
>> C reproducer is attached
>> syzkaller reproducer is attached. See https://goo.gl/kgGztJ
>> for information about syzkaller reproducers
>>
>>
>> ==
>> BUG: KASAN: use-after-free in crypto_aead_free_instance+0xc0/0xd0
>> crypto/aead.c:154
>> Read of size 8 at addr 8801c32cf240 by task cryptomgr_test/6646
>>
>> CPU: 1 PID: 6646 Comm: cryptomgr_test Not tainted 4.15.0-rc3+ #132
>> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
>> Google 01/01/2011
>> Call Trace:
>>   __dump_stack lib/dump_stack.c:17 [inline]
>>   dump_stack+0x194/0x257 lib/dump_stack.c:53
>>   print_address_description+0x73/0x250 mm/kasan/report.c:252
>>   kasan_report_error mm/kasan/report.c:351 [inline]
>>   kasan_report+0x25b/0x340 mm/kasan/report.c:409
>>   __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:430
>>   crypto_aead_free_instance+0xc0/0xd0 crypto/aead.c:154
>>   crypto_free_instance+0x6d/0x100 crypto/algapi.c:77
>>   crypto_destroy_instance+0x3c/0x80 crypto/algapi.c:85
>>   crypto_alg_put crypto/internal.h:116 [inline]
>>   crypto_remove_final+0x212/0x370 crypto/algapi.c:331
>>   crypto_alg_tested+0x445/0x6f0 crypto/algapi.c:320
>>   cryptomgr_test+0x17/0x30 crypto/algboss.c:226
>>   kthread+0x37a/0x440 kernel/kthread.c:238
>>   ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:441
>>
>> Allocated by task 6641:
>>   save_stack+0x43/0xd0 mm/kasan/kasan.c:447
>>   set_track mm/kasan/kasan.c:459 [inline]
>>   kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551
>>   kmem_cache_alloc_trace+0x136/0x750 mm/slab.c:3610
>>   kmalloc include/linux/slab.h:499 [inline]
>>   kzalloc include/linux/slab.h:688 [inline]
>>   pcrypt_create_aead crypto/pcrypt.c:291 [inline]
>>   pcrypt_create+0x137/0x6c0 crypto/pcrypt.c:346
>>   cryptomgr_probe+0x74/0x240 crypto/algboss.c:75
>>   kthread+0x37a/0x440 kernel/kthread.c:238
>>   ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:441
>>
>> Freed by task 3335:
>>   save_stack+0x43/0xd0 mm/kasan/kasan.c:447
>>   set_track mm/kasan/kasan.c:459 [inline]
>>   kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524
>>   __cache_free mm/slab.c:3488 [inline]
>>   kfree+0xca/0x250 mm/slab.c:3803
>>   crypto_larval_destroy+0x110/0x150 crypto/api.c:107
>>   crypto_alg_put crypto/internal.h:116 [inline]
>>   crypto_larval_kill+0x1e8/0x2e0 crypto/api.c:167
>>   crypto_alg_mod_lookup+0x178/0x1b0 crypto/api.c:283
>>   crypto_find_alg crypto/api.c:501 [inline]
>>   crypto_alloc_tfm+0xf3/0x2f0 crypto/api.c:534
>>   crypto_alloc_aead+0x2c/0x40 crypto/aead.c:342
>>   aead_bind+0x70/0x140 crypto/algif_aead.c:482
>>   alg_bind+0x1ab/0x440 crypto/af_alg.c:179
>>   SYSC_bind+0x1b4/0x3f0 net/socket.c:1454
>>   SyS_bind+0x24/0x30 net/socket.c:1440
>>   do_syscall_32_irqs_on arch/x86/entry/common.c:327 [inline]
>>   do_fast_syscall_32+0x3ee/0xf9d arch/x86/entry/common.c:389
>>   entry_SYSENTER_compat+0x51/0x60 arch/x86/entry/entry_64_compat.S:125
>>
> This issue vanishes after applying the patch "[PATCH v2] crypto: AF_ALG -
> limit mask and type".


Hi Stephan,

syzbot does not understand arbitrary English prose, it only understands this:

> Once a fix for this bug is merged into any tree, reply to this email with:
> #syz fix: exact-commit-title

Let's tell it about the fix:

#syz fix: crypto: AF_ALG - limit mask and type

Re: KASAN: use-after-free Read in crypto_aead_free_instance

2017-12-20 Thread Dmitry Vyukov

On Wed, Dec 20, 2017 at 10:55 AM, Stephan Mueller <smuel...@chronox.de> wrote:
> Am Mittwoch, 20. Dezember 2017, 10:50:10 CET schrieb Dmitry Vyukov:
>
> Hi Dmitry,
>
>> On Wed, Dec 20, 2017 at 10:29 AM, Stephan Mueller <smuel...@chronox.de>
> wrote:
>> > Am Mittwoch, 20. Dezember 2017, 10:19:43 CET schrieb Dmitry Vyukov:
>> >
>> > Hi Dmitry,
>> >
>> >> > This issue vanishes after applying the patch "[PATCH v2] crypto: AF_ALG
>> >> > -
>> >> > limit mask and type".
>> >>
>> >> Hi Stephan,
>> >>
>> >> syzbot does not understand arbitrary English prose, it only understands
>> >
>> > this:
>> >> > Once a fix for this bug is merged into any tree, reply to this email
>> >> > with:
>> >> > #syz fix: exact-commit-title
>> >>
>> >> Let's tell it about the fix:
>> >>
>> >> #syz fix: crypto: AF_ALG - limit mask and type
>> >
>> > I have seen that this is the approach, but the fix is not yet in the tree.
>> > I just want to let folks know that there is a patch.
>>
>> Ah, ok, sorry. It's just difficult to tell when there is a reason to
>> not provide the tag right now, or when people are don't know about
>> them or ignore.
>> If the patch is merged with this title, then there is nothing else to
>> do. If it's merged under a different title, a new "#syz fix:" tag will
>> override the old one.
>
> Maybe you can teach the syzcaller that there is a proposed fix? E.g.
>
> #syz proposed: commit-title

What will be its meaning? How will it differ from fix?

Re: KASAN: use-after-free Read in crypto_aead_free_instance

2017-12-20 Thread Dmitry Vyukov

On Wed, Dec 20, 2017 at 10:29 AM, Stephan Mueller <smuel...@chronox.de> wrote:
> Am Mittwoch, 20. Dezember 2017, 10:19:43 CET schrieb Dmitry Vyukov:
>
> Hi Dmitry,
>> >
>> > This issue vanishes after applying the patch "[PATCH v2] crypto: AF_ALG -
>> > limit mask and type".
>>
>> Hi Stephan,
>>
>> syzbot does not understand arbitrary English prose, it only understands
> this:
>> > Once a fix for this bug is merged into any tree, reply to this email with:
>> > #syz fix: exact-commit-title
>>
>> Let's tell it about the fix:
>>
>> #syz fix: crypto: AF_ALG - limit mask and type
>
> I have seen that this is the approach, but the fix is not yet in the tree. I
> just want to let folks know that there is a patch.

Ah, ok, sorry. It's just difficult to tell when there is a reason to
not provide the tag right now, or when people are don't know about
them or ignore.
If the patch is merged with this title, then there is nothing else to
do. If it's merged under a different title, a new "#syz fix:" tag will
override the old one.

Re: KASAN: use-after-free Read in crypto_aead_free_instance

2017-12-20 Thread Dmitry Vyukov

On Wed, Dec 20, 2017 at 12:49 PM, Stephan Mueller <smuel...@chronox.de> wrote:
> Am Mittwoch, 20. Dezember 2017, 11:15:38 CET schrieb Dmitry Vyukov:
>
> Hi Dmitry,
>
>>
>> What will be its meaning? How will it differ from fix?
>
> Maybe a short clarification would help: what is the meaning of the syz fix
> marker?

It's described here:
https://github.com/google/syzkaller/blob/master/docs/syzbot.md#bug-status-tracking

> Depending on this answer, all that I am thinking of is to mark bug
> reports for which there are fixes actively discussed, but yet not integrated.
> Thus, such marker should only help others to point them to active discussions
> instead of them trying to find fixes alone.

If it's only for humans, then there is no need to make a special
machine-readable command for this.
So basically what you wrote above is good:

> This issue vanishes after applying the patch "[PATCH v2] crypto: AF_ALG - 
> limit mask and type".

I just didn't understand that's still pending (but perhaps that's what
you meant by including "[PATCH v2]" part).

Re: x509 parsing bug + fuzzing crypto in the userspace

2017-11-21 Thread Dmitry Vyukov

On Mon, Nov 20, 2017 at 10:42 PM, Eric Biggers  wrote:
> +Cc keyri...@vger.kernel.org (for asymmetric_keys)
>
> First of all, thanks for working on this!  A lot of this code really needs to 
> be
> better tested.
>
> On Mon, Nov 20, 2017 at 03:10:55PM +0100, Alexander Potapenko wrote:
>> Hi all,
>>
>> TL;DR userspace fuzzing may be very effective for finding bugs in
>> crypto code, but might require some kernel-side changes.
>>
>> When the attached binary file,
>> crash-bbeda07d4ce60cf3b7fc20c3af92297e19f6dbae, is passed as payload
>> to add_key("asymmetric", "desc", payload, len), it triggers a
>> slab-out-of-bounds KASAN report (see below). This can be reproduced
>> with the attached program, add_key.c.
>
> The bug is in asn1_ber_decoder(): the special length value of 0x80, which
> indicates an "indefinite length", is being passed to one of the "action"
> functions as the real length, causing the input buffer to be overread.  
> Off-hand
> I don't know the best fix, but probably it would involve calling
> asn1_find_indefinite_length() before the "action" rather than after...
>
>>
>> We found this bug by fuzzing asn1_ber_decoder() in the userspace using
>> libFuzzer (http://llvm.org/docs/LibFuzzer.html).
>> The trick is to compile and link together several translation units
>> that provide a transitive closure of asn1_ber_decoder() (some of the
>> kernel functions need to be re-implemented to run in the userspace).
>> I've attached the resulting fuzzer as well (asn1_fuzzer.tar.gz). It
>> can operate on its own, although better results are achieved when
>> using the fuzzing corpus from
>> https://github.com/google/boringssl/tree/master/fuzz and
>> https://github.com/openssl/openssl/tree/master/fuzz.
>
> I had actually tried almost the same thing recently, but with afl-fuzz instead
> of libFuzzer.  But I don't think it was any better than what you did, and I
> didn't find the above bug, perhaps because the AFL_HARDEN compiler options 
> don't
> include AddressSanitizer.
>
> I did use the original unpreprocessed .c files instead of preprocessed ones, 
> but
> it required stubbing out quite a few headers.  Probably using preprocessed 
> files
> is a bit easier.


We've also experimented with building x86_64 user-mode linux and then
linking vmlinux.o to a normal C program with main. Then the program
can directly call any kernel functions.
This required approximately the following:
 - weaken main in vmlinux.o (there is some binutil for this)
 - weaken all actual slab function and define them in the main program
forwarding to malloc/free
 - do the same for other called function that use global kernel
state/can't work without kernel initialization
 - add symbols denoting sections start/end (added by linker script in
linux build) to the main program (we've added them as just int's
because we just need symbols)

We did not get to the point where it actually works, but it looks like
this can work.
The advantage of this approach is that we reuse kernel Makefile to get
working vmlinux.o, and that almost all of this can be reused for other
fuzzers.


> For the fuzzing function I had it call x509_cert_parse() rather than
> asn1_ber_decode() directly, but with x509_get_sig_params() and
> x509_check_for_self_signed() stubbed out.  It would be nice to get coverage of
> x509_check_for_self_signed() because it will call into crypto/rsa.c and, among
> other things, run yet another ASN.1 decoder...  But that would have required
> porting a lot of the crypto API to userspace.
>
>>
>> Could it be possible to decouple the parsing algorithms from the rest
>> of kernel-specific crypto code? Maybe someone has other ideas about
>> how this code can be ran in the userspace?
>>
>
> One idea is that the ASN.1 parsers could be run with no "actions", which would
> minimize dependencies on other kernel code.  But unfortunately that would 
> omit a
> lot of the interesting code.  Many (or most?) bugs will be in how the parsed
> data is used, rather than in the parsing itself.

Good point.

>  I don't think there is an easy
> solution.
>
> Note that separate from asymmetric_keys (which you can think of as being
> in-between the keyrings subsystem and the crypto subsystem) there is also the
> userspace interface to cryptographic algorithms, AF_ALG.  It might be possible
> to port a lot of the crypto API to userspace, but it would require a lot of 
> work
> to stub things out.  Maybe a simpler improvement would be to teach syzkaller 
> to
> more thoroughly test AF_ALG.  For example it could be made aware of algorithm
> templates so that it could try combining them in unusual ways.  (Example:
> https://marc.info/?l=linux-crypto-vger=148063683310477=2 was a NULL 
> pointer
> dereference bug that occurred if you asked to use the algorithm 
> "mcryptd(md5)",
> i.e. the mcryptd template wrapping md5.)  Also,
> CONFIG_CRYPTO_MANAGER_DISABLE_TESTS should be unset, so that the crypto
> self-tests are enabled.


Can you please

Re: BUG: unable to handle kernel paging request in hmac_init_tfm

2017-12-20 Thread Dmitry Vyukov

On Thu, Dec 21, 2017 at 12:09 AM, Eric Biggers  wrote:
> On Mon, Dec 18, 2017 at 11:36:01AM -0800, syzbot wrote:
>> Hello,
>>
>> syzkaller hit the following crash on
>> 6084b576dca2e898f5c101baef151f7bfdbb606d
>> git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/master
>> compiler: gcc (GCC) 7.1.1 20170620
>> .config is attached
>
> FYI, in linux-next KASAN and other memory debugging options are now behind
> CONFIG_DEBUG_MEMORY.  So, I think KASAN isn't getting turned on anymore, 
> despite
> the CONFIG_KASAN=y.  (Patch was "lib/: make "Memory Debugging" a menuconfig to
> ease disabling it all".)

Ouch! That would be pretty bad.

But I've tried both linux-next HEAD at:

commit 0e08c463db387a2adcb0243b15ab868a73f87807 (HEAD, tag:
next-20171221, linux-next/master)
Author: Stephen Rothwell 
Date:   Thu Dec 21 15:37:39 2017 +1100
Add linux-next specific files for 20171221

and mmots HEAD at:

commit 75aa5540627fdb3d8f86229776ea87f995275351 (HEAD, tag:
v4.15-rc4-mmots-2017-12-20-19-10, mmots/master)
Author: Linus Torvalds 
Date:   Thu Dec 21 04:02:17 2017 +
pci: test for unexpectedly disabled bridges

and after running make olddefconfig I still see CONFIG_KASAN=y in
.config, nor I can find CONFIG_DEBUG_MEMORY in menuconfig.

What am I missing? What commit has added the config?

Re: [PATCH] crypto: pcrypt - fix freeing pcrypt instances

2017-12-21 Thread Dmitry Vyukov

On Wed, Dec 20, 2017 at 11:28 PM, Eric Biggers  wrote:
> From: Eric Biggers 
>
> pcrypt is using the old way of freeing instances, where the ->free()
> method specified in the 'struct crypto_template' is passed a pointer to
> the 'struct crypto_instance'.  But the crypto_instance is being
> kfree()'d directly, which is incorrect because the memory was actually
> allocated as an aead_instance, which contains the crypto_instance at a
> nonzero offset.  Thus, the wrong pointer was being kfree()'d.


That's interesting. KASAN does not detect frees of invalid pointers
(e.g. into a middle of an object). It should.
I've requested a component for KASAN in kernel bugzilla to file this
(not sure if anybody is actually reading these emails), and so far
filed it in an internal bug tracker.


> Fix it by switching to the new way to free aead_instance's where the
> ->free() method is specified in the aead_instance itself.
>
> Reported-by: syzbot 
> Fixes: 0496f56065e0 ("crypto: pcrypt - Add support for new AEAD interface")
> Cc:  # v4.2+
> Signed-off-by: Eric Biggers 
> ---
>  crypto/pcrypt.c | 19 ++-
>  1 file changed, 10 insertions(+), 9 deletions(-)
>
> diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c
> index ee9cfb99fe25..f8ec3d4ba4a8 100644
> --- a/crypto/pcrypt.c
> +++ b/crypto/pcrypt.c
> @@ -254,6 +254,14 @@ static void pcrypt_aead_exit_tfm(struct crypto_aead *tfm)
> crypto_free_aead(ctx->child);
>  }
>
> +static void pcrypt_free(struct aead_instance *inst)
> +{
> +   struct pcrypt_instance_ctx *ctx = aead_instance_ctx(inst);
> +
> +   crypto_drop_aead(>spawn);
> +   kfree(inst);
> +}
> +
>  static int pcrypt_init_instance(struct crypto_instance *inst,
> struct crypto_alg *alg)
>  {
> @@ -319,6 +327,8 @@ static int pcrypt_create_aead(struct crypto_template 
> *tmpl, struct rtattr **tb,
> inst->alg.encrypt = pcrypt_aead_encrypt;
> inst->alg.decrypt = pcrypt_aead_decrypt;
>
> +   inst->free = pcrypt_free;
> +
> err = aead_register_instance(tmpl, inst);
> if (err)
> goto out_drop_aead;
> @@ -349,14 +359,6 @@ static int pcrypt_create(struct crypto_template *tmpl, 
> struct rtattr **tb)
> return -EINVAL;
>  }
>
> -static void pcrypt_free(struct crypto_instance *inst)
> -{
> -   struct pcrypt_instance_ctx *ctx = crypto_instance_ctx(inst);
> -
> -   crypto_drop_aead(>spawn);
> -   kfree(inst);
> -}
> -
>  static int pcrypt_cpumask_change_notify(struct notifier_block *self,
> unsigned long val, void *data)
>  {
> @@ -469,7 +471,6 @@ static void pcrypt_fini_padata(struct padata_pcrypt 
> *pcrypt)
>  static struct crypto_template pcrypt_tmpl = {
> .name = "pcrypt",
> .create = pcrypt_create,
> -   .free = pcrypt_free,
> .module = THIS_MODULE,
>  };
>
> --
> 2.15.1.620.gb9897f4670-goog
>
> --
> You received this message because you are subscribed to the Google Groups 
> "syzkaller-bugs" group.
> To unsubscribe from this group and stop receiving emails from it, send an 
> email to syzkaller-bugs+unsubscr...@googlegroups.com.
> To view this discussion on the web visit 
> https://groups.google.com/d/msgid/syzkaller-bugs/20171220222825.207321-1-ebiggers3%40gmail.com.
> For more options, visit https://groups.google.com/d/optout.

Re: WARNING: kernel stack regs has bad 'bp' value (3)

2018-05-12 Thread Dmitry Vyukov

On Sat, May 12, 2018 at 11:09 AM, Ard Biesheuvel
<ard.biesheu...@linaro.org> wrote:
> (+ Arnd)
>
> On 12 May 2018 at 10:43, Dmitry Vyukov <dvyu...@google.com> wrote:
>> On Fri, Feb 2, 2018 at 11:18 PM, Eric Biggers <ebigge...@gmail.com> wrote:
>>> On Fri, Feb 02, 2018 at 02:57:32PM +0100, Dmitry Vyukov wrote:
>>>> On Fri, Feb 2, 2018 at 2:48 PM, syzbot
>>>> <syzbot+ffa3a158337bbc01f...@syzkaller.appspotmail.com> wrote:
>>>> > Hello,
>>>> >
>>>> > syzbot hit the following crash on upstream commit
>>>> > 7109a04eae81c41ed529da9f3c48c3655ccea741 (Thu Feb 1 17:37:30 2018 +)
>>>> > Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/ide
>>>> >
>>>> > So far this crash happened 4 times on net-next, upstream.
>>>> > C reproducer is attached.
>>>> > syzkaller reproducer is attached.
>>>> > Raw console output is attached.
>>>> > compiler: gcc (GCC) 7.1.1 20170620
>>>> > .config is attached.
>>>>
>>>>
>>>> From suspicious frames I see salsa20_asm_crypt there, so +crypto 
>>>> maintainers.
>>>>
>>>
>>> Looks like the x86 implementations of Salsa20 (both i586 and x86_64) need 
>>> to be
>>> updated to not use %ebp/%rbp.
>>
>> Ard,
>>
>> This was bisected as introduced by:
>>
>> commit 83dee2ce1ae791c3dc0c9d4d3a8d42cb109613f6
>> Author: Ard Biesheuvel <ard.biesheu...@linaro.org>
>> Date:   Fri Jan 19 12:04:34 2018 +
>>
>> crypto: sha3-generic - rewrite KECCAK transform to help the
>> compiler optimize
>>
>> https://gist.githubusercontent.com/dvyukov/47f93f5a0679170dddf93bc019b42f6d/raw/65beac8ddd30003bbd4e9729236dc8572094abf7/gistfile1.txt
>
> Ouch.
>
> I'm not an expert in x86 assembly. Could someone please check the
> generated code to see what's going on? The C code changes are not that
> intricate, they basically unroll a loop, replacing accesses to
> 'array[indirect_index[i]]' with 'array[constant]'.
>
> As mentioned in the commit log, the speedup is more than significant
> for architectures with lots of GPRs so I'd prefer fixing the patch
> over reverting it (if there is anything wrong with the code in the
> first place)

I suspect the problem is with __attribute__((__optimize__("O3"))). It
makes compiler use rbp register, which must not be used.

Re: WARNING: kernel stack regs has bad 'bp' value (3)

2018-05-12 Thread Dmitry Vyukov

On Fri, Feb 2, 2018 at 11:18 PM, Eric Biggers <ebigge...@gmail.com> wrote:
> On Fri, Feb 02, 2018 at 02:57:32PM +0100, Dmitry Vyukov wrote:
>> On Fri, Feb 2, 2018 at 2:48 PM, syzbot
>> <syzbot+ffa3a158337bbc01f...@syzkaller.appspotmail.com> wrote:
>> > Hello,
>> >
>> > syzbot hit the following crash on upstream commit
>> > 7109a04eae81c41ed529da9f3c48c3655ccea741 (Thu Feb 1 17:37:30 2018 +)
>> > Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/ide
>> >
>> > So far this crash happened 4 times on net-next, upstream.
>> > C reproducer is attached.
>> > syzkaller reproducer is attached.
>> > Raw console output is attached.
>> > compiler: gcc (GCC) 7.1.1 20170620
>> > .config is attached.
>>
>>
>> From suspicious frames I see salsa20_asm_crypt there, so +crypto maintainers.
>>
>
> Looks like the x86 implementations of Salsa20 (both i586 and x86_64) need to 
> be
> updated to not use %ebp/%rbp.

Ard,

This was bisected as introduced by:

commit 83dee2ce1ae791c3dc0c9d4d3a8d42cb109613f6
Author: Ard Biesheuvel <ard.biesheu...@linaro.org>
Date:   Fri Jan 19 12:04:34 2018 +

crypto: sha3-generic - rewrite KECCAK transform to help the
compiler optimize

https://gist.githubusercontent.com/dvyukov/47f93f5a0679170dddf93bc019b42f6d/raw/65beac8ddd30003bbd4e9729236dc8572094abf7/gistfile1.txt

[PATCH] crypto: don't optimize keccakf()

2018-06-08 Thread Dmitry Vyukov

keccakf() is the only function in kernel that uses __optimize() macro.
__optimize() breaks frame pointer unwinder as optimized code uses RBP,
and amusingly this always lead to degraded performance as gcc does not
inline across different optimizations levels, so keccakf() wasn't inlined
into its callers and keccakf_round() wasn't inlined into keccakf().

Drop __optimize() to resolve both problems.

Signed-off-by: Dmitry Vyukov 
Fixes: 83dee2ce1ae7 ("crypto: sha3-generic - rewrite KECCAK transform to help 
the compiler optimize")
Reported-by: syzbot+37035ccfa9a0a017f...@syzkaller.appspotmail.com
Reported-by: syzbot+e073e4740cfbb3ae2...@syzkaller.appspotmail.com
Cc: linux-crypto@vger.kernel.org
Cc: "David S. Miller" 
Cc: Herbert Xu 
Cc: Ard Biesheuvel 
---
 crypto/sha3_generic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crypto/sha3_generic.c b/crypto/sha3_generic.c
index 264ec12c0b9c..7f6735d9003f 100644
--- a/crypto/sha3_generic.c
+++ b/crypto/sha3_generic.c
@@ -152,7 +152,7 @@ static SHA3_INLINE void keccakf_round(u64 st[25])
st[24] ^= bc[ 4];
 }
 
-static void __optimize("O3") keccakf(u64 st[25])
+static void keccakf(u64 st[25])
 {
int round;
 
-- 
2.18.0.rc1.242.g61856ae69a-goog

Re: KASAN: use-after-free Read in crypto_destroy_tfm

2018-05-26 Thread Dmitry Vyukov

On Sat, May 26, 2018 at 7:40 PM, syzbot
 wrote:
> Hello,
>
> syzbot found the following crash on:
>
> HEAD commit:0644f186fc9d Merge tag 'for_linus' of git://git.kernel.org..
> git tree:   upstream
> console output: https://syzkaller.appspot.com/x/log.txt?x=102bc25780
> kernel config:  https://syzkaller.appspot.com/x/.config?x=61c12b53c2a25ec4
> dashboard link: https://syzkaller.appspot.com/bug?extid=352126a5be7ccb25754e
> compiler:   gcc (GCC) 8.0.1 20180413 (experimental)
>
> Unfortunately, I don't have any reproducer for this crash yet.
>
> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+352126a5be7ccb257...@syzkaller.appspotmail.com

Eric, do you remember if we had any recent fixes for this?

> ==
> BUG: KASAN: use-after-free in crypto_destroy_tfm+0x2a3/0x300
> crypto/api.c:573
> Read of size 8 at addr 8801d9023238 by task syz-executor6/10078
>
> CPU: 1 PID: 10078 Comm: syz-executor6 Not tainted 4.17.0-rc2+ #19
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
> Google 01/01/2011
> Call Trace:
>  __dump_stack lib/dump_stack.c:77 [inline]
>  dump_stack+0x1b9/0x294 lib/dump_stack.c:113
>  print_address_description+0x6c/0x20b mm/kasan/report.c:256
>  kasan_report_error mm/kasan/report.c:354 [inline]
>  kasan_report.cold.7+0x242/0x2fe mm/kasan/report.c:412
>  __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433
>  crypto_destroy_tfm+0x2a3/0x300 crypto/api.c:573
>  crypto_free_rng include/crypto/rng.h:122 [inline]
>  rng_release+0x18/0x20 crypto/algif_rng.c:124
>  alg_do_release crypto/af_alg.c:119 [inline]
>  alg_sock_destruct+0x92/0xe0 crypto/af_alg.c:362
>  __sk_destruct+0xff/0xa40 net/core/sock.c:1566
>  sk_destruct+0x78/0x90 net/core/sock.c:1601
>  __sk_free+0x22e/0x340 net/core/sock.c:1612
>  sk_free+0x42/0x50 net/core/sock.c:1623
>  sock_put include/net/sock.h:1664 [inline]
>  af_alg_release+0x6e/0x90 crypto/af_alg.c:126
>  sock_release+0x96/0x1b0 net/socket.c:594
>  sock_close+0x16/0x20 net/socket.c:1149
>  __fput+0x34d/0x890 fs/file_table.c:209
>  fput+0x15/0x20 fs/file_table.c:243
>  task_work_run+0x1e4/0x290 kernel/task_work.c:113
>  exit_task_work include/linux/task_work.h:22 [inline]
>  do_exit+0x1aee/0x2730 kernel/exit.c:865
>  do_group_exit+0x16f/0x430 kernel/exit.c:968
>  get_signal+0x886/0x1960 kernel/signal.c:2469
>  do_signal+0x98/0x2040 arch/x86/kernel/signal.c:810
>  exit_to_usermode_loop+0x28a/0x310 arch/x86/entry/common.c:162
>  prepare_exit_to_usermode arch/x86/entry/common.c:196 [inline]
>  syscall_return_slowpath arch/x86/entry/common.c:265 [inline]
>  do_syscall_64+0x6ac/0x800 arch/x86/entry/common.c:290
>  entry_SYSCALL_64_after_hwframe+0x49/0xbe
> RIP: 0033:0x455979
> RSP: 002b:7f4f4bbc2c68 EFLAGS: 0246 ORIG_RAX: 0036
> RAX:  RBX: 7f4f4bbc36d4 RCX: 00455979
> RDX: 0001 RSI: 0117 RDI: 0014
> RBP: 0072bf50 R08:  R09: 
> R10: 204f7000 R11: 0246 R12: 
> R13: 0519 R14: 006faaf8 R15: 0001
>
> Allocated by task 4484:
>  save_stack+0x43/0xd0 mm/kasan/kasan.c:448
>  set_track mm/kasan/kasan.c:460 [inline]
>  kasan_kmalloc+0xc4/0xe0 mm/kasan/kasan.c:553
>  __do_kmalloc_node mm/slab.c:3682 [inline]
>  __kmalloc_node_track_caller+0x47/0x70 mm/slab.c:3696
>  __kmalloc_reserve.isra.38+0x3a/0xe0 net/core/skbuff.c:137
>  __alloc_skb+0x14d/0x780 net/core/skbuff.c:205
>  alloc_skb include/linux/skbuff.h:987 [inline]
>  netlink_alloc_large_skb net/netlink/af_netlink.c:1182 [inline]
>  netlink_sendmsg+0xb01/0xfa0 net/netlink/af_netlink.c:1876
>  sock_sendmsg_nosec net/socket.c:629 [inline]
>  sock_sendmsg+0xd5/0x120 net/socket.c:639
>  ___sys_sendmsg+0x805/0x940 net/socket.c:2117
>  __sys_sendmsg+0x115/0x270 net/socket.c:2155
>  __do_sys_sendmsg net/socket.c:2164 [inline]
>  __se_sys_sendmsg net/socket.c:2162 [inline]
>  __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2162
>  do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287
>  entry_SYSCALL_64_after_hwframe+0x49/0xbe
>
> Freed by task 4484:
>  save_stack+0x43/0xd0 mm/kasan/kasan.c:448
>  set_track mm/kasan/kasan.c:460 [inline]
>  __kasan_slab_free+0x11a/0x170 mm/kasan/kasan.c:521
>  kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528
>  __cache_free mm/slab.c:3498 [inline]
>  kfree+0xd9/0x260 mm/slab.c:3813
>  skb_free_head+0x99/0xc0 net/core/skbuff.c:550
>  skb_release_data+0x690/0x860 net/core/skbuff.c:570
>  skb_release_all+0x4a/0x60 net/core/skbuff.c:627
>  __kfree_skb net/core/skbuff.c:641 [inline]
>  consume_skb+0x18b/0x550 net/core/skbuff.c:701
>  netlink_unicast_kernel net/netlink/af_netlink.c:1311 [inline]
>  netlink_unicast+0x593/0x740 net/netlink/af_netlink.c:1336
>  netlink_sendmsg+0x9f0/0xfa0 net/netlink/af_netlink.c:1901
>

Re: WARNING: kernel stack regs has bad 'bp' value (2)

2017-12-27 Thread Dmitry Vyukov

On Thu, Nov 30, 2017 at 10:17 AM, Eric Biggers  wrote:
> On Tue, Nov 28, 2017 at 10:36:01AM -0800, syzbot wrote:
>> WARNING: kernel stack regs at 8801c1e5f468 in syzkaller196611:6199 has
>> bad 'bp' value 0001
>> unwind stack type:0 next_sp:  (null) mask:0x6 graph_idx:0
>> 8801db4075a8: 8801db407630 (0x8801db407630)
>> 8801db4075b0: 8128a84e (__save_stack_trace+0x6e/0xd0)
>> 8801db4075b8:  ...
>> 8801db4075c0: 8801c1e58000 (0x8801c1e58000)
>> 8801db4075c8: 8801c1e6 (0x8801c1e6)
>> 8801db4075d0:  ...
>> 8801db4075d8: 0006 (0x6)
>> 8801db4075e0: 8801c1e4e000 (0x8801c1e4e000)
>> 8801db4075e8: 0101 (0x101)
>> 8801db4075f0:  ...
>> 8801db4075f8: 8801db4075a8 (0x8801db4075a8)
>> 8801db407600: 8134ff7d (__twofish_enc_blk_3way+0x1b1d/0x1b30)
>
> Looks like the x86_64 "3 way" version of Twofish 
> (twofish-x86_64-asm_64-3way.S)
> needs to be updated to not use %rbp.


This is what is supposed to be fixed with "crypto: x86/twofish-3way -
Fix %rbp usage", right? Was it merged anywhere?
This is one of top crashers with 15K crashes.

Re: WARNING: kernel stack regs has bad 'bp' value (3)

2018-02-02 Thread Dmitry Vyukov

On Fri, Feb 2, 2018 at 2:48 PM, syzbot
 wrote:
> Hello,
>
> syzbot hit the following crash on upstream commit
> 7109a04eae81c41ed529da9f3c48c3655ccea741 (Thu Feb 1 17:37:30 2018 +)
> Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/ide
>
> So far this crash happened 4 times on net-next, upstream.
> C reproducer is attached.
> syzkaller reproducer is attached.
> Raw console output is attached.
> compiler: gcc (GCC) 7.1.1 20170620
> .config is attached.


>From suspicious frames I see salsa20_asm_crypt there, so +crypto maintainers.


> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+ffa3a158337bbc01f...@syzkaller.appspotmail.com
> It will help syzbot understand when the bug is fixed. See footer for
> details.
> If you forward the report, please keep this part and the footer.
>
> WARNING: kernel stack regs at a8291e69 in syzkaller047086:4677 has
> bad 'bp' value 1077994c
> unwind stack type:0 next_sp:  (null) mask:0x6 graph_idx:0
> 1d3b7fe2: 8801db4075c8 (0x8801db4075c8)
> 83b445d5: 8128e6de (__save_stack_trace+0x6e/0xd0)
> b52ac563:  ...
> 3035eb8b: 8801addd (0x8801addd)
> ee6283c3: 8801addd8000 (0x8801addd8000)
> 331afaf0:  ...
> b93daa43: 0006 (0x6)
> aa09edca: 8801acaca4c0 (0x8801acaca4c0)
> 5388d67c: 0101 (0x101)
> d567f6b6:  ...
> fd90d54b: 8801db407540 (0x8801db407540)
> d598c5fd: 8135b07e (._mainloop+0x187/0x4ca)
> 255a8082: 8801addd7658 (0x8801addd7658)
> 0175a1d9: 0100 (0x100)
> 6420fb62: 8801aca0c780 (0x8801aca0c780)
> ef007705: 8801aca0c7a0 (0x8801aca0c7a0)
> c3a16804: 82213878 (selinux_cred_free+0x48/0x70)
> 35d2f6f8: 8801db4075d8 (0x8801db4075d8)
> d255d236: 8128e75a (save_stack_trace+0x1a/0x20)
> ad4323cc: 8801db407808 (0x8801db407808)
> a76fbd41: 81a8d883 (save_stack+0x43/0xd0)
> 104bc778: 004c (0x4c)
> 920efa26: 8801db407600 (0x8801db407600)
> a66a1c57:  (0x)
> 602c97d2: 81a8d883 (save_stack+0x43/0xd0)
> 11ee1976: 81a8e191 (kasan_slab_free+0x71/0xc0)
> c84a6163: 81a8bf06 (kfree+0xd6/0x260)
> e47cbeab: 82213878 (selinux_cred_free+0x48/0x70)
> 2d741013: 821fddd8 (security_cred_free+0x48/0x80)
> 4adf7771: 814a82b6 (put_cred_rcu+0x106/0x400)
> 30fd4806: 815e6f2c (rcu_process_callbacks+0xd6c/0x17f0)
> 3b1f46f7: 85c002d7 (__do_softirq+0x2d7/0xb85)
> f227b7b3: 8142fbac (irq_exit+0x1cc/0x200)
> 5a82eab3: 85a05adb (smp_apic_timer_interrupt+0x16b/0x700)
> 055afa4e: 85a01d69 (apic_timer_interrupt+0xa9/0xb0)
> a35590a8: 8135b07e (._mainloop+0x187/0x4ca)
> 179a751c: 86b45498 (rcu_sched_state+0x18/0x1520)
> 0ba49ddf: 8801acaca4c0 (0x8801acaca4c0)
> a404c5c9: 41b58ab3 (0x41b58ab3)
> 1d2af172: 867cd428 (regoff.32610+0x28dca8/0x29dc80)
> 584a687b: 81563440 (print_irqtrace_events+0x270/0x270)
> 6c14677d: 11003b680edd (0x11003b680edd)
> 9dfd46e8: 88271400 (obj_hash+0xebe00/0x100020)
> ba757625: 8801aca0d000 (0x8801aca0d000)
> 6f6bffaa: 8801db407708 (0x8801db407708)
> 0e864f6d: 11003b680edd (0x11003b680edd)
> 005451e3: 8801aca0c7a0 (0x8801aca0c7a0)
> 605a674b: 8801aca0c780 (0x8801aca0c780)
> 5f59b991: 000ebe00 (0xebe00)
> c279fdba: fbfff104e280 (0xfbfff104e280)
> 3f76ca22: 8801aca0d000 (0x8801aca0d000)
> a56ab73c: 88271408 (obj_hash+0xebe08/0x100020)
> ac22805a: 8801db407708 (0x8801db407708)
> f9e3111c: 41b58ab3 (0x41b58ab3)
> f1cc5486: 8681d8f0 (K512_4+0x3e6f0/0xd09a0)
> 64d36645: 825e6230 (free_obj_work+0x690/0x690)
> 2f3949a8: 41b58ab3 (0x41b58ab3)
> 37d72c89:  ...
> bec2a1d3: 81563440 (print_irqtrace_events+0x270/0x270)
> 1b90ad27: 88145240 (console_drivers+0x40/0x40)
> d2c3d196: 8801db407858 (0x8801db407858)
> c4fe609c: 8801acaca4c0 (0x8801acaca4c0)
> 02ee263f: 8801db407798 (0x8801db407798)
> 0be75ad5: 11003b680eef (0x11003b680eef)
> 78f83b59: 8801db407880 (0x8801db407880)
> 43ac401f: 0082 (0x82)
> 6e6e547c:

Re: BUG: unable to handle kernel paging request in hmac_init_tfm

2017-12-22 Thread Dmitry Vyukov

On Fri, Dec 22, 2017 at 3:27 AM, Eric Biggers <ebigge...@gmail.com> wrote:
> On Thu, Dec 21, 2017 at 08:44:03AM +0100, 'Dmitry Vyukov' via syzkaller-bugs 
> wrote:
>> On Thu, Dec 21, 2017 at 12:09 AM, Eric Biggers <ebigge...@gmail.com> wrote:
>> > On Mon, Dec 18, 2017 at 11:36:01AM -0800, syzbot wrote:
>> >> Hello,
>> >>
>> >> syzkaller hit the following crash on
>> >> 6084b576dca2e898f5c101baef151f7bfdbb606d
>> >> git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/master
>> >> compiler: gcc (GCC) 7.1.1 20170620
>> >> .config is attached
>> >
>> > FYI, in linux-next KASAN and other memory debugging options are now behind
>> > CONFIG_DEBUG_MEMORY.  So, I think KASAN isn't getting turned on anymore, 
>> > despite
>> > the CONFIG_KASAN=y.  (Patch was "lib/: make "Memory Debugging" a 
>> > menuconfig to
>> > ease disabling it all".)
>>
>> Ouch! That would be pretty bad.
>>
>> But I've tried both linux-next HEAD at:
>>
>> commit 0e08c463db387a2adcb0243b15ab868a73f87807 (HEAD, tag:
>> next-20171221, linux-next/master)
>> Author: Stephen Rothwell <s...@canb.auug.org.au>
>> Date:   Thu Dec 21 15:37:39 2017 +1100
>> Add linux-next specific files for 20171221
>>
>> and mmots HEAD at:
>>
>> commit 75aa5540627fdb3d8f86229776ea87f995275351 (HEAD, tag:
>> v4.15-rc4-mmots-2017-12-20-19-10, mmots/master)
>> Author: Linus Torvalds <torva...@linux-foundation.org>
>> Date:   Thu Dec 21 04:02:17 2017 +
>> pci: test for unexpectedly disabled bridges
>>
>> and after running make olddefconfig I still see CONFIG_KASAN=y in
>> .config, nor I can find CONFIG_DEBUG_MEMORY in menuconfig.
>>
>> What am I missing? What commit has added the config?
>>
>
> Ah, I see the patch was added to -mm on 2017-12-12 but removed two days later:
>
> https://www.spinics.net/lists/mm-commits/msg129685.html
> https://www.spinics.net/lists/mm-commits/msg129696.html
>
> So it was in linux-next temporarily (including the kernel version used for 
> this
> particular bug report) but now it's not.  Just keep an eye out for it coming
> back, I guess.


I see. Then I will add CONFIG_DEBUG_MEMORY=y to our config
proactively. Without that patch olddefconfig will just wipe that
config, so should work.

Re: WARNING: kernel stack regs has bad 'bp' value (2)

2017-12-27 Thread Dmitry Vyukov

On Wed, Dec 27, 2017 at 7:29 PM, Dmitry Vyukov <dvyu...@google.com> wrote:
> On Thu, Nov 30, 2017 at 10:17 AM, Eric Biggers <ebigge...@gmail.com> wrote:
>> On Tue, Nov 28, 2017 at 10:36:01AM -0800, syzbot wrote:
>>> WARNING: kernel stack regs at 8801c1e5f468 in syzkaller196611:6199 has
>>> bad 'bp' value 0001
>>> unwind stack type:0 next_sp:  (null) mask:0x6 graph_idx:0
>>> 8801db4075a8: 8801db407630 (0x8801db407630)
>>> 8801db4075b0: 8128a84e (__save_stack_trace+0x6e/0xd0)
>>> 8801db4075b8:  ...
>>> 8801db4075c0: 8801c1e58000 (0x8801c1e58000)
>>> 8801db4075c8: 8801c1e6 (0x8801c1e6)
>>> 8801db4075d0:  ...
>>> 8801db4075d8: 0006 (0x6)
>>> 8801db4075e0: 8801c1e4e000 (0x8801c1e4e000)
>>> 8801db4075e8: 0101 (0x101)
>>> 8801db4075f0:  ...
>>> 8801db4075f8: 8801db4075a8 (0x8801db4075a8)
>>> 8801db407600: 8134ff7d (__twofish_enc_blk_3way+0x1b1d/0x1b30)
>>
>> Looks like the x86_64 "3 way" version of Twofish 
>> (twofish-x86_64-asm_64-3way.S)
>> needs to be updated to not use %rbp.
>
>
> This is what is supposed to be fixed with "crypto: x86/twofish-3way -
> Fix %rbp usage", right? Was it merged anywhere?
> This is one of top crashers with 15K crashes.


#syz fix: crypto: x86/twofish-3way - Fix %rbp usage

Re: WARNING in kmem_cache_free

2018-04-08 Thread Dmitry Vyukov

On Sun, Apr 8, 2018 at 12:26 PM, Dmitry Vyukov <dvyu...@google.com> wrote:
> On Sun, Apr 8, 2018 at 8:01 AM, Matthew Wilcox <wi...@infradead.org> wrote:
>> On Fri, Apr 06, 2018 at 03:33:36PM +0200, Dmitry Vyukov wrote:
>>> On Fri, Apr 6, 2018 at 3:24 PM, syzbot
>>> <syzbot+75397ee3df5c70164...@syzkaller.appspotmail.com> wrote:
>>> > Unfortunately, I don't have any reproducer for this crash yet.
>>>
>>> Interesting type of bug, I think we see this for the first time.
>>
>> Can you focus syzbot to try to find a reproducer?  This seems to be
>> produced by calling mount() with a pathname that's somewhere between,
>> say, 3950 & 4100 bytes long from a compat 32-bit task.
>
>
> Something in the log definitely triggers a very bad heap corruption.
>
> This can be reproduced following instructions at:
> https://github.com/google/syzkaller/blob/master/docs/syzbot.md#syzkaller-reproducers
>
> and then running:
> ./syz-execprog -sandbox=namespace -arch=386 -repeat=0 -procs=10 log.txt
>
> where log.txt comes from "Raw console output" link.
>
> Note that you need to build syzkaller with 'make TARGETARCH=386' and
> the use bin/linux_386/syz-executor.
>
> While running it I got:
> BUG: KASAN: double-free or invalid-free in free_request_size+0x5b/0x70
> block/blk-core.c:769
> https://gist.githubusercontent.com/dvyukov/05f4e77a34795d329aa7a2f40265e396/raw/63a29123b79f1fbad3521d0ff034946be68bfd4a/gistfile1.txt
>
> Then kernel BUG at mm/slab.c:4407!
> https://gist.githubusercontent.com/dvyukov/5b3bcc90d326e9da3636aea2c95ace8f/raw/1589504c708994936681d61ba9d70029998b9b1a/gistfile1.txt
>
> And then BUG: unable to handle kernel paging request at ebe00020
> https://gist.githubusercontent.com/dvyukov/72025b1c68e488f4fda243e0c152f044/raw/d2c171bc55ad3a43cea33095fa2eea48768b1131/gistfile1.txt
>
> One interesting thing is that if I run the log once and it does not
> crash, then when I try to start binary again I am getting:
> [  456.837870] Invalid argument reading file caps for /root/syz-executor
> The binary somehow becomes broken on disk...
>
> I guess syzbot did find a reproducer in this log, but did not
> attribute it to this bug as it causes crashes all over the place.



Running syz-repro utility on this log, I think I've found the guilty guy:
https://gist.githubusercontent.com/dvyukov/1dd75d55efd238e7207af1cc38478b3a/raw/403859b56b161a6fbb158e8953fac5bb6e73b1a1/gistfile1.txt

It crashes as:
BUG: KASAN: use-after-free in drbg_kcapi_seed+0x1178/0x12e0
and:
BUG: unable to handle kernel paging request at ebe00020
and with other indications of badly corrupted heap.

This points to crypto/drbg.c, so +crypto maintainers.

Re: WARNING in kmem_cache_free

2018-04-08 Thread Dmitry Vyukov

On Sun, Apr 8, 2018 at 5:31 PM, Stephan Müller <smuel...@chronox.de> wrote:
> Am Sonntag, 8. April 2018, 13:18:06 CEST schrieb Dmitry Vyukov:
>
> Hi Dmitry,
>
>>
>> Running syz-repro utility on this log, I think I've found the guilty guy:
>> https://gist.githubusercontent.com/dvyukov/1dd75d55efd238e7207af1cc38478b3a/
>> raw/403859b56b161a6fbb158e8953fac5bb6e73b1a1/gistfile1.txt
>>
>
> I am unable to reproduce it with the code. I am using the current
> cryptodev-2.6 tree with kazan enabled. Could you please give me your kernel
> config or a pointer of the used tree?

Hi,

Here is config and kernel commit:
https://groups.google.com/d/msg/syzkaller-bugs/PINYyzoaG1s/ntZPOZdcCAAJ
You can also find compiler and image here if necessary:
https://github.com/google/syzkaller/blob/master/docs/syzbot.md

And note that the program needs to be compiled with -m32. The bugs is
probably not-compat specific, but the program injects fault into a
particular malloc invocation and maybe malloc numbering is affected by
compat path.


>> It crashes as:
>> BUG: KASAN: use-after-free in drbg_kcapi_seed+0x1178/0x12e0
>> and:
>> BUG: unable to handle kernel paging request at ebe00020
>> and with other indications of badly corrupted heap.
>>
>> This points to crypto/drbg.c, so +crypto maintainers.
>
>
> Ciao
> Stephan
>
>
> --
> You received this message because you are subscribed to the Google Groups 
> "syzkaller-bugs" group.
> To unsubscribe from this group and stop receiving emails from it, send an 
> email to syzkaller-bugs+unsubscr...@googlegroups.com.
> To view this discussion on the web visit 
> https://groups.google.com/d/msgid/syzkaller-bugs/4564679.HlOejCIXXz%40positron.chronox.de.
> For more options, visit https://groups.google.com/d/optout.

Re: [PATCH] crypto: DRBG - guard uninstantion by lock

2018-04-10 Thread Dmitry Vyukov

On Mon, Apr 9, 2018 at 9:57 AM, Dmitry Vyukov <dvyu...@google.com> wrote:
> On Mon, Apr 9, 2018 at 7:40 AM, Stephan Mueller <smuel...@chronox.de> wrote:
>> Am Montag, 9. April 2018, 00:46:03 CEST schrieb Theodore Y. Ts'o:
>>
>> Hi Theodore,
>>>
>>> So the syzbot will run while the patch goes through the normal e-mail
>>> review process, which is kind of neat.  :-)
>>
>> Thank you very much for the hint. That is a neat feature indeed.
>>
>> As I came late to the party and I missed the original mails, I am wondering
>> about which GIT repo was used and which branch of it. With that, I would be
>> happy to resubmit with the test line.
>
> All syzbot reported bugs are available here:
> https://groups.google.com/forum/#!searchin/syzkaller-bugs/"WARNING$20in$20kmem_cache_free;
> and here:
> https://syzkaller.appspot.com/
>
> But unfortunately testing won't work in this case, because I manually
> extracted a reproducer and syzbot does not know about it. This bug
> seems to lead to assorted silent heap corruptions and different
> manifestations each time, so it's difficult for syzbot to attribute a
> reproducer to the bug. When we debug it, it would be nice to
> understand why the heap corruption is silent and is not detected by
> KASAN and anything else, to prevent such unpleasant cases in future.
>
> I've tested it manually, but unfortunately kernel still crashed within a 
> minute:

Stephan,

Do you have any hypothesis as to why this is not detected by KASAN and
causes silent corruptions?
We generally try to understand such cases and improve KASAN so that it
catches such cases more reliably and they do not cause splashes of
random crashes on syzbot.

Thanks



> $ git status
> HEAD detached at f2d285669aae
> Changes not staged for commit:
>   (use "git add ..." to update what will be committed)
>   (use "git checkout -- ..." to discard changes in working directory)
>
> modified:   crypto/drbg.c
>
> $ git diff
> diff --git a/crypto/drbg.c b/crypto/drbg.c
> index 4faa2781c964..68c1949a253f 100644
> --- a/crypto/drbg.c
> +++ b/crypto/drbg.c
> @@ -1510,8 +1510,8 @@ static int drbg_instantiate(struct drbg_state
> *drbg, struct drbg_string *pers,
> return ret;
>
>  free_everything:
> -   mutex_unlock(>drbg_mutex);
> drbg_uninstantiate(drbg);
> +   mutex_unlock(>drbg_mutex);
> return ret;
>  }
>
> # ./a.out
> ...
> [  183.647874] FAULT_INJECTION: forcing a failure.
> [  183.647874] name failslab, interval 1, probability 0, space 0, times 0
> [  183.648287] Call Trace:
> [  183.648297]  dump_stack+0x1b9/0x29f
> [  183.648306]  ? arch_local_irq_restore+0x52/0x52
> [  183.648318]  ? __save_stack_trace+0x7e/0xd0
> [  183.651848]  should_fail.cold.4+0xa/0x1a
> [  183.652411]  ? fault_create_debugfs_attr+0x1f0/0x1f0
> [  183.653138]  ? kasan_kmalloc+0xc4/0xe0
> [  183.653694]  ? __kmalloc+0x14e/0x760
> [  183.654206]  ? drbg_kcapi_seed+0x776/0x12e0
> [  183.654798]  ? crypto_rng_reset+0x7c/0x130
> [  183.655379]  ? rng_setkey+0x25/0x30
> [  183.655882]  ? alg_setsockopt+0x306/0x3b0
> [  183.656450]  ? graph_lock+0x170/0x170
> [  183.656975]  ? entry_SYSENTER_compat+0x70/0x7f
> [  183.657606]  ? find_held_lock+0x36/0x1c0
> [  183.658164]  ? __lock_is_held+0xb5/0x140
> [  183.658728]  ? check_same_owner+0x320/0x320
> [  183.659321]  ? rcu_note_context_switch+0x710/0x710
> [  183.66]  should_failslab+0x124/0x180
> [  183.660561]  __kmalloc+0x2c8/0x760
> [  183.661046]  ? graph_lock+0x170/0x170
> [  183.661569]  ? drbg_kcapi_seed+0x882/0x12e0
> [  183.662161]  drbg_kcapi_seed+0x882/0x12e0
> [  183.662731]  ? drbg_seed+0x10a0/0x10a0
> [  183.663267]  ? lock_downgrade+0x8e0/0x8e0
> [  183.663833]  ? lock_acquire+0x1dc/0x520
> [  183.664385]  ? lock_release+0xa10/0xa10
> [  183.664934]  ? check_same_owner+0x320/0x320
> [  183.665530]  ? __sanitizer_cov_trace_const_cmp8+0x18/0x20
> [  183.666292]  ? __check_object_size+0x95/0x5d9
> [  183.666904]  ? sock_kmalloc+0x14e/0x1d0
> [  183.667444]  ? mark_held_locks+0xc9/0x160
> [  183.668020]  ? __might_sleep+0x95/0x190
> [  183.668567]  crypto_rng_reset+0x7c/0x130
> [  183.669124]  rng_setkey+0x25/0x30
> [  183.669598]  ? rng_sock_destruct+0x90/0x90
> [  183.670176]  alg_setsockopt+0x306/0x3b0
> [  183.670724]  __compat_sys_setsockopt+0x315/0x7c0
> [  183.671375]  ? __compat_sys_getsockopt+0x7f0/0x7f0
> [  183.672057]  ? __sanitizer_cov_trace_const_cmp4+0x16/0x20
> [  183.672813]  ? ksys_write+0x1a6/0x250
> [  183.67]  ? SyS_read+0x30/0x30
> [  183.673811]  compat_SyS_setsockopt+0x34/0x50
> [  183.674416]  ? scm_detach_fds_compat+0x440/0x440

Re: [PATCH] crypto: DRBG - guard uninstantion by lock

2018-04-11 Thread Dmitry Vyukov

On Tue, Apr 10, 2018 at 5:35 PM, Stephan Mueller <smuel...@chronox.de> wrote:
> Am Dienstag, 10. April 2018, 17:23:46 CEST schrieb Dmitry Vyukov:
>
> Hi Dmitry,
>
>> Stephan,
>>
>> Do you have any hypothesis as to why this is not detected by KASAN and
>> causes silent corruptions?
>> We generally try to understand such cases and improve KASAN so that it
>> catches such cases more reliably and they do not cause splashes of
>> random crashes on syzbot.
>
> I do not have any hypothesis at this point. I know that you induce some fault.
> As you mentioned the drbg_kcapi_seed function, I was looking through the error
> code paths to see whether some error handlers trip over each other. But all is
> guesswork so far. And I am not even sure whether the bug is in the DRBG code
> base.
>
> Looking into the trace you sent, I see a NULL pointer dereference. At one
> point there is also the drbg_init_hash_kernel that is called. But nowhere I
> see any smoking gun.
>
> Could you please give me a description of the fault you are inducing?

Hi Stephan,

What do you mean by description of the fault?
It's kernel standard FAULT_INJECTION facility, it injects faults
mainly into kmalloc/slab_alloc (also in a bunch of other things, but
in this case this seems to be kmalloc). In the repro you can see that
it's injecting a fault into 8-th malloc in the setsockopt syscall.

I wonder why you can't reproduce it. I can trigger it reliably in a
qemu. Let's try this:
I have upstream kernel on b284d4d5a6785f8cd07eda2646a95782373cd01e.
Here is my config:
https://gist.githubusercontent.com/dvyukov/f843ea09bc5b9439a820c8e809a5501d/raw/ad330e9b6b710f57f63b61590747b48230e5cb61/gistfile1.txt
Here is the compiler:
https://storage.googleapis.com/syzkaller/gcc-8.0.1-20180301.tar.gz
Build as: make -jN CC=that/gcc/bin/gcc
Then I start qemu as:

qemu-system-x86_64 -hda wheezy.img -net
user,host=10.0.2.10,hostfwd=tcp::10022-:22 -net nic -nographic -kernel
arch/x86/boot/bzImage -append "kvm-intel.nested=1
kvm-intel.unrestricted_guest=1 kvm-intel.ept=1
kvm-intel.flexpriority=1 kvm-intel.vpid=1
kvm-intel.emulate_invalid_guest_state=1 kvm-intel.eptad=1
kvm-intel.enable_shadow_vmcs=1 kvm-intel.pml=1
kvm-intel.enable_apicv=1 console=ttyS0 root=/dev/sda
earlyprintk=serial slub_debug=UZ vsyscall=native rodata=n oops=panic
panic_on_warn=1 panic=86400" -enable-kvm -pidfile vm_pid -m 2G -smp 4
-cpu host

You can find the wheezy.img and ssh key for it here:
https://github.com/google/syzkaller/blob/master/docs/syzbot.md#crash-does-not-reproduce

Then I compile this program:
https://gist.githubusercontent.com/dvyukov/1dd75d55efd238e7207af1cc38478b3a/raw/403859b56b161a6fbb158e8953fac5bb6e73b1a1/gistfile1.txt
as: gcc prog.c -static -m32

Run in the qemu and within a minute it gives me the crash.

Re: [PATCH] crypto: DRBG - guard uninstantion by lock

2018-04-11 Thread Dmitry Vyukov

On Wed, Apr 11, 2018 at 4:26 PM, Stephan Müller  wrote:
> Hi Dimitry,
>
> This fix prevents the kernel from crashing when injecting the fault.

Good!

> Stack traces are yet shown but I guess that is expected every time
> a fault is injected.

Yes, nothing to fix here.

> As to why KASAN did not notice this one, I am not sure. Maybe it is
> because I use two buffer pointers to point to (almost) the same memory
> (one that is aligned and one pointing to the complete buffer)?

After looking at the fix, I figured out what happened with KASAN.
Filed https://bugzilla.kernel.org/show_bug.cgi?id=199359. In short,
tricky interplay between kzfree, ksize and double-free detection.
If KASAN worked as intended it would give a nice "double-free in this
stack for object allocated in this stack and previously freed in this
stack", which would probably make debugging much simpler.


> ---8<---
>
> During freeing of the internal buffers used by the DRBG, set the pointer
> to NULL. It is possible that the context with the freed buffers is
> reused. In case of an error during initialization where the pointers
> do not yet point to allocated memory, the NULL value prevents a double
> free.
>
> Signed-off-by: Stephan Mueller 
> Reported-by: syzbot+75397ee3df5c70164...@syzkaller.appspotmail.com
> ---
>  crypto/drbg.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/crypto/drbg.c b/crypto/drbg.c
> index 4faa2781c964..466a112a4446 100644
> --- a/crypto/drbg.c
> +++ b/crypto/drbg.c
> @@ -1134,8 +1134,10 @@ static inline void drbg_dealloc_state(struct 
> drbg_state *drbg)
> if (!drbg)
> return;
> kzfree(drbg->Vbuf);
> +   drbg->Vbuf = NULL;
> drbg->V = NULL;
> kzfree(drbg->Cbuf);
> +   drbg->Cbuf = NULL;
> drbg->C = NULL;
> kzfree(drbg->scratchpadbuf);
> drbg->scratchpadbuf = NULL;
> --
> 2.14.3
>
>
>
>
> --
> You received this message because you are subscribed to the Google Groups 
> "syzkaller-bugs" group.
> To unsubscribe from this group and stop receiving emails from it, send an 
> email to syzkaller-bugs+unsubscr...@googlegroups.com.
> To view this discussion on the web visit 
> https://groups.google.com/d/msgid/syzkaller-bugs/2186798.qrgUIDAn9S%40positron.chronox.de.
> For more options, visit https://groups.google.com/d/optout.

Re: [PATCH] AF_ALG: register completely initialized request in list

2018-04-09 Thread Dmitry Vyukov

On Sun, Apr 8, 2018 at 7:57 PM, Stephan Müller  wrote:
> Hi,
>
> May I ask to check whether this patch fixes the issue? I cannot re-create
> the issue with the reproducter. Yet, as far as I understand, you try to
> induce errors which shall validate whether the error code paths are correct.

You can ask syzbot to test by replying to its report email with a test
command, see:
https://github.com/google/syzkaller/blob/master/docs/syzbot.md#communication-with-syzbot

Note that all testing of KMSAN bugs needs to go to KMSAN tree, for details see:
https://github.com/google/syzkaller/blob/master/docs/syzbot.md#kmsan-bugs




> The fix below should ensure this now.
>
> Thanks a lot.
>
> ---8<---
>
> From 8f083e7b0684a9f91c186d7b46eec34e439689c3 Mon Sep 17 00:00:00 2001
> From: Stephan Mueller 
> Date: Sun, 8 Apr 2018 19:53:59 +0200
> Subject: [PATCH] AF_ALG: Initialize sg_num_bytes in error code path
>
> The RX SGL in processing is already registered with the RX SGL tracking
> list to support proper cleanup. The cleanup code path uses the
> sg_num_bytes variable which must therefore be always initialized, even
> in the error code path.
>
> Signed-off-by: Stephan Mueller 
> Reported-by: syzbot+9c251bdd09f83b92b...@syzkaller.appspotmail.com
> ---
>  crypto/af_alg.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/crypto/af_alg.c b/crypto/af_alg.c
> index c49766b03165..0d555c072669 100644
> --- a/crypto/af_alg.c
> +++ b/crypto/af_alg.c
> @@ -1156,8 +1156,10 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr 
> *msg, int flags,
>
> /* make one iovec available as scatterlist */
> err = af_alg_make_sg(>sgl, >msg_iter, seglen);
> -   if (err < 0)
> +   if (err < 0) {
> +   rsgl->sg_num_bytes = 0;
> return err;
> +   }
>
> /* chain the new scatterlist with previous one */
> if (areq->last_rsgl)
> --
> 2.14.3
>
>
>
>
>
> --
> You received this message because you are subscribed to the Google Groups 
> "syzkaller-bugs" group.
> To unsubscribe from this group and stop receiving emails from it, send an 
> email to syzkaller-bugs+unsubscr...@googlegroups.com.
> To view this discussion on the web visit 
> https://groups.google.com/d/msgid/syzkaller-bugs/3337259.MW9pfDCdka%40positron.chronox.de.
> For more options, visit https://groups.google.com/d/optout.

Re: [PATCH] crypto: DRBG - guard uninstantion by lock

2018-04-09 Thread Dmitry Vyukov

On Mon, Apr 9, 2018 at 7:40 AM, Stephan Mueller  wrote:
> Am Montag, 9. April 2018, 00:46:03 CEST schrieb Theodore Y. Ts'o:
>
> Hi Theodore,
>>
>> So the syzbot will run while the patch goes through the normal e-mail
>> review process, which is kind of neat.  :-)
>
> Thank you very much for the hint. That is a neat feature indeed.
>
> As I came late to the party and I missed the original mails, I am wondering
> about which GIT repo was used and which branch of it. With that, I would be
> happy to resubmit with the test line.

All syzbot reported bugs are available here:
https://groups.google.com/forum/#!searchin/syzkaller-bugs/"WARNING$20in$20kmem_cache_free;
and here:
https://syzkaller.appspot.com/

But unfortunately testing won't work in this case, because I manually
extracted a reproducer and syzbot does not know about it. This bug
seems to lead to assorted silent heap corruptions and different
manifestations each time, so it's difficult for syzbot to attribute a
reproducer to the bug. When we debug it, it would be nice to
understand why the heap corruption is silent and is not detected by
KASAN and anything else, to prevent such unpleasant cases in future.

I've tested it manually, but unfortunately kernel still crashed within a minute:

$ git status
HEAD detached at f2d285669aae
Changes not staged for commit:
  (use "git add ..." to update what will be committed)
  (use "git checkout -- ..." to discard changes in working directory)

modified:   crypto/drbg.c

$ git diff
diff --git a/crypto/drbg.c b/crypto/drbg.c
index 4faa2781c964..68c1949a253f 100644
--- a/crypto/drbg.c
+++ b/crypto/drbg.c
@@ -1510,8 +1510,8 @@ static int drbg_instantiate(struct drbg_state
*drbg, struct drbg_string *pers,
return ret;

 free_everything:
-   mutex_unlock(>drbg_mutex);
drbg_uninstantiate(drbg);
+   mutex_unlock(>drbg_mutex);
return ret;
 }

# ./a.out
...
[  183.647874] FAULT_INJECTION: forcing a failure.
[  183.647874] name failslab, interval 1, probability 0, space 0, times 0
[  183.648287] Call Trace:
[  183.648297]  dump_stack+0x1b9/0x29f
[  183.648306]  ? arch_local_irq_restore+0x52/0x52
[  183.648318]  ? __save_stack_trace+0x7e/0xd0
[  183.651848]  should_fail.cold.4+0xa/0x1a
[  183.652411]  ? fault_create_debugfs_attr+0x1f0/0x1f0
[  183.653138]  ? kasan_kmalloc+0xc4/0xe0
[  183.653694]  ? __kmalloc+0x14e/0x760
[  183.654206]  ? drbg_kcapi_seed+0x776/0x12e0
[  183.654798]  ? crypto_rng_reset+0x7c/0x130
[  183.655379]  ? rng_setkey+0x25/0x30
[  183.655882]  ? alg_setsockopt+0x306/0x3b0
[  183.656450]  ? graph_lock+0x170/0x170
[  183.656975]  ? entry_SYSENTER_compat+0x70/0x7f
[  183.657606]  ? find_held_lock+0x36/0x1c0
[  183.658164]  ? __lock_is_held+0xb5/0x140
[  183.658728]  ? check_same_owner+0x320/0x320
[  183.659321]  ? rcu_note_context_switch+0x710/0x710
[  183.66]  should_failslab+0x124/0x180
[  183.660561]  __kmalloc+0x2c8/0x760
[  183.661046]  ? graph_lock+0x170/0x170
[  183.661569]  ? drbg_kcapi_seed+0x882/0x12e0
[  183.662161]  drbg_kcapi_seed+0x882/0x12e0
[  183.662731]  ? drbg_seed+0x10a0/0x10a0
[  183.663267]  ? lock_downgrade+0x8e0/0x8e0
[  183.663833]  ? lock_acquire+0x1dc/0x520
[  183.664385]  ? lock_release+0xa10/0xa10
[  183.664934]  ? check_same_owner+0x320/0x320
[  183.665530]  ? __sanitizer_cov_trace_const_cmp8+0x18/0x20
[  183.666292]  ? __check_object_size+0x95/0x5d9
[  183.666904]  ? sock_kmalloc+0x14e/0x1d0
[  183.667444]  ? mark_held_locks+0xc9/0x160
[  183.668020]  ? __might_sleep+0x95/0x190
[  183.668567]  crypto_rng_reset+0x7c/0x130
[  183.669124]  rng_setkey+0x25/0x30
[  183.669598]  ? rng_sock_destruct+0x90/0x90
[  183.670176]  alg_setsockopt+0x306/0x3b0
[  183.670724]  __compat_sys_setsockopt+0x315/0x7c0
[  183.671375]  ? __compat_sys_getsockopt+0x7f0/0x7f0
[  183.672057]  ? __sanitizer_cov_trace_const_cmp4+0x16/0x20
[  183.672813]  ? ksys_write+0x1a6/0x250
[  183.67]  ? SyS_read+0x30/0x30
[  183.673811]  compat_SyS_setsockopt+0x34/0x50
[  183.674416]  ? scm_detach_fds_compat+0x440/0x440
[  183.675079]  do_fast_syscall_32+0x41f/0x10dc
[  183.675725]  ? do_page_fault+0xee/0x8a7
[  183.676284]  ? do_int80_syscall_32+0xa70/0xa70
[  183.676925]  ? trace_hardirqs_on_thunk+0x1a/0x1c
[  183.677590]  ? __sanitizer_cov_trace_const_cmp4+0x16/0x20
[  183.678348]  ? syscall_return_slowpath+0x30f/0x5c0
[  183.679026]  ? sysret32_from_system_call+0x5/0x3c
[  183.679694]  ? trace_hardirqs_off_thunk+0x1a/0x1c
[  183.680380]  entry_SYSENTER_compat+0x70/0x7f
[  183.681000] RIP: 0023:0xf7f0ecb9
[  183.681488] RSP: 002b:ffeb1e9c EFLAGS: 0296 ORIG_RAX:
016e
[  183.682606] RAX: ffda RBX: 0003 RCX: 0117
[  183.683620] RDX: 0001 RSI: 205b1fd0 RDI: 
[  183.684602] RBP: 2040 R08:  R09: 
[  183.685622] R10:  R11:  R12: 
[  183.686642] R13:  R14:

Re: WARNING: kernel stack frame pointer has bad value

2018-04-19 Thread Dmitry Vyukov

On Thu, Apr 19, 2018 at 5:57 PM, syzbot
 wrote:
> Hello,
>
> syzbot hit the following crash on upstream commit
> 48023102b7078a6674516b1fe0d639669336049d (Fri Apr 13 23:55:41 2018 +)
> Merge branch 'overlayfs-linus' of
> git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs
> syzbot dashboard link:
> https://syzkaller.appspot.com/bug?extid=37035ccfa9a0a017ffcf
>
> So far this crash happened 141 times on net-next, upstream.
> C reproducer: https://syzkaller.appspot.com/x/repro.c?id=5871698234572800
> syzkaller reproducer:
> https://syzkaller.appspot.com/x/repro.syz?id=5086177975599104
> Raw console output:
> https://syzkaller.appspot.com/x/log.txt?id=5110926181138432
> Kernel config:
> https://syzkaller.appspot.com/x/.config?id=-8852471259444315113
> compiler: gcc (GCC) 8.0.1 20180413 (experimental)

This seems to be related to keccakf_rndc, please see the "Raw console
output" link.
+crypto maintainers

> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+37035ccfa9a0a017f...@syzkaller.appspotmail.com
> It will help syzbot understand when the bug is fixed. See footer for
> details.
> If you forward the report, please keep this part and the footer.
>
> ed8ccbe7: 00440169 (0x440169)
> 469f2a79: 0033 (0x33)
> 4636639d: 0246 (0x246)
> aa65aef8: 7ffead676158 (0x7ffead676158)
> e3ef297c: 002b (0x2b)
> WARNING: kernel stack frame pointer at 4832711f in
> syzkaller561281:4479 has bad value 6b4f8502
> WARNING: kernel stack regs at 89e11b3b in syzkaller561281:4479 has
> bad 'bp' value f19a2a3b
> random: crng init done
>
>
> ---
> This bug is generated by a dumb bot. It may contain errors.
> See https://goo.gl/tpsmEJ for details.
> Direct all questions to syzkal...@googlegroups.com.
>
> syzbot will keep track of this bug report.
> If you forgot to add the Reported-by tag, once the fix for this bug is
> merged
> into any tree, please reply to this email with:
> #syz fix: exact-commit-title
> If you want to test a patch for this bug, please reply with:
> #syz test: git://repo/address.git branch
> and provide the patch inline or as an attachment.
> To mark this as a duplicate of another syzbot report, please reply with:
> #syz dup: exact-subject-of-another-report
> If it's a one-off invalid bug report, please reply with:
> #syz invalid
> Note: if the crash happens again, it will cause creation of a new bug
> report.
> Note: all commands must start from beginning of the line in the email body.

Re: WARNING: kernel stack regs at (ptrval) in syzkaller has bad 'bp' value (ptrval)

2018-04-23 Thread Dmitry Vyukov

On Mon, Apr 23, 2018 at 12:10 PM, syzbot
 wrote:
> Hello,
>
> syzbot hit the following crash on upstream commit
> 5ec83b22a2dd13180762c89698e4e2c2881a423c (Sun Apr 22 19:13:04 2018 +)
> Merge tag '4.17-rc1-SMB3-CIFS' of git://git.samba.org/sfrench/cifs-2.6
> syzbot dashboard link:
> https://syzkaller.appspot.com/bug?extid=e073e4740cfbb3ae200b
>
> C reproducer: https://syzkaller.appspot.com/x/repro.c?id=6509307940044800
> syzkaller reproducer:
> https://syzkaller.appspot.com/x/repro.syz?id=6027105183727616
> Raw console output:
> https://syzkaller.appspot.com/x/log.txt?id=4763013630394368
> Kernel config:
> https://syzkaller.appspot.com/x/.config?id=1808800213120130118
> compiler: gcc (GCC) 8.0.1 20180413 (experimental)

There are 2 similar bugs reported:

https://syzkaller.appspot.com/bug?id=4fcc07e4e85fc6c1d5472fcc8ab5a35db01972dc
https://syzkaller.appspot.com/bug?id=bed7a7acabe08fe69a89a4225572911a32598651

They all seem to point to crypto/SHA3/K512_4.
+crypto maintainers

> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+e073e4740cfbb3ae2...@syzkaller.appspotmail.com
> It will help syzbot understand when the bug is fixed. See footer for
> details.
> If you forward the report, please keep this part and the footer.
>
> (ptrval): 00445649 (0x445649)
> (ptrval): 0033 (0x33)
> (ptrval): 0216 (0x216)
> (ptrval): 7f155536eda8 (0x7f155536eda8)
> (ptrval): 002b (0x2b)
> WARNING: kernel stack regs at (ptrval) in syzkaller547737:4493 has
> bad 'bp' value (ptrval)
> WARNING: kernel stack frame pointer at (ptrval) in
> syzkaller547737:4496 has bad value (ptrval)
>
>
> ---
> This bug is generated by a dumb bot. It may contain errors.
> See https://goo.gl/tpsmEJ for details.
> Direct all questions to syzkal...@googlegroups.com.
>
> syzbot will keep track of this bug report.
> If you forgot to add the Reported-by tag, once the fix for this bug is
> merged
> into any tree, please reply to this email with:
> #syz fix: exact-commit-title
> If you want to test a patch for this bug, please reply with:
> #syz test: git://repo/address.git branch
> and provide the patch inline or as an attachment.
> To mark this as a duplicate of another syzbot report, please reply with:
> #syz dup: exact-subject-of-another-report
> If it's a one-off invalid bug report, please reply with:
> #syz invalid
> Note: if the crash happens again, it will cause creation of a new bug
> report.
> Note: all commands must start from beginning of the line in the email body.
>
> --
> You received this message because you are subscribed to the Google Groups
> "syzkaller-bugs" group.
> To unsubscribe from this group and stop receiving emails from it, send an
> email to syzkaller-bugs+unsubscr...@googlegroups.com.
> To view this discussion on the web visit
> https://groups.google.com/d/msgid/syzkaller-bugs/af09f3056a813d2d%40google.com.
> For more options, visit https://groups.google.com/d/optout.

71 matches

Mail list logo