On Thu, Sep 13, 2012 at 04:31:34PM -0400, Alan Stern wrote:
> On Thu, 13 Sep 2012, Don Zickus wrote:
>
> > Hi Alan,
> >
> > I adapted your patch to our 2.6.32 tree and the customer tested it without
> > success. The output panic is attached below. I will work on getting a
> > machine with the latest kernel to reproduce this problem so I don't waste
> > your time chasing something that might be fixed upstream.
> >
> > But if you could take a quick glance at the panic below to see if anything
> > comes to mind I would appreciate it.
>
> > The test failed on the first surprise removal of PCI devices. Last gasp
> > from the console is posted below. I would guess the faulting process was
> > reading a file under /proc/bus/usb. I should have a dump if more info is
> > needed.
>
> > ehci_hcd 0000:2c:00.0: HC died; cleaning up
> > ehci_hcd 0000:2c:00.0: force halt; handhake ffffc90000654024 00004000
> > 00004000 -> -19
> > ehci_hcd 0000:2c:00.0: HC died; cleaning up
> > ehci_hcd 0000:2c:00.0: remove, state 0
> > usb usb1: USB disconnect, device number 1
> > usb 1-1: USB disconnect, device number 2
> > usb 1-1.1: USB disconnect, device number 3
> > hub 4-1:1.0: unable to enumerate USB device on port 3
> > usb 1-1.3: USB disconnect, device number 4
> > usb 1-1.6: USB disconnect, device number 5
> > usb 1-1.6.1: USB disconnect, device number 6
> > ehci_hcd 0000:2c:00.0: USB bus 1 deregistered
> > hub 4-1:1.0: unable to enumerate USB device on port 3
> > general protection fault: 0000 [#1] SMP
> > last sysfs file:
> > /sys/devices/pci0000:00/0000:00:01.0/0000:01:00.0/0000:02:01.0/0000:3d:00.0/0000:3e:01.0/0000:66:00.0/usb3/3-1/3-1.6/3-1.6.1/3-1.6.1:1.2/input/input12/event12/uevent
> > CPU 0
> > Modules linked in: autofs4 sunrpc configfs cachefiles fscache(T) bonding
> > 8021q garp stp llc vhost_net macvtap macvlan tun uinput ipmi_devintf
> > ftmod(P)(U) ipmi_msghandler sg matroxfb(U) fosil(U) ext4 mbcache jbd2
> > raid1 sr_mod cdrom sd_mod(U) crc_t10dif usb_storage mpt2sas(U)
> > scsi_hbas(U) scsi_transport_sas raid_class igb(U) dca dm_mirror
> > dm_region_hash dm_log dm_mod ipv6 cxgb4 cxgb3 mdio libiscsi_tcp libiscsi
> > scsi_transport_iscsi [last unloaded: scsi_wait_scan]
> >
> > Pid: 32752, comm: cat Tainted: P --------------- T
> > 2.6.32-279.el6.bz849188.test01.x86_64 #1 Stratus ftServer 2700/G7LAY
> > RIP: 0010:[<ffffffff813b8167>] [<ffffffff813b8167>]
> > usb_device_dump+0x87/0xa70
>
> It would help to know what source statement that memory address
> corresponds to. Offhand I can't see any remaining races between
> usb_device_dump() and usb_disconnect().
I have added some in-depth analysis from our customer.
The problem is that the failing routine was called with a pointer to usbdev in
memory that has already been freed and overwritten with the pool poison pattern.
This causes an access violation on line 502:
470 /* This is a recursive function. Parameters:
471 * buffer - the user-space buffer to write data into
472 * nbytes - the maximum number of bytes to write
473 * skip_bytes - the number of bytes to skip before writing anything
474 * file_offset - the offset into the devices file on completion
475 * The caller must own the device lock.
476 */
477 static ssize_t usb_device_dump(char __user **buffer, size_t *nbytes,
478 loff_t *skip_bytes, loff_t *file_offset,
479 struct usb_device *usbdev, struct usb_bus
*bus,
480 int level, int index, int count)
481 {
482 int chix;
483 int ret, cnt = 0;
484 int parent_devnum = 0;
485 char *pages_start, *data_end, *speed;
486 unsigned int length;
487 ssize_t total_written = 0;
488
489 /* don't bother with anything else if we're not writing any data */
490 if (*nbytes <= 0)
491 return 0;
492
493 if (level > MAX_TOPO_LEVEL)
494 return 0;
495 /* allocate 2^1 pages = 8K (on i386);
496 * should be more than enough for one device */
497 pages_start = (char *)__get_free_pages(GFP_NOIO, 1);
498 if (!pages_start)
499 return -ENOMEM;
500
501 if (usbdev->parent && usbdev->parent->devnum != -1)
502 parent_devnum = usbdev->parent->devnum;
503 /*
504 * So the root hub's parent is 0 and any device that is
505 * plugged into the root hub has a parent of 0.
506 */
507 switch (usbdev->speed) {
crash> hex
output radix: 16 (hex)
crash> sym ffffffff813b8167
ffffffff813b8167 (t) usb_device_dump+0x87
../debug/kernel-2.6.32-279.el6/linux-2.6.32-279.el6.bz849188.test01.x86_64/drivers/usb/core/devices.c:
501
crash> struct -o usb_device | grep parent
[0x38] struct usb_device *parent;
Annotated listing...
crash> dis usb_device_dump | cut -f 2- -d' '
<usb_device_dump>: push %rbp
<usb_device_dump+0x1>: mov %rsp,%rbp
<usb_device_dump+0x4>: push %r15
<usb_device_dump+0x6>: push %r14
<usb_device_dump+0x8>: push %r13
<usb_device_dump+0xa>: push %r12
<usb_device_dump+0xc>: push %rbx
<usb_device_dump+0xd>: sub $0xa8,%rsp
<usb_device_dump+0x14>: nopl 0x0(%rax,%rax,1)
<usb_device_dump+0x19>: mov %rdi,-0x68(%rbp) <<< **buffer
<usb_device_dump+0x1d>: mov %rsi,-0x40(%rbp) <<< *nbytes
<usb_device_dump+0x21>: mov %rdx,-0x60(%rbp) <<< *skip_bytes
<usb_device_dump+0x25>: mov %rcx,-0x70(%rbp) <<< *file_offset
<usb_device_dump+0x29>: mov %r8,-0x50(%rbp) <<< *usbdev
<usb_device_dump+0x2d>: mov %r9,-0x58(%rbp) <<< *bus
<usb_device_dump+0x31>: cmpq $0x0,(%rsi) <<< #490
<usb_device_dump+0x35>: je 0xffffffff813b811d <usb_device_dump+0x3d>
<usb_device_dump+0x37>: cmpl $0x6,0x10(%rbp) <<< #493
<usb_device_dump+0x3b>: jle 0xffffffff813b8134 <usb_device_dump+0x54>
<usb_device_dump+0x3d>: xor %ebx,%ebx
<usb_device_dump+0x3f>: add $0xa8,%rsp
<usb_device_dump+0x46>: mov %rbx,%rax
<usb_device_dump+0x49>: pop %rbx
<usb_device_dump+0x4a>: pop %r12
<usb_device_dump+0x4c>: pop %r13
<usb_device_dump+0x4e>: pop %r14
<usb_device_dump+0x50>: pop %r15
<usb_device_dump+0x52>: leaveq
<usb_device_dump+0x53>: retq
<usb_device_dump+0x54>: mov $0x1,%esi <<< #497
<usb_device_dump+0x59>: mov $0x10,%edi
<usb_device_dump+0x5e>: mov $0xfffffffffffffff4,%rbx <<< -ENOMEM
<usb_device_dump+0x65>: callq 0xffffffff81124120 <__get_free_pages>
<usb_device_dump+0x6a>: test %rax,%rax <<< #498
<usb_device_dump+0x6d>: mov %rax,-0x98(%rbp) <<< pages_start
<usb_device_dump+0x74>: je 0xffffffff813b811f <usb_device_dump+0x3f>
<usb_device_dump+0x76>: mov -0x50(%rbp),%rdx <<< usbdev
<usb_device_dump+0x7a>: mov 0x38(%rdx),%rax <<< usbdev->parent
<usb_device_dump+0x7e>: test %rax,%rax <<< #501
<usb_device_dump+0x81>: je 0xffffffff813b8476 <usb_device_dump+0x396>
<usb_device_dump+0x87>: mov (%rax),%r8d <<< #502
<usb_device_dump+0x8a>: cmp $0xffffffff,%r8d
<usb_device_dump+0x8e>: je 0xffffffff813b8476 <usb_device_dump+0x396>
crash> bt
PID: 32752 TASK: ffff8801788a6a40 CPU: 0 COMMAND: "cat"
#0 [ffff88014bfebc70] die at ffffffff8100f22b
#1 [ffff88014bfebca0] do_general_protection at ffffffff815018c2
#2 [ffff88014bfebcd0] general_protection at ffffffff81501095
[exception RIP: usb_device_dump+0x87]
RIP: ffffffff813b8167 RSP: ffff88014bfebd88 RFLAGS: 00010202
RAX: 6b6b6b6b6b6b6b6b RBX: fffffffffffffff4 RCX: 0000000000000000
RDX: ffff88017d2eeba8 RSI: 0000000000000000 RDI: ffffea00052a7978
RBP: ffff88014bfebe58 R8: 0000000000000000 R9: 000000000004b828
R10: 0000000000000000 R11: 0000000000000030 R12: ffff880176c88cd8
R13: 000000000000133a R14: ffff88014bfebeb0 R15: ffff88014bfebe98
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
#3 [ffff88014bfebd80] usb_device_dump at ffffffff813b814a
#4 [ffff88014bfebe60] usb_device_read at ffffffff813b8c70
#5 [ffff88014bfebef0] vfs_read at ffffffff8117cdf5
#6 [ffff88014bfebf30] sys_read at ffffffff8117cfb1
#7 [ffff88014bfebf80] tracesys at ffffffff8100b2c8 (via system_call)
RIP: 0000003930cda360 RSP: 00007fffc8369c08 RFLAGS: 00000246
RAX: ffffffffffffffda RBX: ffffffff8100b2c8 RCX: ffffffffffffffff
RDX: 0000000000008000 RSI: 0000000002284000 RDI: 0000000000000005
RBP: 0000000002284000 R8: 0000003930f8dee8 R9: 0000000000000001
R10: 0000000000008fff R11: 0000000000000246 R12: ffffffffffff8000
R13: 0000000000000005 R14: 0000000000008000 R15: 0000000002284000
ORIG_RAX: 0000000000000000 CS: 0033 SS: 002b
Here's *usbdev based upon address in RDX...
crash> struct usb_device 0xffff88017d2eeba8
struct usb_device {
devnum = 0x6b6b6b6b,
devpath = "kkkkkkkkkkkkkkkk",
route = 0x6b6b6b6b,
state = 1802201963,
speed = 1802201963,
tt = 0x6b6b6b6b6b6b6b6b,
ttport = 0x6b6b6b6b,
toggle = {0x6b6b6b6b, 0x6b6b6b6b},
parent = 0x6b6b6b6b6b6b6b6b,
bus = 0x6b6b6b6b6b6b6b6b,
ep0 = {
desc = {
bLength = 0x6b,
bDescriptorType = 0x6b,
bEndpointAddress = 0x6b,
bmAttributes = 0x6b,
wMaxPacketSize = 0x6b6b,
bInterval = 0x6b,
bRefresh = 0x6b,
bSynchAddress = 0x6b
},
urb_list = {
next = 0x6b6b6b6b6b6b6b6b,
prev = 0x6b6b6b6b6b6b6b6b
},
hcpriv = 0x6b6b6b6b6b6b6b6b,
ep_dev = 0x6b6b6b6b6b6b6b6b,
ss_ep_comp = 0x6b6b6b6b6b6b6b6b,
extra = 0x6b6b6b6b6b6b6b6b <Address 0x6b6b6b6b6b6b6b6b out of bounds>,
extralen = 0x6b6b6b6b,
enabled = 0x6b6b6b6b
},
dev = {
parent = 0x6b6b6b6b6b6b6b6b,
p = 0x6b6b6b6b6b6b6b6b,
kobj = {
name = 0x6b6b6b6b6b6b6b6b <Address 0x6b6b6b6b6b6b6b6b out of bounds>,
entry = {
next = 0x6b6b6b6b6b6b6b6b,
prev = 0x6b6b6b6b6b6b6b6b
},
parent = 0x6b6b6b6b6b6b6b6b,
kset = 0x6b6b6b6b6b6b6b6b,
ktype = 0x6b6b6b6b6b6b6b6b,
sd = 0x6b6b6b6b6b6b6b6b,
kref = {
refcount = {
counter = 0x6b6b6b6b
}
},
state_initialized = 0x1,
state_in_sysfs = 0x1,
state_add_uevent_sent = 0x0,
state_remove_uevent_sent = 0x1,
uevent_suppress = 0x0
},
init_name = 0x6b6b6b6b6b6b6b6b <Address 0x6b6b6b6b6b6b6b6b out of bounds>,
type = 0x6b6b6b6b6b6b6b6b,
Thanks,
Don
--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html