Re: CVS commit: src/sys [freeze on boot]

2020-01-20 Thread Andrew Doran
Hi,

This also happened the last time I touched rw_downgrade(), and I backed out
the change then, but both times I don't see the bug.  I have some questions:

- Are you running DIAGNOSTIC and/or LOCKDEBUG?  I would be very interested
  to see what happens with a LOCKDEBUG kernel here.
- Do you have an ATI Radeon graphics chip?
- Are you using ZFS?

Thanks,
Andrew


On Mon, Jan 20, 2020 at 12:41:37PM +0900, Ryo ONODERA wrote:
> Hi,
> 
> After this commit, the kernel stalls just before root file system
> will be found on my NetBSD/amd64 laptop.
> 
> Reverting
> src/sys/kern/kern_rwlock.c to r1.60
> and
> src/sys/sys/rwlock.h to r1.12
> in latest -current tree and I can get the kernel that works like
> before.
> 
> And on another laptop, the problematic kernel stalls before root file
> system detection like my laptop.
> 
> It may be universal problem.
> 
> Could you take a look at this problem?
> 
> Thank you.
> 
> "Andrew Doran"  writes:
> 
> > Module Name:src
> > Committed By:   ad
> > Date:   Sun Jan 19 18:34:24 UTC 2020
> >
> > Modified Files:
> > src/sys/kern: kern_rwlock.c
> > src/sys/sys: rwlock.h
> >
> > Log Message:
> > Tidy rwlocks a bit, no functional change intended.  Mainly:
> >
> > - rw_downgrade(): do it in a for () loop like all the others.
> > - Explicitly carry around RW_NODEBUG - don't be lazy.
> > - Remove pointless macros.
> > - Don't make assertions conditional on LOCKDEBUG, there's no reason.
> > - Make space for a new flag bit (not added yet).
> >
> >
> > To generate a diff of this commit:
> > cvs rdiff -u -r1.60 -r1.61 src/sys/kern/kern_rwlock.c
> > cvs rdiff -u -r1.12 -r1.13 src/sys/sys/rwlock.h
> >
> > Please note that diffs are not public domain; they are subject to the
> > copyright notices on the relevant files.
> >
> > Modified files:
> >
> > Index: src/sys/kern/kern_rwlock.c
> > diff -u src/sys/kern/kern_rwlock.c:1.60 src/sys/kern/kern_rwlock.c:1.61
> > --- src/sys/kern/kern_rwlock.c:1.60 Sun Jan 12 18:37:10 2020
> > +++ src/sys/kern/kern_rwlock.c  Sun Jan 19 18:34:24 2020
> > @@ -1,4 +1,4 @@
> > -/* $NetBSD: kern_rwlock.c,v 1.60 2020/01/12 18:37:10 ad Exp $  */
> > +/* $NetBSD: kern_rwlock.c,v 1.61 2020/01/19 18:34:24 ad Exp $  */
> >  
> >  /*-
> >   * Copyright (c) 2002, 2006, 2007, 2008, 2009, 2019, 2020
> > @@ -39,7 +39,9 @@
> >   */
> >  
> >  #include <sys/cdefs.h>
> > -__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.60 2020/01/12 18:37:10 ad 
> > Exp $");
> > +__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.61 2020/01/19 18:34:24 ad 
> > Exp $");
> > +
> > +#include "opt_lockdebug.h"
> >  
> >  #define__RWLOCK_PRIVATE
> >  
> > @@ -63,58 +65,32 @@ __KERNEL_RCSID(0, "$NetBSD: kern_rwlock.
> >   * LOCKDEBUG
> >   */
> >  
> > -#if defined(LOCKDEBUG)
> > -
> > -#defineRW_WANTLOCK(rw, op) 
> > \
> > -   LOCKDEBUG_WANTLOCK(RW_DEBUG_P(rw), (rw),\
> > -   (uintptr_t)__builtin_return_address(0), op == RW_READER);
> > -#defineRW_LOCKED(rw, op)   
> > \
> > -   LOCKDEBUG_LOCKED(RW_DEBUG_P(rw), (rw), NULL,\
> > -   (uintptr_t)__builtin_return_address(0), op == RW_READER);
> > -#defineRW_UNLOCKED(rw, op) 
> > \
> > -   LOCKDEBUG_UNLOCKED(RW_DEBUG_P(rw), (rw),\
> > -   (uintptr_t)__builtin_return_address(0), op == RW_READER);
> > -#defineRW_DASSERT(rw, cond)
> > \
> > -do {   
> > \
> > -   if (__predict_false(!(cond)))   \
> > -   rw_abort(__func__, __LINE__, rw, "assertion failed: " #cond);\
> > -} while (/* CONSTCOND */ 0);
> > -
> > -#else  /* LOCKDEBUG */
> > -
> > -#defineRW_WANTLOCK(rw, op) /* nothing */
> > -#defineRW_LOCKED(rw, op)   /* nothing */
> > -#defineRW_UNLOCKED(rw, op) /* nothing */
> > -#defineRW_DASSERT(rw, cond)/* nothing */
> > +#defineRW_DEBUG_P(rw)  (((rw)->rw_owner & RW_NODEBUG) == 0)
> >  
> > -#endif /* LOCKDEBUG */
> > +#defineRW_WANTLOCK(rw, op) \
> > +LOCKDEBUG_WANTLOCK(RW_DEBUG_P(rw), (rw), \
> > +(uintptr_t)__builtin_return_address(0), op == RW_READER);
> > +#defineRW_LOCKED(rw, op) \
> > +LOCKDEBUG_LOCKED(RW_DEBUG_P(rw), (rw), NULL, \
> > +(uintptr_t)__builtin_return_address(0), op == RW_READER);
> > +#defineRW_UNLOCKED(rw, op) \
> > +LOCKDEBUG_UNLOCKED(RW_DEBUG_P(rw), (rw), \
> > +(uintptr_t)__builtin_return_address(0), op == RW_READER);
> >  
> >  /*
> >   * DIAGNOSTIC
> >   */
> >  
> >  #if defined(DIAGNOSTIC)
> > -
> > -#defineRW_ASSERT(rw, cond) 
> > \
> > -do {   
> > \
> > -   if (__predict_false(!(cond))) 

Re: CVS commit: src/sys [freeze on boot]

2020-01-20 Thread Andrew Doran
Fix committed with sys/kern/kern_rwlock.c rev 1.62.  I didn't see the
problem as I am running with LOCKDEBUG.

Apologies for the disruption.

Andrew


Re: CVS commit: src/sys [freeze on boot]

2020-01-20 Thread Patrick Welche
On Mon, Jan 20, 2020 at 12:51:00PM +0000, Andrew Doran wrote:
> This also happened the last time I touched rw_downgrade(), and I backed out
> the change then, but both times I don't see the bug.  I have some questions:
> 
> - Are you running DIAGNOSTIC and/or LOCKDEBUG?  I would be very interested
>   to see what happens with a LOCKDEBUG kernel here.

One worked with the addition of LOCKDEBUG. The other didn't, but it seems
to be unrelated:

db{0}> show panic
Panic string: mutex_vector_enter,510: uninitialized lock (lock=0xbd012366609
0, from=8033dc9d)
bt
breakpoint() at netbsd:breakpoint+0x5
vpanic() at netbsd:vpanic+0x178
snprintf() at netbsd:snprintf
lockdebug_wantlock() at netbsd:lockdebug_wantlock+0x166
mutex_enter() at netbsd:mutex_enter+0x37c
ixgbe_getext() at netbsd:ixgbe_getext+0x1d
ixgbe_jcl_freeall.isra.0() at netbsd:ixgbe_jcl_freeall.isra.0+0xd6
ixgbe_jcl_destroy() at netbsd:ixgbe_jcl_destroy+0x14
ixgbe_free_receive_structures() at netbsd:ixgbe_free_receive_structures+0x11b
ixgbe_attach() at netbsd:ixgbe_attach+0x2b0a
config_attach_loc() at netbsd:config_attach_loc+0x1a8
config_found_sm_loc() at netbsd:config_found_sm_loc+0x4d
pci_probe_device() at netbsd:pci_probe_device+0x586
pci_enumerate_bus() at netbsd:pci_enumerate_bus+0x1b7
pcirescan() at netbsd:pcirescan+0x4e
pciattach() at netbsd:pciattach+0x186
config_attach_loc() at netbsd:config_attach_loc+0x1a8
config_found_sm_loc() at netbsd:config_found_sm_loc+0x4d
ppbattach() at netbsd:ppbattach+0x1c5
config_attach_loc() at netbsd:config_attach_loc+0x1a8
config_found_sm_loc() at netbsd:config_found_sm_loc+0x4d
pci_probe_device() at netbsd:pci_probe_device+0x586
pci_enumerate_bus() at netbsd:pci_enumerate_bus+0x1b7
pcirescan() at netbsd:pcirescan+0x4e
pciattach() at netbsd:pciattach+0x186
config_attach_loc() at netbsd:config_attach_loc+0x1a8
config_found_sm_loc() at netbsd:config_found_sm_loc+0x4d
ppbattach() at netbsd:ppbattach+0x1c5
config_attach_loc() at netbsd:config_attach_loc+0x1a8
config_found_sm_loc() at netbsd:config_found_sm_loc+0x4d
pci_probe_device() at netbsd:pci_probe_device+0x586
pci_enumerate_bus() at netbsd:pci_enumerate_bus+0x1b7
pcirescan() at netbsd:pcirescan+0x4e
pciattach() at netbsd:pciattach+0x186
config_attach_loc() at netbsd:config_attach_loc+0x1a8
config_found_sm_loc() at netbsd:config_found_sm_loc+0x4d
ppbattach() at netbsd:ppbattach+0x1c5
config_attach_loc() at netbsd:config_attach_loc+0x1a8
config_found_sm_loc() at netbsd:config_found_sm_loc+0x4d
pci_probe_device() at netbsd:pci_probe_device+0x586
pci_enumerate_bus() at netbsd:pci_enumerate_bus+0x1b7
pcirescan() at netbsd:pcirescan+0x4e
pciattach() at netbsd:pciattach+0x186
config_attach_loc() at netbsd:config_attach_loc+0x1a8
config_found_sm_loc() at netbsd:config_found_sm_loc+0x4d
mp_pci_scan() at netbsd:mp_pci_scan+0xa4
amd64_mainbus_attach() at netbsd:amd64_mainbus_attach+0x237
mainbus_attach() at netbsd:mainbus_attach+0x70
config_attach_loc() at netbsd:config_attach_loc+0x1a8
cpu_configure() at netbsd:cpu_configure+0x2b
main() at netbsd:main+0x311

Cheers,

Patrick


Re: CVS commit: src/sys [freeze on boot]

2020-01-20 Thread Patrick Welche
On Mon, Jan 20, 2020 at 12:51:00PM +0000, Andrew Doran wrote:
> This also happened the last time I touched rw_downgrade(), and I backed out
> the change then, but both times I don't see the bug.  I have some questions:
> 
> - Are you running DIAGNOSTIC and/or LOCKDEBUG?  I would be very interested
>   to see what happens with a LOCKDEBUG kernel here.

I think that's basically it!
- custom without any of DIAGNOSTIC / DEBUG / LOCKDEBUG fails
- standard GENERIC with DIAGNOSTIC fails
- custom with all of DIAGNOSTIC / DEBUG / LOCKDEBUG boots!

> - Do you have an ATI Radeon graphics chip?
(of the two failing one with and one without)
> - Are you using ZFS?
no

I'll try adding LOCKDEBUG to the other one and see if that allows it to
boot too...

Cheers,

Patrick


Re: CVS commit: src/sys [freeze on boot]

2020-01-20 Thread Paul Goyette

On Mon, 20 Jan 2020, Patrick Welche wrote:


On Mon, Jan 20, 2020 at 12:51:00PM +0000, Andrew Doran wrote:

This also happened the last time I touched rw_downgrade(), and I backed out
the change then, but both times I don't see the bug.  I have some questions:

- Are you running DIAGNOSTIC and/or LOCKDEBUG?  I would be very interested
  to see what happens with a LOCKDEBUG kernel here.


Hmmm, at least on x86, in the LOCKDEBUG case we don't use the assembler
stubs;  we simply use the C versions.

On IRC/ICB, mlelstv has indicated there's something wrong in the stubs,
but I don't see it.


One worked with the addition of LOCKDEBUG. The other didn't, but it seems
to be unrelated:


Yeah, that backtrace looks unrelated.


++--+---+
| Paul Goyette   | PGP Key fingerprint: | E-mail addresses: |
| (Retired)  | FA29 0E3B 35AF E8AE 6651 | p...@whooppee.com |
| Software Developer | 0786 F758 55DE 53BA 7731 | pgoye...@netbsd.org   |
++--+---+


Re: CVS commit: src/sys [freeze on boot]

2020-01-20 Thread Andrew Doran
On Mon, Jan 20, 2020 at 09:28:32AM -0800, Paul Goyette wrote:

> On Mon, 20 Jan 2020, Patrick Welche wrote:
> 
> > On Mon, Jan 20, 2020 at 12:51:00PM +, Andrew Doran wrote:
> > > This also happened the last time I touched rw_downgrade(), and I backed 
> > > out
> > > the change then, but both times I don't see the bug.  I have some 
> > > questions:
> > > 
> > > - Are you running DIAGNOSTIC and/or LOCKDEBUG?  I would be very interested
> > >   to see what happens with a LOCKDEBUG kernel here.
> 
> Hmmm, at least on x86, in the LOCKDEBUG case we don't use the assembler
> stubs;  we simply use the C versions.
> 
> On IRC/ICB, mlelstv has indicated there's something wrong in the stubs,
> but I don't see it.

Yup, I think it's the stubs choking on the RW_NODEBUG flag being set. 
Testing a change for that now.

Andrew

> 
> > One worked with the addition of LOCKDEBUG. The other didn't, but it seems
> > to be unrelated:
> 
> Yeah, that backtrace looks unrelated.
> 
> 
> ++--+---+
> | Paul Goyette   | PGP Key fingerprint: | E-mail addresses: |
> | (Retired)  | FA29 0E3B 35AF E8AE 6651 | p...@whooppee.com |
> | Software Developer | 0786 F758 55DE 53BA 7731 | pgoye...@netbsd.org   |
> ++--+---+


Re: CVS commit: src/sys [freeze on boot]

2020-01-20 Thread Patrick Welche
On Mon, Jan 20, 2020 at 04:12:45PM +0000, Patrick Welche wrote:
> On Mon, Jan 20, 2020 at 12:51:00PM +, Andrew Doran wrote:
> > This also happened the last time I touched rw_downgrade(), and I backed out
> > the change then, but both times I don't see the bug.  I have some questions:
> > 
> > - Are you running DIAGNOSTIC and/or LOCKDEBUG?  I would be very interested
> >   to see what happens with a LOCKDEBUG kernel here.
> 
> One worked with the addition of LOCKDEBUG. The other didn't, but it seems
> to be unrelated:
> 
> db{0}> show panic
> Panic string: mutex_vector_enter,510: uninitialized lock 
> (lock=0xbd012366609
> 0, from=8033dc9d)
> bt
> breakpoint() at netbsd:breakpoint+0x5
> vpanic() at netbsd:vpanic+0x178
> snprintf() at netbsd:snprintf
> lockdebug_wantlock() at netbsd:lockdebug_wantlock+0x166
> mutex_enter() at netbsd:mutex_enter+0x37c
> ixgbe_getext() at netbsd:ixgbe_getext+0x1d

ixgbe_getext does mutex_enter(&eh->eh_mtx) but...

> ixgbe_jcl_freeall.isra.0() at netbsd:ixgbe_jcl_freeall.isra.0+0xd6
> ixgbe_jcl_destroy() at netbsd:ixgbe_jcl_destroy+0x14

... ixgbe_jcl_destroy does mutex_destroy(&eh->eh_mtx)


Re: CVS commit: src/sys [freeze on boot]

2020-01-20 Thread Andrew Doran
Thanks.  I can reproduce a hang on boot in qemu.  It's hanging starting
init, waiting on "needbuf".  Investigating now.

Andrew

On Mon, Jan 20, 2020 at 04:12:45PM +0000, Patrick Welche wrote:
> On Mon, Jan 20, 2020 at 12:51:00PM +, Andrew Doran wrote:
> > This also happened the last time I touched rw_downgrade(), and I backed out
> > the change then, but both times I don't see the bug.  I have some questions:
> > 
> > - Are you running DIAGNOSTIC and/or LOCKDEBUG?  I would be very interested
> >   to see what happens with a LOCKDEBUG kernel here.
> 
> One worked with the addition of LOCKDEBUG. The other didn't, but it seems
> to be unrelated:
> 
> db{0}> show panic
> Panic string: mutex_vector_enter,510: uninitialized lock 
> (lock=0xbd012366609
> 0, from=8033dc9d)
> bt
> breakpoint() at netbsd:breakpoint+0x5
> vpanic() at netbsd:vpanic+0x178
> snprintf() at netbsd:snprintf
> lockdebug_wantlock() at netbsd:lockdebug_wantlock+0x166
> mutex_enter() at netbsd:mutex_enter+0x37c
> ixgbe_getext() at netbsd:ixgbe_getext+0x1d
> ixgbe_jcl_freeall.isra.0() at netbsd:ixgbe_jcl_freeall.isra.0+0xd6
> ixgbe_jcl_destroy() at netbsd:ixgbe_jcl_destroy+0x14
> ixgbe_free_receive_structures() at netbsd:ixgbe_free_receive_structures+0x11b
> ixgbe_attach() at netbsd:ixgbe_attach+0x2b0a
> config_attach_loc() at netbsd:config_attach_loc+0x1a8
> config_found_sm_loc() at netbsd:config_found_sm_loc+0x4d
> pci_probe_device() at netbsd:pci_probe_device+0x586
> pci_enumerate_bus() at netbsd:pci_enumerate_bus+0x1b7
> pcirescan() at netbsd:pcirescan+0x4e
> pciattach() at netbsd:pciattach+0x186
> config_attach_loc() at netbsd:config_attach_loc+0x1a8
> config_found_sm_loc() at netbsd:config_found_sm_loc+0x4d
> ppbattach() at netbsd:ppbattach+0x1c5
> config_attach_loc() at netbsd:config_attach_loc+0x1a8
> config_found_sm_loc() at netbsd:config_found_sm_loc+0x4d
> pci_probe_device() at netbsd:pci_probe_device+0x586
> pci_enumerate_bus() at netbsd:pci_enumerate_bus+0x1b7
> pcirescan() at netbsd:pcirescan+0x4e
> pciattach() at netbsd:pciattach+0x186
> config_attach_loc() at netbsd:config_attach_loc+0x1a8
> config_found_sm_loc() at netbsd:config_found_sm_loc+0x4d
> ppbattach() at netbsd:ppbattach+0x1c5
> config_attach_loc() at netbsd:config_attach_loc+0x1a8
> config_found_sm_loc() at netbsd:config_found_sm_loc+0x4d
> pci_probe_device() at netbsd:pci_probe_device+0x586
> pci_enumerate_bus() at netbsd:pci_enumerate_bus+0x1b7
> pcirescan() at netbsd:pcirescan+0x4e
> pciattach() at netbsd:pciattach+0x186
> config_attach_loc() at netbsd:config_attach_loc+0x1a8
> config_found_sm_loc() at netbsd:config_found_sm_loc+0x4d
> ppbattach() at netbsd:ppbattach+0x1c5
> config_attach_loc() at netbsd:config_attach_loc+0x1a8
> config_found_sm_loc() at netbsd:config_found_sm_loc+0x4d
> pci_probe_device() at netbsd:pci_probe_device+0x586
> pci_enumerate_bus() at netbsd:pci_enumerate_bus+0x1b7
> pcirescan() at netbsd:pcirescan+0x4e
> pciattach() at netbsd:pciattach+0x186
> config_attach_loc() at netbsd:config_attach_loc+0x1a8
> config_found_sm_loc() at netbsd:config_found_sm_loc+0x4d
> mp_pci_scan() at netbsd:mp_pci_scan+0xa4
> amd64_mainbus_attach() at netbsd:amd64_mainbus_attach+0x237
> mainbus_attach() at netbsd:mainbus_attach+0x70
> config_attach_loc() at netbsd:config_attach_loc+0x1a8
> cpu_configure() at netbsd:cpu_configure+0x2b
> main() at netbsd:main+0x311
> 
> Cheers,
> 
> Patrick


Re: CVS commit: src/sys [freeze on boot]

2020-01-20 Thread Paul Goyette

On Mon, 20 Jan 2020, Andrew Doran wrote:


Hi,

This also happened the last time I touched rw_downgrade(), and I backed out
the change then, but both times I don't see the bug.  I have some questions:

- Are you running DIAGNOSTIC and/or LOCKDEBUG?  I would be very interested
 to see what happens with a LOCKDEBUG kernel here.


I am running a stock GENERIC kernel, as part of an ``anita install''
operation.  Since it can't boot the install process, it's not possible
to use a custom kernel.  AFAIR, GENERIC includes DIAGNOSTIC but does
not include LOCKDEBUG.


- Do you have an ATI Radeon graphics chip?


The qemu install uses serial console.  It does emulate some graphics
card, but not sure which one.


- Are you using ZFS?


Definitely not!


++--+---+
| Paul Goyette   | PGP Key fingerprint: | E-mail addresses: |
| (Retired)  | FA29 0E3B 35AF E8AE 6651 | p...@whooppee.com |
| Software Developer | 0786 F758 55DE 53BA 7731 | pgoye...@netbsd.org   |
++--+---+


Re: CVS commit: src/sys [freeze on boot]

2020-01-20 Thread Ryo ONODERA
Ryo ONODERA  writes:

> Hi,
>
> Andrew Doran  writes:
>
>> Hi,
>>
>> This also happened the last time I touched rw_downgrade(), and I backed out
>> the change then, but both times I don't see the bug.  I have some questions:
>>
>> - Are you running DIAGNOSTIC and/or LOCKDEBUG?  I would be very interested
>>   to see what happens with a LOCKDEBUG kernel here.
>
> I will enable LOCKDEBUG and DIAGNOSTIC soon.

Sadly DIAGNOSTIC and LOCKDEBUG with i915drmkms(4) makes my LCD black
and I cannot see any messages.
When i915drmkms is disabled, the kernel boots without freeze in
DIAGNOSTIC and LOCKDEBUG case.

However ioctl(2) to ims(4) causes kernel panic.
I feel that this panic is not related to the boot freeze.
See:
$ crash -M netbsd.10.core -N netbsd.10
Crash version 9.99.39, image version 9.99.39.
System panicked: kernel diagnostic assertion "ci->ci_mtx_count == -1" failed: 
file "/usr/src/sys/kern/kern_synch.c", line 676 mi_switch: cpu0: ci_mtx_count 
(-2) != -1 (block with spin-mutex held)
Backtrace from time of crash is available.
crash> bt
_KERNEL_OPT_NARCNET() at 0
_KERNEL_OPT_ACPI_SCANPCI() at _KERNEL_OPT_ACPI_SCANPCI
sys_reboot() at sys_reboot
vpanic() at vpanic+0x181
kern_assert() at kern_assert+0x48
mi_switch() at mi_switch+0x9b8
sleepq_block() at sleepq_block+0x1cb
turnstile_block() at turnstile_block+0x5bd
mutex_enter() at mutex_enter+0x31d
iic_acquire_bus() at iic_acquire_bus+0x2a
ihidev_softintr() at ihidev_softintr+0x27
softint_dispatch() at softint_dispatch+0xdb
DDB lost frame for Xsoftintr+0x4f, trying 0xda8138ef00f0
Xsoftintr() at Xsoftintr+0x4f
--- interrupt ---
1c6f7f77b9463525:
crash>

>> - Do you have an ATI Radeon graphics chip?
>> - Are you using ZFS?
>
> My GPU is in Intel CPU (KabyLake Refresh).
> And I do not use ZFS at all. All partitions are FFSv2 with WAPBL.
>
>> Thanks,
>> Andrew
>>
>>
>> On Mon, Jan 20, 2020 at 12:41:37PM +0900, Ryo ONODERA wrote:
>>> Hi,
>>> 
>>> After this commit, the kernel stalls just before root file system
>>> will be found on my NetBSD/amd64 laptop.
>>> 
>>> Reverting
>>> src/sys/kern/kern_rwlock.c to r1.60
>>> and
>>> src/sys/sys/rwlock.h to r1.12
>>> in latest -current tree and I can get the kernel that works like
>>> before.
>>> 
>>> And on another laptop, the problematic kernel stalls before root file
>>> system detection like my laptop.
>>> 
>>> It may be universal problem.
>>> 
>>> Could you take a look at this problem?
>>> 
>>> Thank you.
>>> 
>>> "Andrew Doran"  writes:
>>> 
>>> > Module Name:  src
>>> > Committed By: ad
>>> > Date: Sun Jan 19 18:34:24 UTC 2020
>>> >
>>> > Modified Files:
>>> >   src/sys/kern: kern_rwlock.c
>>> >   src/sys/sys: rwlock.h
>>> >
>>> > Log Message:
>>> > Tidy rwlocks a bit, no functional change intended.  Mainly:
>>> >
>>> > - rw_downgrade(): do it in a for () loop like all the others.
>>> > - Explicitly carry around RW_NODEBUG - don't be lazy.
>>> > - Remove pointless macros.
>>> > - Don't make assertions conditional on LOCKDEBUG, there's no reason.
>>> > - Make space for a new flag bit (not added yet).
>>> >
>>> >
>>> > To generate a diff of this commit:
>>> > cvs rdiff -u -r1.60 -r1.61 src/sys/kern/kern_rwlock.c
>>> > cvs rdiff -u -r1.12 -r1.13 src/sys/sys/rwlock.h
>>> >
>>> > Please note that diffs are not public domain; they are subject to the
>>> > copyright notices on the relevant files.
>>> >
>>> > Modified files:
>>> >
>>> > Index: src/sys/kern/kern_rwlock.c
>>> > diff -u src/sys/kern/kern_rwlock.c:1.60 src/sys/kern/kern_rwlock.c:1.61
>>> > --- src/sys/kern/kern_rwlock.c:1.60   Sun Jan 12 18:37:10 2020
>>> > +++ src/sys/kern/kern_rwlock.cSun Jan 19 18:34:24 2020
>>> > @@ -1,4 +1,4 @@
>>> > -/*   $NetBSD: kern_rwlock.c,v 1.60 2020/01/12 18:37:10 ad Exp $  
>>> > */
>>> > +/*   $NetBSD: kern_rwlock.c,v 1.61 2020/01/19 18:34:24 ad Exp $  
>>> > */
>>> >  
>>> >  /*-
>>> >   * Copyright (c) 2002, 2006, 2007, 2008, 2009, 2019, 2020
>>> > @@ -39,7 +39,9 @@
>>> >   */
>>> >  
>>> >  #include 
>>> > -__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.60 2020/01/12 18:37:10 ad 
>>> > Exp $");
>>> > +__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.61 2020/01/19 18:34:24 ad 
>>> > Exp $");
>>> > +
>>> > +#include "opt_lockdebug.h"
>>> >  
>>> >  #define  __RWLOCK_PRIVATE
>>> >  
>>> > @@ -63,58 +65,32 @@ __KERNEL_RCSID(0, "$NetBSD: kern_rwlock.
>>> >   * LOCKDEBUG
>>> >   */
>>> >  
>>> > -#if defined(LOCKDEBUG)
>>> > -
>>> > -#define  RW_WANTLOCK(rw, op) 
>>> > \
>>> > - LOCKDEBUG_WANTLOCK(RW_DEBUG_P(rw), (rw),\
>>> > - (uintptr_t)__builtin_return_address(0), op == RW_READER);
>>> > -#define  RW_LOCKED(rw, op)   
>>> > \
>>> > - LOCKDEBUG_LOCKED(RW_DEBUG_P(rw), (rw), NULL,\
>>> > - (uintptr_t)__builtin_return_address(0), op == RW_READER);
>>> > -#define  RW_UNLOCKED(rw, op) 
>>> > \
>>> > - 

Re: CVS commit: src/sys [freeze on boot]

2020-01-20 Thread Jason Thorpe



> On Jan 20, 2020, at 6:48 AM, Ryo ONODERA  wrote:
> 
> The black screen and ims(4) panic are not related to your change.
> Older src tree with LOCKDEBUG reproduces these problem.

I'll look at the ims(4) issue.

-- thorpej



Re: CVS commit: src/sys [freeze on boot]

2020-01-20 Thread Patrick Welche
On Mon, Jan 20, 2020 at 12:51:00PM +0000, Andrew Doran wrote:
> This also happened the last time I touched rw_downgrade(), and I backed out
> the change then, but both times I don't see the bug.  I have some questions:

2 amd64 boxes, let's call them a) and b)

> - Are you running DIAGNOSTIC and/or LOCKDEBUG?  I would be very interested
  a) standard GENERIC kernel, so just DIAGNOSTIC
  b) neither(!)
>   to see what happens with a LOCKDEBUG kernel here.
  I'll try on b) in a minute
> - Do you have an ATI Radeon graphics chip?
  a) no: genfb0 at pci10 dev 3 function 0: Matrox MGA G200eW
  b) yes: vga0 at pci3 dev 0 function 0: ATI Technologies Radeon X300 (RV370)
> - Are you using ZFS?
  no (a nor b)

Cheers,

Patrick


Re: CVS commit: src/sys [freeze on boot]

2020-01-20 Thread Ryo ONODERA
Hi,

Andrew Doran  writes:

> Hi,
>
> This also happened the last time I touched rw_downgrade(), and I backed out
> the change then, but both times I don't see the bug.  I have some questions:
>
> - Are you running DIAGNOSTIC and/or LOCKDEBUG?  I would be very interested
>   to see what happens with a LOCKDEBUG kernel here.

I will enable LOCKDEBUG and DIAGNOSTIC soon.

> - Do you have an ATI Radeon graphics chip?
> - Are you using ZFS?

My GPU is in Intel CPU (KabyLake Refresh).
And I do not use ZFS at all. All partitions are FFSv2 with WAPBL.

> Thanks,
> Andrew
>
>
> On Mon, Jan 20, 2020 at 12:41:37PM +0900, Ryo ONODERA wrote:
>> Hi,
>> 
>> After this commit, the kernel stalls just before root file system
>> will be found on my NetBSD/amd64 laptop.
>> 
>> Reverting
>> src/sys/kern/kern_rwlock.c to r1.60
>> and
>> src/sys/sys/rwlock.h to r1.12
>> in latest -current tree and I can get the kernel that works like
>> before.
>> 
>> And on another laptop, the problematic kernel stalls before root file
>> system detection like my laptop.
>> 
>> It may be universal problem.
>> 
>> Could you take a look at this problem?
>> 
>> Thank you.
>> 
>> "Andrew Doran"  writes:
>> 
>> > Module Name:   src
>> > Committed By:  ad
>> > Date:  Sun Jan 19 18:34:24 UTC 2020
>> >
>> > Modified Files:
>> >src/sys/kern: kern_rwlock.c
>> >src/sys/sys: rwlock.h
>> >
>> > Log Message:
>> > Tidy rwlocks a bit, no functional change intended.  Mainly:
>> >
>> > - rw_downgrade(): do it in a for () loop like all the others.
>> > - Explicitly carry around RW_NODEBUG - don't be lazy.
>> > - Remove pointless macros.
>> > - Don't make assertions conditional on LOCKDEBUG, there's no reason.
>> > - Make space for a new flag bit (not added yet).
>> >
>> >
>> > To generate a diff of this commit:
>> > cvs rdiff -u -r1.60 -r1.61 src/sys/kern/kern_rwlock.c
>> > cvs rdiff -u -r1.12 -r1.13 src/sys/sys/rwlock.h
>> >
>> > Please note that diffs are not public domain; they are subject to the
>> > copyright notices on the relevant files.
>> >
>> > Modified files:
>> >
>> > Index: src/sys/kern/kern_rwlock.c
>> > diff -u src/sys/kern/kern_rwlock.c:1.60 src/sys/kern/kern_rwlock.c:1.61
>> > --- src/sys/kern/kern_rwlock.c:1.60Sun Jan 12 18:37:10 2020
>> > +++ src/sys/kern/kern_rwlock.c Sun Jan 19 18:34:24 2020
>> > @@ -1,4 +1,4 @@
>> > -/*$NetBSD: kern_rwlock.c,v 1.60 2020/01/12 18:37:10 ad Exp $  
>> > */
>> > +/*$NetBSD: kern_rwlock.c,v 1.61 2020/01/19 18:34:24 ad Exp $  
>> > */
>> >  
>> >  /*-
>> >   * Copyright (c) 2002, 2006, 2007, 2008, 2009, 2019, 2020
>> > @@ -39,7 +39,9 @@
>> >   */
>> >  
>> >  #include 
>> > -__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.60 2020/01/12 18:37:10 ad 
>> > Exp $");
>> > +__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.61 2020/01/19 18:34:24 ad 
>> > Exp $");
>> > +
>> > +#include "opt_lockdebug.h"
>> >  
>> >  #define   __RWLOCK_PRIVATE
>> >  
>> > @@ -63,58 +65,32 @@ __KERNEL_RCSID(0, "$NetBSD: kern_rwlock.
>> >   * LOCKDEBUG
>> >   */
>> >  
>> > -#if defined(LOCKDEBUG)
>> > -
>> > -#define   RW_WANTLOCK(rw, op) 
>> > \
>> > -  LOCKDEBUG_WANTLOCK(RW_DEBUG_P(rw), (rw),\
>> > -  (uintptr_t)__builtin_return_address(0), op == RW_READER);
>> > -#define   RW_LOCKED(rw, op)   
>> > \
>> > -  LOCKDEBUG_LOCKED(RW_DEBUG_P(rw), (rw), NULL,\
>> > -  (uintptr_t)__builtin_return_address(0), op == RW_READER);
>> > -#define   RW_UNLOCKED(rw, op) 
>> > \
>> > -  LOCKDEBUG_UNLOCKED(RW_DEBUG_P(rw), (rw),\
>> > -  (uintptr_t)__builtin_return_address(0), op == RW_READER);
>> > -#define   RW_DASSERT(rw, cond)
>> > \
>> > -do {  
>> > \
>> > -  if (__predict_false(!(cond)))   \
>> > -  rw_abort(__func__, __LINE__, rw, "assertion failed: " #cond);\
>> > -} while (/* CONSTCOND */ 0);
>> > -
>> > -#else /* LOCKDEBUG */
>> > -
>> > -#define   RW_WANTLOCK(rw, op) /* nothing */
>> > -#define   RW_LOCKED(rw, op)   /* nothing */
>> > -#define   RW_UNLOCKED(rw, op) /* nothing */
>> > -#define   RW_DASSERT(rw, cond)/* nothing */
>> > +#define   RW_DEBUG_P(rw)  (((rw)->rw_owner & RW_NODEBUG) == 0)
>> >  
>> > -#endif/* LOCKDEBUG */
>> > +#define   RW_WANTLOCK(rw, op) \
>> > +LOCKDEBUG_WANTLOCK(RW_DEBUG_P(rw), (rw), \
>> > +(uintptr_t)__builtin_return_address(0), op == RW_READER);
>> > +#define   RW_LOCKED(rw, op) \
>> > +LOCKDEBUG_LOCKED(RW_DEBUG_P(rw), (rw), NULL, \
>> > +(uintptr_t)__builtin_return_address(0), op == RW_READER);
>> > +#define   RW_UNLOCKED(rw, op) \
>> > +LOCKDEBUG_UNLOCKED(RW_DEBUG_P(rw), (rw), \
>> > +(uintptr_t)__builtin_return_address(0), op 

Re: CVS commit: src/sys [freeze on boot]

2020-01-20 Thread Chavdar Ivanov
On Mon, 20 Jan 2020 at 13:06, Ryo ONODERA  wrote:
>
> Hi,
>
> Andrew Doran  writes:
>
> > Hi,
> >
> > This also happened the last time I touched rw_downgrade(), and I backed out
> > the change then, but both times I don't see the bug.  I have some questions:
> >
> > - Are you running DIAGNOSTIC and/or LOCKDEBUG?  I would be very interested
> >   to see what happens with a LOCKDEBUG kernel here.
>
> I will enable LOCKDEBUG and DIAGNOSTIC soon.
>
> > - Do you have an ATI Radeon graphics chip?
> > - Are you using ZFS?
>
> My GPU is in Intel CPU (KabyLake Refresh).
> And I do not use ZFS at all. All partitions are FFSv2 with WAPBL.

If it may be of any help, the laptop I have which freezes upon
discovering the root device (dk1 in my case)

- has a Radeon graphics, which is disabled in boot.cfg (hardware fault
- if I do not disable it, the system gets reset immediately).
- I am using ZFS (two pools, one holding zvols for qemu-nvmm guests and
some iSCSI targets, the other a few zfs folders, especially the
CCACHE_DIR target gets used a lot)
- Also the root is on a GPT disk, EFI boot.

>
> > Thanks,
> > Andrew
> >
> >
> > On Mon, Jan 20, 2020 at 12:41:37PM +0900, Ryo ONODERA wrote:
> >> Hi,
> >>
> >> After this commit, the kernel stalls just before root file system
> >> will be found on my NetBSD/amd64 laptop.
> >>
> >> Reverting
> >> src/sys/kern/kern_rwlock.c to r1.60
> >> and
> >> src/sys/sys/rwlock.h to r1.12
> >> in latest -current tree and I can get the kernel that works like
> >> before.
> >>
> >> And on another laptop, the problematic kernel stalls before root file
> >> system detection like my laptop.
> >>
> >> It may be universal problem.
> >>
> >> Could you take a look at this problem?
> >>
> >> Thank you.
> >>
> >> "Andrew Doran"  writes:
> >>
> >> > Module Name:   src
> >> > Committed By:  ad
> >> > Date:  Sun Jan 19 18:34:24 UTC 2020
> >> >
> >> > Modified Files:
> >> >src/sys/kern: kern_rwlock.c
> >> >src/sys/sys: rwlock.h
> >> >
> >> > Log Message:
> >> > Tidy rwlocks a bit, no functional change intended.  Mainly:
> >> >
> >> > - rw_downgrade(): do it in a for () loop like all the others.
> >> > - Explicitly carry around RW_NODEBUG - don't be lazy.
> >> > - Remove pointless macros.
> >> > - Don't make assertions conditional on LOCKDEBUG, there's no reason.
> >> > - Make space for a new flag bit (not added yet).
> >> >
> >> >
> >> > To generate a diff of this commit:
> >> > cvs rdiff -u -r1.60 -r1.61 src/sys/kern/kern_rwlock.c
> >> > cvs rdiff -u -r1.12 -r1.13 src/sys/sys/rwlock.h
> >> >
> >> > Please note that diffs are not public domain; they are subject to the
> >> > copyright notices on the relevant files.
> >> >
> >> > Modified files:
> >> >
> >> > Index: src/sys/kern/kern_rwlock.c
> >> > diff -u src/sys/kern/kern_rwlock.c:1.60 src/sys/kern/kern_rwlock.c:1.61
> >> > --- src/sys/kern/kern_rwlock.c:1.60Sun Jan 12 18:37:10 2020
> >> > +++ src/sys/kern/kern_rwlock.c Sun Jan 19 18:34:24 2020
> >> > @@ -1,4 +1,4 @@
> >> > -/*$NetBSD: kern_rwlock.c,v 1.60 2020/01/12 18:37:10 ad Exp $
> >> >   */
> >> > +/*$NetBSD: kern_rwlock.c,v 1.61 2020/01/19 18:34:24 ad Exp $
> >> >   */
> >> >
> >> >  /*-
> >> >   * Copyright (c) 2002, 2006, 2007, 2008, 2009, 2019, 2020
> >> > @@ -39,7 +39,9 @@
> >> >   */
> >> >
> >> >  #include 
> >> > -__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.60 2020/01/12 18:37:10 ad 
> >> > Exp $");
> >> > +__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.61 2020/01/19 18:34:24 ad 
> >> > Exp $");
> >> > +
> >> > +#include "opt_lockdebug.h"
> >> >
> >> >  #define   __RWLOCK_PRIVATE
> >> >
> >> > @@ -63,58 +65,32 @@ __KERNEL_RCSID(0, "$NetBSD: kern_rwlock.
> >> >   * LOCKDEBUG
> >> >   */
> >> >
> >> > -#if defined(LOCKDEBUG)
> >> > -
> >> > -#define   RW_WANTLOCK(rw, op)   
> >> >   \
> >> > -  LOCKDEBUG_WANTLOCK(RW_DEBUG_P(rw), (rw),\
> >> > -  (uintptr_t)__builtin_return_address(0), op == RW_READER);
> >> > -#define   RW_LOCKED(rw, op) 
> >> >   \
> >> > -  LOCKDEBUG_LOCKED(RW_DEBUG_P(rw), (rw), NULL,\
> >> > -  (uintptr_t)__builtin_return_address(0), op == RW_READER);
> >> > -#define   RW_UNLOCKED(rw, op)   
> >> >   \
> >> > -  LOCKDEBUG_UNLOCKED(RW_DEBUG_P(rw), (rw),\
> >> > -  (uintptr_t)__builtin_return_address(0), op == RW_READER);
> >> > -#define   RW_DASSERT(rw, cond)  
> >> >   \
> >> > -do {
> >> >   \
> >> > -  if (__predict_false(!(cond)))   \
> >> > -  rw_abort(__func__, __LINE__, rw, "assertion failed: " #cond);\
> >> > -} while (/* CONSTCOND */ 0);
> >> > -
> >> > -#else /* LOCKDEBUG */
> >> > -
> >> > -#define   RW_WANTLOCK(rw, op) /* nothing */
> >> > 

Re: CVS commit: src/sys [freeze on boot]

2020-01-20 Thread Ryo ONODERA
Ryo ONODERA  writes:

> Ryo ONODERA  writes:
>
>> Hi,
>>
>> Andrew Doran  writes:
>>
>>> Hi,
>>>
>>> This also happened the last time I touched rw_downgrade(), and I backed out
>>> the change then, but both times I don't see the bug.  I have some questions:
>>>
>>> - Are you running DIAGNOSTIC and/or LOCKDEBUG?  I would be very interested
>>>   to see what happens with a LOCKDEBUG kernel here.
>>
>> I will enable LOCKDEBUG and DIAGNOSTIC soon.
>
> Sadly DIAGNOSTIC and LOCKDEBUG with i915drmkms(4) makes my LCD black
> and I cannot see any messages.
> When i915drmkms is disabled, the kernel boots without freeze in
> DIAGNOSTIC and LOCKDEBUG case.
>
> However ioctl(2) to ims(4) causes kernel panic.
> I feel that this panic is not related to the boot freeze.
> See:
> $ crash -M netbsd.10.core -N netbsd.10
> Crash version 9.99.39, image version 9.99.39.
> System panicked: kernel diagnostic assertion "ci->ci_mtx_count == -1" failed: 
> file "/usr/src/sys/kern/kern_synch.c", line 676 mi_switch: cpu0: ci_mtx_count 
> (-2) != -1 (block with spin-mutex held)
> Backtrace from time of crash is available.
> crash> bt
> _KERNEL_OPT_NARCNET() at 0
> _KERNEL_OPT_ACPI_SCANPCI() at _KERNEL_OPT_ACPI_SCANPCI
> sys_reboot() at sys_reboot
> vpanic() at vpanic+0x181
> kern_assert() at kern_assert+0x48
> mi_switch() at mi_switch+0x9b8
> sleepq_block() at sleepq_block+0x1cb
> turnstile_block() at turnstile_block+0x5bd
> mutex_enter() at mutex_enter+0x31d
> iic_acquire_bus() at iic_acquire_bus+0x2a
> ihidev_softintr() at ihidev_softintr+0x27
> softint_dispatch() at softint_dispatch+0xdb
> DDB lost frame for Xsoftintr+0x4f, trying 0xda8138ef00f0
> Xsoftintr() at Xsoftintr+0x4f
> --- interrupt ---
> 1c6f7f77b9463525:
> crash>

The black screen and ims(4) panic are not related to your change.
Older src tree with LOCKDEBUG reproduces these problems.

Thank you.

>>> - Do you have an ATI Radeon graphics chip?
>>> - Are you using ZFS?
>>
>> My GPU is in Intel CPU (KabyLake Refresh).
>> And I do not use ZFS at all. All partitions are FFSv2 with WAPBL.
>>
>>> Thanks,
>>> Andrew
>>>
>>>
>>> On Mon, Jan 20, 2020 at 12:41:37PM +0900, Ryo ONODERA wrote:
>>>> Hi,
>>>>
>>>> After this commit, the kernel stalls just before root file system
>>>> will be found on my NetBSD/amd64 laptop.
>>>>
>>>> Reverting
>>>> src/sys/kern/kern_rwlock.c to r1.60
>>>> and
>>>> src/sys/sys/rwlock.h to r1.12
>>>> in latest -current tree and I can get the kernel that works like
>>>> before.
>>>>
>>>> And on another laptop, the problematic kernel stalls before root file
>>>> system detection like my laptop.
>>>>
>>>> It may be universal problem.
>>>>
>>>> Could you take a look at this problem?
>>>>
>>>> Thank you.
>>>>
-- 
Ryo ONODERA // r...@tetera.org
PGP fingerprint = 82A2 DC91 76E0 A10A 8ABB  FD1B F404 27FA C7D1 15F3


Re: CVS commit: src/sys [freeze on boot]

2020-01-20 Thread Ryo ONODERA
Hi,

Jason Thorpe  writes:

>> On Jan 20, 2020, at 6:48 AM, Ryo ONODERA  wrote:
>> 
>> The black screen and ims(4) panic are not related to your change.
>> Older src tree with LOCKDEBUG reproduces these problems.
>
> I'll look at the ims(4) issue.

Thank you very much.
I can test any patch.

> -- thorpej
>

-- 
Ryo ONODERA // r...@tetera.org
PGP fingerprint = 82A2 DC91 76E0 A10A 8ABB  FD1B F404 27FA C7D1 15F3


Re: CVS commit: src/sys [freeze on boot]

2020-01-20 Thread Christos Zoulas
In article <20200120185023.gd28...@homeworld.netbsd.org>,
Andrew Doran   wrote:
>Fix committed with sys/kern/kern_rwlock.c rev 1.62.  I didn't see the
>problem as I am running with LOCKDEBUG.
>
>Apologies for the disruption.

FYI: powerpc/arm do not build anymore...

http://releng.netbsd.org/builds/HEAD/202001201020Z/

christos



Re: CVS commit: src/sys [freeze on boot]

2020-01-20 Thread Jason Thorpe


> On Jan 20, 2020, at 3:44 PM, Christos Zoulas  wrote:
> 
> In article <20200120185023.gd28...@homeworld.netbsd.org>,
> Andrew Doran   wrote:
>> Fix committed with sys/kern/kern_rwlock.c rev 1.62.  I didn't see the
>> problem as I am running with LOCKDEBUG.
>> 
>> Apologies for the disruption.
> 
> FYI: powerpc/arm do not build anymore...

This should fix the powerpc problem:

Index: lock_stubs.S
===
RCS file: /cvsroot/src/sys/arch/powerpc/powerpc/lock_stubs.S,v
retrieving revision 1.10
diff -u -p -r1.10 lock_stubs.S
--- lock_stubs.S28 Feb 2014 05:38:15 -  1.10
+++ lock_stubs.S21 Jan 2020 04:09:26 -
@@ -101,8 +101,8 @@ ENTRY(mutex_exit)
 /*
  * void rw_enter(krwlock_t *krw, krw_t op);
  */
-#if RW_READ_INCR != 16
-#error RW_READ_INCR != 16, clrrXi need fixing
+#if RW_READ_INCR != 32
+#error RW_READ_INCR != 32, clrrXi need fixing
 #endif
 #if RW_OWNER != 0
 #error RW_OWNER != 0, ldptr should be ldptru
@@ -115,7 +115,7 @@ ENTRY(rw_enter)
bne-1f
 
ldptr   %r9,RW_OWNER(%r3)
-   clrrptri %r9,%r9,4  /* clear low 4 bits */
+   clrrptri %r9,%r9,5  /* clear low 5 bits */
addi%r7,%r9,RW_READ_INCR
b   2f
 1:
@@ -140,7 +140,7 @@ ENTRY(rw_tryenter)
bne-1f
 
ldptr   %r9,RW_OWNER(%r3)
-   clrrptri %r9,%r9,4  /* clear low 4 bits */
+   clrrptri %r9,%r9,5  /* clear low 5 bits */
addi%r7,%r9,RW_READ_INCR
b   2f
 
@@ -169,7 +169,7 @@ ENTRY(rw_exit)
andi.   %r0,%r9,RW_WRITE_LOCKED
bne-1f
 
-   clrrptri. %r9,%r9,4 /* clear low 4 bits */
+   clrrptri. %r9,%r9,5 /* clear low 5 bits */
beq-3f  /* if 0, no reader, go slow */
 
addi%r7,%r9,-RW_READ_INCR

> 
> http://releng.netbsd.org/builds/HEAD/202001201020Z/
> 
> christos
> 

-- thorpej



Re: CVS commit: src/sys [freeze on boot]

2020-01-20 Thread Ryo ONODERA
Hi,

Thanks for your quick fix.
It works fine for my laptop now.


On January 21, 2020 3:50:23 AM GMT+09:00, Andrew Doran  wrote:
>Fix committed with sys/kern/kern_rwlock.c rev 1.62.  I didn't see the
>problem as I am running with LOCKDEBUG.
>
>Apologies for the disruption.
>
>Andrew

-- 
Ryo ONODERA // r...@tetera.org
PGP fingerprint = 82A2 DC91 76E0 A10A 8ABB FD1B F404 27FA C7D1 15F3


Re: CVS commit: src/sys [freeze on boot]

2020-01-20 Thread Chavdar Ivanov


На 2020-01-20 в 18:50, Andrew Doran написа:
> Fix committed with sys/kern/kern_rwlock.c rev 1.62.  I didn't see the
> problem as I am running with LOCKDEBUG.
>
> Apologies for the disruption.
All good now, thanks.
> Andrew


Re: CVS commit: src/sys [freeze on boot]

2020-01-20 Thread Masanobu SAITOH
Hi.

On 2020/01/21 2:06, Patrick Welche wrote:
> On Mon, Jan 20, 2020 at 04:12:45PM +, Patrick Welche wrote:
>> On Mon, Jan 20, 2020 at 12:51:00PM +, Andrew Doran wrote:
>>> This also happened the last time I touched rw_downgrade(), and I backed out
>>> the change then, but both times I don't see the bug.  I have some questions:
>>>
>>> - Are you running DIAGNOSTIC and/or LOCKDEBUG?  I would be very interested
>>>   to see what happens with a LOCKDEBUG kernel here.
>>
>> One worked with the addition of LOCKDEBUG. The other didn't, but it seems
>> to be unrelated:
>>
>> db{0}> show panic
>> Panic string: mutex_vector_enter,510: uninitialized lock 
>> (lock=0xbd012366609
>> 0, from=8033dc9d)
>> bt
>> breakpoint() at netbsd:breakpoint+0x5
>> vpanic() at netbsd:vpanic+0x178
>> snprintf() at netbsd:snprintf
>> lockdebug_wantlock() at netbsd:lockdebug_wantlock+0x166
>> mutex_enter() at netbsd:mutex_enter+0x37c
>> ixgbe_getext() at netbsd:ixgbe_getext+0x1d
> 
> ixgbe_getext does mutex_enter(&eh->eh_mtx) but...


I suspect the location of your panic is after the following message
(because of ixgbe_allocate_msix()'s failure):

> aprint_normal(" ETrackID %08x\n", ((uint32_t)high << 16) | low);

If so, could you try the following diff?

--
 Fix the freeing code for some error paths. Found by Patrick Welche.

Index: ix_txrx.c
===
RCS file: /cvsroot/src/sys/dev/pci/ixgbe/ix_txrx.c,v
retrieving revision 1.59
diff -u -p -r1.59 ix_txrx.c
--- ix_txrx.c   20 Jan 2020 07:19:04 -  1.59
+++ ix_txrx.c   21 Jan 2020 06:24:16 -
@@ -2353,3 +2353,24 @@ err_tx_desc:
free(adapter->queues, M_DEVBUF);
return (error);
 } /* ixgbe_allocate_queues */
+
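+/************************************************************************
+ * ixgbe_free_queues
+ *
+ *   Free descriptors for the transmit and receive rings, and then
+ *   the memory associated with each.
+ ************************************************************************/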
+void
+ixgbe_free_queues(struct adapter *adapter)
+{
+   struct ix_queue *que;
+   int i;
+
+   ixgbe_free_transmit_structures(adapter);
+   ixgbe_free_receive_structures(adapter);
+   for (i = 0; i < adapter->num_queues; i++) {
+   que = &adapter->queues[i];
+   mutex_destroy(&que->dc_mtx);
+   }
+   free(adapter->queues, M_DEVBUF);
+} /* ixgbe_free_queues */
Index: ixgbe.c
===
RCS file: /cvsroot/src/sys/dev/pci/ixgbe/ixgbe.c,v
retrieving revision 1.220
diff -u -p -r1.220 ixgbe.c
--- ixgbe.c 3 Jan 2020 12:59:46 -   1.220
+++ ixgbe.c 21 Jan 2020 06:24:16 -
@@ -1059,9 +1059,7 @@ ixgbe_attach(device_t parent, device_t d
error = ixgbe_allocate_msix(adapter, pa);
if (error) {
/* Free allocated queue structures first */
-   ixgbe_free_transmit_structures(adapter);
-   ixgbe_free_receive_structures(adapter);
-   free(adapter->queues, M_DEVBUF);
+   ixgbe_free_queues(adapter);
 
/* Fallback to legacy interrupt */
adapter->feat_en &= ~IXGBE_FEATURE_MSIX;
@@ -1236,9 +1234,7 @@ ixgbe_attach(device_t parent, device_t d
return;
 
 err_late:
-   ixgbe_free_transmit_structures(adapter);
-   ixgbe_free_receive_structures(adapter);
-   free(adapter->queues, M_DEVBUF);
+   ixgbe_free_queues(adapter);
 err_out:
ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
@@ -3712,13 +3708,7 @@ ixgbe_detach(device_t dev, int flags)
evcnt_detach(&stats->ptc1023);
evcnt_detach(&stats->ptc1522);
 
-   ixgbe_free_transmit_structures(adapter);
-   ixgbe_free_receive_structures(adapter);
-   for (i = 0; i < adapter->num_queues; i++) {
-   struct ix_queue * que = &adapter->queues[i];
-   mutex_destroy(&que->dc_mtx);
-   }
-   free(adapter->queues, M_DEVBUF);
+   ixgbe_free_queues(adapter);
free(adapter->mta, M_DEVBUF);
 
IXGBE_CORE_LOCK_DESTROY(adapter);
Index: ixgbe.h
===
RCS file: /cvsroot/src/sys/dev/pci/ixgbe/ixgbe.h,v
retrieving revision 1.61
diff -u -p -r1.61 ixgbe.h
--- ixgbe.h 20 Jan 2020 07:19:04 -  1.61
+++ ixgbe.h 21 Jan 2020 06:24:16 -
@@ -768,6 +768,7 @@ void ixgbe_deferred_mq_start_work(struct
 void ixgbe_drain_all(struct adapter *);
 
 int  ixgbe_allocate_queues(struct adapter *);
+void ixgbe_free_queues(struct adapter *);
 int  ixgbe_setup_transmit_structures(struct adapter *);
 void ixgbe_free_transmit_structures(struct adapter *);
 int  ixgbe_setup_receive_structures(struct adapter *);
Index: ixgbe_netbsd.c
===
RCS file: