Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-14 Thread Mike Galbraith
On Fri, 2017-07-14 at 14:42 -0500, Josh Poimboeuf wrote:
> 
> Does this fix it?

Yup, both READONLY __bug_table and "extra stern" warning are gone.

> diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
> index 39e702d..aa6b202 100644
> --- a/arch/x86/include/asm/bug.h
> +++ b/arch/x86/include/asm/bug.h
> @@ -35,7 +35,7 @@
>  #define _BUG_FLAGS(ins, flags)   \
>  do { \
>   asm volatile("1:\t" ins "\n"\
> -  ".pushsection __bug_table,\"a\"\n" \
> +  ".pushsection __bug_table,\"aw\"\n"\
>"2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n"   \
>"\t"  __BUG_REL(%c0) "\t# bug_entry::file\n"   \
>"\t.word %c1""\t# bug_entry::line\n"   \
> @@ -52,7 +52,7 @@ do {   \
>  #define _BUG_FLAGS(ins, flags)   \
>  do { \
>   asm volatile("1:\t" ins "\n"\
> -  ".pushsection __bug_table,\"a\"\n" \
> +  ".pushsection __bug_table,\"aw\"\n"\
>"2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n"   \
>"\t.word %c0""\t# bug_entry::flags\n"  \
>"\t.org 2b+%c1\n"  \


Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-14 Thread Mike Galbraith
On Fri, 2017-07-14 at 14:42 -0500, Josh Poimboeuf wrote:
> 
> Does this fix it?

Yup, both READONLY __bug_table and "extra stern" warning are gone.

> diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
> index 39e702d..aa6b202 100644
> --- a/arch/x86/include/asm/bug.h
> +++ b/arch/x86/include/asm/bug.h
> @@ -35,7 +35,7 @@
>  #define _BUG_FLAGS(ins, flags)   \
>  do { \
>   asm volatile("1:\t" ins "\n"\
> -  ".pushsection __bug_table,\"a\"\n" \
> +  ".pushsection __bug_table,\"aw\"\n"\
>"2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n"   \
>"\t"  __BUG_REL(%c0) "\t# bug_entry::file\n"   \
>"\t.word %c1""\t# bug_entry::line\n"   \
> @@ -52,7 +52,7 @@ do {   \
>  #define _BUG_FLAGS(ins, flags)   \
>  do { \
>   asm volatile("1:\t" ins "\n"\
> -  ".pushsection __bug_table,\"a\"\n" \
> +  ".pushsection __bug_table,\"aw\"\n"\
>"2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n"   \
>"\t.word %c0""\t# bug_entry::flags\n"  \
>"\t.org 2b+%c1\n"  \


Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-14 Thread Josh Poimboeuf
On Fri, Jul 14, 2017 at 06:33:01PM +0200, Mike Galbraith wrote:
> On Fri, 2017-07-14 at 18:10 +0200, Peter Zijlstra wrote:
> > On Fri, Jul 14, 2017 at 05:58:18PM +0200, Mike Galbraith wrote:
> > > On Fri, 2017-07-14 at 17:50 +0200, Peter Zijlstra wrote:
> > 
> > > > Urgh, is for some mysterious reason the __bug_table section of modules
> > > > ending up in RO memory?
> > > > 
> > > > I forever get lost in that link magic :/
> > > 
> > > +1
> > > 
> > > drm.ko
> > >  20 __bug_table   0630      0004bff3  2**0
> > >   CONTENTS, ALLOC, LOAD, RELOC, READONLY, DATA
> > > vmlinux
> > >  15 __bug_table   ba84  81af26c0  01af26c0  00cf26c0  2**0
> > >   CONTENTS, ALLOC, LOAD, READONLY, DATA
> > > 
> > > Danged if I know... um um RELOC business mucks things up?
> > 
> > Argh, it shouldn't be READONLY for vmlinux either, but apparently that
> > is working for mysterious reasons.
> > 
> > Some architectures were in fact complaining that I broke that, and hence
> > patch:
> > 
> > b5effd3815cc ("debug: Fix __bug_table[] in arch linker scripts")
> > 
> > I think we need professional help with this linking stuff, but who to
> > ask?
> 
> Andy Lutomirski?

Does this fix it?


diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index 39e702d..aa6b202 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -35,7 +35,7 @@
 #define _BUG_FLAGS(ins, flags) \
 do {   \
asm volatile("1:\t" ins "\n"\
-".pushsection __bug_table,\"a\"\n" \
+".pushsection __bug_table,\"aw\"\n"\
 "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n"   \
 "\t"  __BUG_REL(%c0) "\t# bug_entry::file\n"   \
 "\t.word %c1""\t# bug_entry::line\n"   \
@@ -52,7 +52,7 @@ do {   \
 #define _BUG_FLAGS(ins, flags) \
 do {   \
asm volatile("1:\t" ins "\n"\
-".pushsection __bug_table,\"a\"\n" \
+".pushsection __bug_table,\"aw\"\n"\
 "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n"   \
 "\t.word %c0""\t# bug_entry::flags\n"  \
 "\t.org 2b+%c1\n"  \


Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-14 Thread Josh Poimboeuf
On Fri, Jul 14, 2017 at 06:33:01PM +0200, Mike Galbraith wrote:
> On Fri, 2017-07-14 at 18:10 +0200, Peter Zijlstra wrote:
> > On Fri, Jul 14, 2017 at 05:58:18PM +0200, Mike Galbraith wrote:
> > > On Fri, 2017-07-14 at 17:50 +0200, Peter Zijlstra wrote:
> > 
> > > > Urgh, is for some mysterious reason the __bug_table section of modules
> > > > ending up in RO memory?
> > > > 
> > > > I forever get lost in that link magic :/
> > > 
> > > +1
> > > 
> > > drm.ko
> > >  20 __bug_table   0630      0004bff3  2**0
> > >   CONTENTS, ALLOC, LOAD, RELOC, READONLY, DATA
> > > vmlinux
> > >  15 __bug_table   ba84  81af26c0  01af26c0  00cf26c0  2**0
> > >   CONTENTS, ALLOC, LOAD, READONLY, DATA
> > > 
> > > Danged if I know... um um RELOC business mucks things up?
> > 
> > Argh, it shouldn't be READONLY for vmlinux either, but apparently that
> > is working for mysterious reasons.
> > 
> > Some architectures were in fact complaining that I broke that, and hence
> > patch:
> > 
> > b5effd3815cc ("debug: Fix __bug_table[] in arch linker scripts")
> > 
> > I think we need professional help with this linking stuff, but who to
> > ask?
> 
> Andy Lutomirski?

Does this fix it?


diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index 39e702d..aa6b202 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -35,7 +35,7 @@
 #define _BUG_FLAGS(ins, flags) \
 do {   \
asm volatile("1:\t" ins "\n"\
-".pushsection __bug_table,\"a\"\n" \
+".pushsection __bug_table,\"aw\"\n"\
 "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n"   \
 "\t"  __BUG_REL(%c0) "\t# bug_entry::file\n"   \
 "\t.word %c1""\t# bug_entry::line\n"   \
@@ -52,7 +52,7 @@ do {   \
 #define _BUG_FLAGS(ins, flags) \
 do {   \
asm volatile("1:\t" ins "\n"\
-".pushsection __bug_table,\"a\"\n" \
+".pushsection __bug_table,\"aw\"\n"\
 "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n"   \
 "\t.word %c0""\t# bug_entry::flags\n"  \
 "\t.org 2b+%c1\n"  \


Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-14 Thread Mike Galbraith
On Fri, 2017-07-14 at 18:10 +0200, Peter Zijlstra wrote:
> On Fri, Jul 14, 2017 at 05:58:18PM +0200, Mike Galbraith wrote:
> > On Fri, 2017-07-14 at 17:50 +0200, Peter Zijlstra wrote:
> 
> > > Urgh, is for some mysterious reason the __bug_table section of modules
> > > ending up in RO memory?
> > > 
> > > I forever get lost in that link magic :/
> > 
> > +1
> > 
> > drm.ko
> >  20 __bug_table   0630      0004bff3  2**0
> >   CONTENTS, ALLOC, LOAD, RELOC, READONLY, DATA
> > vmlinux
> >  15 __bug_table   ba84  81af26c0  01af26c0  00cf26c0  2**0
> >   CONTENTS, ALLOC, LOAD, READONLY, DATA
> > 
> > Danged if I know... um um RELOC business mucks things up?
> 
> Argh, it shouldn't be READONLY for vmlinux either, but apparently that
> is working for mysterious reasons.
> 
> Some architectures were in fact complaining that I broke that, and hence
> patch:
> 
> b5effd3815cc ("debug: Fix __bug_table[] in arch linker scripts")
> 
> I think we need professional help with this linking stuff, but who to
> ask?

Andy Lutomirski?


Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-14 Thread Mike Galbraith
On Fri, 2017-07-14 at 18:10 +0200, Peter Zijlstra wrote:
> On Fri, Jul 14, 2017 at 05:58:18PM +0200, Mike Galbraith wrote:
> > On Fri, 2017-07-14 at 17:50 +0200, Peter Zijlstra wrote:
> 
> > > Urgh, is for some mysterious reason the __bug_table section of modules
> > > ending up in RO memory?
> > > 
> > > I forever get lost in that link magic :/
> > 
> > +1
> > 
> > drm.ko
> >  20 __bug_table   0630      0004bff3  2**0
> >   CONTENTS, ALLOC, LOAD, RELOC, READONLY, DATA
> > vmlinux
> >  15 __bug_table   ba84  81af26c0  01af26c0  00cf26c0  2**0
> >   CONTENTS, ALLOC, LOAD, READONLY, DATA
> > 
> > Danged if I know... um um RELOC business mucks things up?
> 
> Argh, it shouldn't be READONLY for vmlinux either, but apparently that
> is working for mysterious reasons.
> 
> Some architectures were in fact complaining that I broke that, and hence
> patch:
> 
> b5effd3815cc ("debug: Fix __bug_table[] in arch linker scripts")
> 
> I think we need professional help with this linking stuff, but who to
> ask?

Andy Lutomirski?


Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-14 Thread Peter Zijlstra
On Fri, Jul 14, 2017 at 05:58:18PM +0200, Mike Galbraith wrote:
> On Fri, 2017-07-14 at 17:50 +0200, Peter Zijlstra wrote:

> > Urgh, is for some mysterious reason the __bug_table section of modules
> > ending up in RO memory?
> > 
> > I forever get lost in that link magic :/
> 
> +1
> 
> drm.ko
>  20 __bug_table   0630      0004bff3  2**0
>   CONTENTS, ALLOC, LOAD, RELOC, READONLY, DATA
> vmlinux
>  15 __bug_table   ba84  81af26c0  01af26c0  00cf26c0  2**0
>   CONTENTS, ALLOC, LOAD, READONLY, DATA
> 
> Danged if I know... um um RELOC business mucks things up?

Argh, it shouldn't be READONLY for vmlinux either, but apparently that
is working for mysterious reasons.

Some architectures were in fact complaining that I broke that, and hence
patch:

b5effd3815cc ("debug: Fix __bug_table[] in arch linker scripts")

I think we need professional help with this linking stuff, but who to
ask?


Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-14 Thread Peter Zijlstra
On Fri, Jul 14, 2017 at 05:58:18PM +0200, Mike Galbraith wrote:
> On Fri, 2017-07-14 at 17:50 +0200, Peter Zijlstra wrote:

> > Urgh, is for some mysterious reason the __bug_table section of modules
> > ending up in RO memory?
> > 
> > I forever get lost in that link magic :/
> 
> +1
> 
> drm.ko
>  20 __bug_table   0630      0004bff3  2**0
>   CONTENTS, ALLOC, LOAD, RELOC, READONLY, DATA
> vmlinux
>  15 __bug_table   ba84  81af26c0  01af26c0  00cf26c0  2**0
>   CONTENTS, ALLOC, LOAD, READONLY, DATA
> 
> Danged if I know... um um RELOC business mucks things up?

Argh, it shouldn't be READONLY for vmlinux either, but apparently that
is working for mysterious reasons.

Some architectures were in fact complaining that I broke that, and hence
patch:

b5effd3815cc ("debug: Fix __bug_table[] in arch linker scripts")

I think we need professional help with this linking stuff, but who to
ask?


Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-14 Thread Mike Galbraith
On Fri, 2017-07-14 at 17:50 +0200, Peter Zijlstra wrote:
> On Fri, Jul 14, 2017 at 03:36:08PM +0200, Mike Galbraith wrote:
> > Ok, a network outage gave me time to go hunting.  Indeed it is a bad
> > interaction with the tree DRM merged into.  All DRM did was to slip a
> > WARN_ON_ONCE() that nouveau triggers into a kernel module where such
> > things no longer warn, they blow the box out of the water.  I made a
> > dinky testcase module (attached), and bisected to the real root
> > 
> > 19d436268dde95389c616bb3819da73f0a8b28a8 is the first bad commit
> > commit 19d436268dde95389c616bb3819da73f0a8b28a8
> > Author: Peter Zijlstra 
> > Date:   Sat Feb 25 08:56:53 2017 +0100
> > 
> > debug: Add _ONCE() logic to report_bug()
> 
> Urgh, is for some mysterious reason the __bug_table section of modules
> ending up in RO memory?
> 
> I forever get lost in that link magic :/

+1

drm.ko
 20 __bug_table   0630      0004bff3  2**0
  CONTENTS, ALLOC, LOAD, RELOC, READONLY, DATA
vmlinux
 15 __bug_table   ba84  81af26c0  01af26c0  00cf26c0  2**0
  CONTENTS, ALLOC, LOAD, READONLY, DATA

Danged if I know... um um RELOC business mucks things up?

-Mike


Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-14 Thread Mike Galbraith
On Fri, 2017-07-14 at 17:50 +0200, Peter Zijlstra wrote:
> On Fri, Jul 14, 2017 at 03:36:08PM +0200, Mike Galbraith wrote:
> > Ok, a network outage gave me time to go hunting.  Indeed it is a bad
> > interaction with the tree DRM merged into.  All DRM did was to slip a
> > WARN_ON_ONCE() that nouveau triggers into a kernel module where such
> > things no longer warn, they blow the box out of the water.  I made a
> > dinky testcase module (attached), and bisected to the real root
> > 
> > 19d436268dde95389c616bb3819da73f0a8b28a8 is the first bad commit
> > commit 19d436268dde95389c616bb3819da73f0a8b28a8
> > Author: Peter Zijlstra 
> > Date:   Sat Feb 25 08:56:53 2017 +0100
> > 
> > debug: Add _ONCE() logic to report_bug()
> 
> Urgh, is for some mysterious reason the __bug_table section of modules
> ending up in RO memory?
> 
> I forever get lost in that link magic :/

+1

drm.ko
 20 __bug_table   0630      0004bff3  2**0
  CONTENTS, ALLOC, LOAD, RELOC, READONLY, DATA
vmlinux
 15 __bug_table   ba84  81af26c0  01af26c0  00cf26c0  2**0
  CONTENTS, ALLOC, LOAD, READONLY, DATA

Danged if I know... um um RELOC business mucks things up?

-Mike


Re: [Nouveau] [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-14 Thread Peter Zijlstra
On Fri, Jul 14, 2017 at 11:20:01AM -0400, Ilia Mirkin wrote:
> On Fri, Jul 14, 2017 at 11:19 AM, Tobias Klausmann
>  wrote:
> > The conversion is a nice catch, but i'd like to have a bit more context, see
> > below!
> >
> > With a better description:
> >
> > Tobias Klausmann 
> 
> I don't think it was meant as a serious patch. WARN_ON_ONCE should
> work. The fix isn't to remove all instances of WARN_ON_ONCE. The fix
> is to fix WARN_ON_ONCE.

Quite so. Clearly I buggered it for modules; that really wasn't the
plan.


Re: [Nouveau] [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-14 Thread Peter Zijlstra
On Fri, Jul 14, 2017 at 11:20:01AM -0400, Ilia Mirkin wrote:
> On Fri, Jul 14, 2017 at 11:19 AM, Tobias Klausmann
>  wrote:
> > The conversion is a nice catch, but i'd like to have a bit more context, see
> > below!
> >
> > With a better description:
> >
> > Tobias Klausmann 
> 
> I don't think it was meant as a serious patch. WARN_ON_ONCE should
> work. The fix isn't to remove all instances of WARN_ON_ONCE. The fix
> is to fix WARN_ON_ONCE.

Quite so. Clearly I buggered it for modules; that really wasn't the
plan.


Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-14 Thread Peter Zijlstra
On Fri, Jul 14, 2017 at 03:36:08PM +0200, Mike Galbraith wrote:
> Ok, a network outage gave me time to go hunting.  Indeed it is a bad
> interaction with the tree DRM merged into.  All DRM did was to slip a
> WARN_ON_ONCE() that nouveau triggers into a kernel module where such
> things no longer warn, they blow the box out of the water.  I made a
> dinky testcase module (attached), and bisected to the real root
> 
> 19d436268dde95389c616bb3819da73f0a8b28a8 is the first bad commit
> commit 19d436268dde95389c616bb3819da73f0a8b28a8
> Author: Peter Zijlstra 
> Date:   Sat Feb 25 08:56:53 2017 +0100
> 
> debug: Add _ONCE() logic to report_bug()

Urgh, is for some mysterious reason the __bug_table section of modules
ending up in RO memory?

I forever get lost in that link magic :/


Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-14 Thread Peter Zijlstra
On Fri, Jul 14, 2017 at 03:36:08PM +0200, Mike Galbraith wrote:
> Ok, a network outage gave me time to go hunting.  Indeed it is a bad
> interaction with the tree DRM merged into.  All DRM did was to slip a
> WARN_ON_ONCE() that nouveau triggers into a kernel module where such
> things no longer warn, they blow the box out of the water.  I made a
> dinky testcase module (attached), and bisected to the real root
> 
> 19d436268dde95389c616bb3819da73f0a8b28a8 is the first bad commit
> commit 19d436268dde95389c616bb3819da73f0a8b28a8
> Author: Peter Zijlstra 
> Date:   Sat Feb 25 08:56:53 2017 +0100
> 
> debug: Add _ONCE() logic to report_bug()

Urgh, is for some mysterious reason the __bug_table section of modules
ending up in RO memory?

I forever get lost in that link magic :/


Re: [Nouveau] [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-14 Thread Ilia Mirkin
On Fri, Jul 14, 2017 at 11:19 AM, Tobias Klausmann
 wrote:
> The conversion is a nice catch, but i'd like to have a bit more context, see
> below!
>
> With a better description:
>
> Tobias Klausmann 

I don't think it was meant as a serious patch. WARN_ON_ONCE should
work. The fix isn't to remove all instances of WARN_ON_ONCE. The fix
is to fix WARN_ON_ONCE.


Re: [Nouveau] [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-14 Thread Ilia Mirkin
On Fri, Jul 14, 2017 at 11:19 AM, Tobias Klausmann
 wrote:
> The conversion is a nice catch, but i'd like to have a bit more context, see
> below!
>
> With a better description:
>
> Tobias Klausmann 

I don't think it was meant as a serious patch. WARN_ON_ONCE should
work. The fix isn't to remove all instances of WARN_ON_ONCE. The fix
is to fix WARN_ON_ONCE.


Re: [Nouveau] [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-14 Thread Tobias Klausmann
The conversion is a nice catch, but i'd like to have a bit more context, 
see below!


With a better description:

Tobias Klausmann 


On 7/14/17 5:10 PM, Karol Herbst wrote:

Yeah, we shouldn't let the machine die. Are there more WARN_ON_ONCE
usage we could convert to WARN_ONCE?

Reviewed-By: Karol Herbst 

On Fri, Jul 14, 2017 at 5:05 PM, Tobias Klausmann
 wrote:

On 7/14/17 3:41 PM, Mike Galbraith wrote:

On Fri, 2017-07-14 at 15:36 +0200, Mike Galbraith wrote:

   All DRM did was to slip a
WARN_ON_ONCE() that nouveau triggers into a kernel module where such
things no longer warn, they blow the box out of the water.

BTW, turn that irksome WARN_ON_ONCE() in drivers/gpu/drm/drm_vblank.c
into a WARN_ONCE(), and all is peachy, you get the warning, box lives.

---
   drivers/gpu/drm/drm_vblank.c |3 ++-
   1 file changed, 2 insertions(+), 1 deletion(-)

--- a/drivers/gpu/drm/drm_vblank.c
+++ b/drivers/gpu/drm/drm_vblank.c
@@ -605,7 +605,8 @@ bool drm_calc_vbltimestamp_from_scanoutp
  */
 if (mode->crtc_clock == 0) {
 DRM_DEBUG("crtc %u: Noop due to uninitialized mode.\n",
pipe);
-   WARN_ON_ONCE(drm_drv_uses_atomic_modeset(dev));
+   WARN_ONCE(drm_drv_uses_atomic_modeset(dev), "%s: report
me.\n",


"report me" seems a bit odd, maybe just uninitialized mode?



+ dev->driver->name);
 return false;
 }



Hey,

confirmed this helps saving the box, but we still have to find the root
cause! Backtrace with the above fix applied (and the one which came in with
the latest drm-fixes merge)!


[1] https://hastebin.com/uyoqifijed.http

Thanks,

Tobias
Reviewed-By: Karol Herbst 
___
Nouveau mailing list
nouv...@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-14 Thread Tobias Klausmann
The conversion is a nice catch, but i'd like to have a bit more context, 
see below!


With a better description:

Tobias Klausmann 


On 7/14/17 5:10 PM, Karol Herbst wrote:

Yeah, we shouldn't let the machine die. Are there more WARN_ON_ONCE
usage we could convert to WARN_ONCE?

Reviewed-By: Karol Herbst 

On Fri, Jul 14, 2017 at 5:05 PM, Tobias Klausmann
 wrote:

On 7/14/17 3:41 PM, Mike Galbraith wrote:

On Fri, 2017-07-14 at 15:36 +0200, Mike Galbraith wrote:

   All DRM did was to slip a
WARN_ON_ONCE() that nouveau triggers into a kernel module where such
things no longer warn, they blow the box out of the water.

BTW, turn that irksome WARN_ON_ONCE() in drivers/gpu/drm/drm_vblank.c
into a WARN_ONCE(), and all is peachy, you get the warning, box lives.

---
   drivers/gpu/drm/drm_vblank.c |3 ++-
   1 file changed, 2 insertions(+), 1 deletion(-)

--- a/drivers/gpu/drm/drm_vblank.c
+++ b/drivers/gpu/drm/drm_vblank.c
@@ -605,7 +605,8 @@ bool drm_calc_vbltimestamp_from_scanoutp
  */
 if (mode->crtc_clock == 0) {
 DRM_DEBUG("crtc %u: Noop due to uninitialized mode.\n",
pipe);
-   WARN_ON_ONCE(drm_drv_uses_atomic_modeset(dev));
+   WARN_ONCE(drm_drv_uses_atomic_modeset(dev), "%s: report
me.\n",


"report me" seems a bit odd, maybe just uninitialized mode?



+ dev->driver->name);
 return false;
 }



Hey,

confirmed this helps saving the box, but we still have to find the root
cause! Backtrace with the above fix applied (and the one which came in with
the latest drm-fixes merge)!


[1] https://hastebin.com/uyoqifijed.http

Thanks,

Tobias
Reviewed-By: Karol Herbst 
___
Nouveau mailing list
nouv...@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-14 Thread Ilia Mirkin
On Fri, Jul 14, 2017 at 11:15 AM, Mike Galbraith  wrote:
> On Fri, 2017-07-14 at 17:10 +0200, Karol Herbst wrote:
>> Yeah, we shouldn't let the machine die. Are there more WARN_ON_ONCE
>> usage we could convert to WARN_ONCE?
>
> Shooting the messenger is generally considered uncool :)

That's never stopped it from being a popular practice...


Re: [Nouveau] [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-14 Thread Ilia Mirkin
On Fri, Jul 14, 2017 at 11:15 AM, Mike Galbraith  wrote:
> On Fri, 2017-07-14 at 17:10 +0200, Karol Herbst wrote:
>> Yeah, we shouldn't let the machine die. Are there more WARN_ON_ONCE
>> usage we could convert to WARN_ONCE?
>
> Shooting the messenger is generally considered uncool :)

That's never stopped it from being a popular practice...


Re: [Nouveau] [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-14 Thread Mike Galbraith
On Fri, 2017-07-14 at 17:10 +0200, Karol Herbst wrote:
> Yeah, we shouldn't let the machine die. Are there more WARN_ON_ONCE
> usage we could convert to WARN_ONCE?

Shooting the messenger is generally considered uncool :)

-Mike


Re: [Nouveau] [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-14 Thread Mike Galbraith
On Fri, 2017-07-14 at 17:10 +0200, Karol Herbst wrote:
> Yeah, we shouldn't let the machine die. Are there more WARN_ON_ONCE
> usage we could convert to WARN_ONCE?

Shooting the messenger is generally considered uncool :)

-Mike


Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-14 Thread Mike Galbraith
On Fri, 2017-07-14 at 17:05 +0200, Tobias Klausmann wrote:
> On 7/14/17 3:41 PM, Mike Galbraith wrote:
> > On Fri, 2017-07-14 at 15:36 +0200, Mike Galbraith wrote:
> >>   All DRM did was to slip a
> >> WARN_ON_ONCE() that nouveau triggers into a kernel module where such
> >> things no longer warn, they blow the box out of the water.
> > BTW, turn that irksome WARN_ON_ONCE() in drivers/gpu/drm/drm_vblank.c
> > into a WARN_ONCE(), and all is peachy, you get the warning, box lives.
> >
> > ---
> >   drivers/gpu/drm/drm_vblank.c |3 ++-
> >   1 file changed, 2 insertions(+), 1 deletion(-)
> >
> > --- a/drivers/gpu/drm/drm_vblank.c
> > +++ b/drivers/gpu/drm/drm_vblank.c
> > @@ -605,7 +605,8 @@ bool drm_calc_vbltimestamp_from_scanoutp
> >  */
> > if (mode->crtc_clock == 0) {
> > DRM_DEBUG("crtc %u: Noop due to uninitialized mode.\n", pipe);
> > -   WARN_ON_ONCE(drm_drv_uses_atomic_modeset(dev));
> > +   WARN_ONCE(drm_drv_uses_atomic_modeset(dev), "%s: report me.\n",
> > + dev->driver->name);
> >   
> > return false;
> > }
> 
> 
> Hey,
> 
> confirmed this helps saving the box, but we still have to find the root 
> cause! Backtrace with the above fix applied (and the one which came in 
> with the latest drm-fixes merge)!

Yeah, I'll be reporting some extra whining from my 8600 GT backup box.

-Mike


Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-14 Thread Mike Galbraith
On Fri, 2017-07-14 at 17:05 +0200, Tobias Klausmann wrote:
> On 7/14/17 3:41 PM, Mike Galbraith wrote:
> > On Fri, 2017-07-14 at 15:36 +0200, Mike Galbraith wrote:
> >>   All DRM did was to slip a
> >> WARN_ON_ONCE() that nouveau triggers into a kernel module where such
> >> things no longer warn, they blow the box out of the water.
> > BTW, turn that irksome WARN_ON_ONCE() in drivers/gpu/drm/drm_vblank.c
> > into a WARN_ONCE(), and all is peachy, you get the warning, box lives.
> >
> > ---
> >   drivers/gpu/drm/drm_vblank.c |3 ++-
> >   1 file changed, 2 insertions(+), 1 deletion(-)
> >
> > --- a/drivers/gpu/drm/drm_vblank.c
> > +++ b/drivers/gpu/drm/drm_vblank.c
> > @@ -605,7 +605,8 @@ bool drm_calc_vbltimestamp_from_scanoutp
> >  */
> > if (mode->crtc_clock == 0) {
> > DRM_DEBUG("crtc %u: Noop due to uninitialized mode.\n", pipe);
> > -   WARN_ON_ONCE(drm_drv_uses_atomic_modeset(dev));
> > +   WARN_ONCE(drm_drv_uses_atomic_modeset(dev), "%s: report me.\n",
> > + dev->driver->name);
> >   
> > return false;
> > }
> 
> 
> Hey,
> 
> confirmed this helps saving the box, but we still have to find the root 
> cause! Backtrace with the above fix applied (and the one which came in 
> with the latest drm-fixes merge)!

Yeah, I'll be reporting some extra whining from my 8600 GT backup box.

-Mike


Re: [Nouveau] [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-14 Thread Karol Herbst
Yeah, we shouldn't let the machine die. Are there more WARN_ON_ONCE
usage we could convert to WARN_ONCE?

Reviewed-By: Karol Herbst 

On Fri, Jul 14, 2017 at 5:05 PM, Tobias Klausmann
 wrote:
> On 7/14/17 3:41 PM, Mike Galbraith wrote:
>>
>> On Fri, 2017-07-14 at 15:36 +0200, Mike Galbraith wrote:
>>>
>>>   All DRM did was to slip a
>>> WARN_ON_ONCE() that nouveau triggers into a kernel module where such
>>> things no longer warn, they blow the box out of the water.
>>
>> BTW, turn that irksome WARN_ON_ONCE() in drivers/gpu/drm/drm_vblank.c
>> into a WARN_ONCE(), and all is peachy, you get the warning, box lives.
>>
>> ---
>>   drivers/gpu/drm/drm_vblank.c |3 ++-
>>   1 file changed, 2 insertions(+), 1 deletion(-)
>>
>> --- a/drivers/gpu/drm/drm_vblank.c
>> +++ b/drivers/gpu/drm/drm_vblank.c
>> @@ -605,7 +605,8 @@ bool drm_calc_vbltimestamp_from_scanoutp
>>  */
>> if (mode->crtc_clock == 0) {
>> DRM_DEBUG("crtc %u: Noop due to uninitialized mode.\n",
>> pipe);
>> -   WARN_ON_ONCE(drm_drv_uses_atomic_modeset(dev));
>> +   WARN_ONCE(drm_drv_uses_atomic_modeset(dev), "%s: report
>> me.\n",
>> + dev->driver->name);
>> return false;
>> }
>
>
>
> Hey,
>
> confirmed this helps saving the box, but we still have to find the root
> cause! Backtrace with the above fix applied (and the one which came in with
> the latest drm-fixes merge)!
>
>
> [1] https://hastebin.com/uyoqifijed.http
>
> Thanks,
>
> Tobias
>Reviewed-By: Karol Herbst 
> ___
> Nouveau mailing list
> nouv...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-14 Thread Karol Herbst
Yeah, we shouldn't let the machine die. Are there more WARN_ON_ONCE
usage we could convert to WARN_ONCE?

Reviewed-By: Karol Herbst 

On Fri, Jul 14, 2017 at 5:05 PM, Tobias Klausmann
 wrote:
> On 7/14/17 3:41 PM, Mike Galbraith wrote:
>>
>> On Fri, 2017-07-14 at 15:36 +0200, Mike Galbraith wrote:
>>>
>>>   All DRM did was to slip a
>>> WARN_ON_ONCE() that nouveau triggers into a kernel module where such
>>> things no longer warn, they blow the box out of the water.
>>
>> BTW, turn that irksome WARN_ON_ONCE() in drivers/gpu/drm/drm_vblank.c
>> into a WARN_ONCE(), and all is peachy, you get the warning, box lives.
>>
>> ---
>>   drivers/gpu/drm/drm_vblank.c |3 ++-
>>   1 file changed, 2 insertions(+), 1 deletion(-)
>>
>> --- a/drivers/gpu/drm/drm_vblank.c
>> +++ b/drivers/gpu/drm/drm_vblank.c
>> @@ -605,7 +605,8 @@ bool drm_calc_vbltimestamp_from_scanoutp
>>  */
>> if (mode->crtc_clock == 0) {
>> DRM_DEBUG("crtc %u: Noop due to uninitialized mode.\n",
>> pipe);
>> -   WARN_ON_ONCE(drm_drv_uses_atomic_modeset(dev));
>> +   WARN_ONCE(drm_drv_uses_atomic_modeset(dev), "%s: report
>> me.\n",
>> + dev->driver->name);
>> return false;
>> }
>
>
>
> Hey,
>
> confirmed this helps saving the box, but we still have to find the root
> cause! Backtrace with the above fix applied (and the one which came in with
> the latest drm-fixes merge)!
>
>
> [1] https://hastebin.com/uyoqifijed.http
>
> Thanks,
>
> Tobias
>Reviewed-By: Karol Herbst 
> ___
> Nouveau mailing list
> nouv...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-14 Thread Tobias Klausmann

On 7/14/17 3:41 PM, Mike Galbraith wrote:

On Fri, 2017-07-14 at 15:36 +0200, Mike Galbraith wrote:

  All DRM did was to slip a
WARN_ON_ONCE() that nouveau triggers into a kernel module where such
things no longer warn, they blow the box out of the water.

BTW, turn that irksome WARN_ON_ONCE() in drivers/gpu/drm/drm_vblank.c
into a WARN_ONCE(), and all is peachy, you get the warning, box lives.

---
  drivers/gpu/drm/drm_vblank.c |3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

--- a/drivers/gpu/drm/drm_vblank.c
+++ b/drivers/gpu/drm/drm_vblank.c
@@ -605,7 +605,8 @@ bool drm_calc_vbltimestamp_from_scanoutp
 */
if (mode->crtc_clock == 0) {
DRM_DEBUG("crtc %u: Noop due to uninitialized mode.\n", pipe);
-   WARN_ON_ONCE(drm_drv_uses_atomic_modeset(dev));
+   WARN_ONCE(drm_drv_uses_atomic_modeset(dev), "%s: report me.\n",
+ dev->driver->name);
  
  		return false;

}



Hey,

confirmed this helps saving the box, but we still have to find the root 
cause! Backtrace with the above fix applied (and the one which came in 
with the latest drm-fixes merge)!



[1] https://hastebin.com/uyoqifijed.http

Thanks,

Tobias



Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-14 Thread Tobias Klausmann

On 7/14/17 3:41 PM, Mike Galbraith wrote:

On Fri, 2017-07-14 at 15:36 +0200, Mike Galbraith wrote:

  All DRM did was to slip a
WARN_ON_ONCE() that nouveau triggers into a kernel module where such
things no longer warn, they blow the box out of the water.

BTW, turn that irksome WARN_ON_ONCE() in drivers/gpu/drm/drm_vblank.c
into a WARN_ONCE(), and all is peachy, you get the warning, box lives.

---
  drivers/gpu/drm/drm_vblank.c |3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

--- a/drivers/gpu/drm/drm_vblank.c
+++ b/drivers/gpu/drm/drm_vblank.c
@@ -605,7 +605,8 @@ bool drm_calc_vbltimestamp_from_scanoutp
 */
if (mode->crtc_clock == 0) {
DRM_DEBUG("crtc %u: Noop due to uninitialized mode.\n", pipe);
-   WARN_ON_ONCE(drm_drv_uses_atomic_modeset(dev));
+   WARN_ONCE(drm_drv_uses_atomic_modeset(dev), "%s: report me.\n",
+ dev->driver->name);
  
  		return false;

}



Hey,

Confirmed: this helps save the box, but we still have to find the root
cause! A backtrace with the above fix applied (and the one which came in
with the latest drm-fixes merge) is at [1].



[1] https://hastebin.com/uyoqifijed.http

Thanks,

Tobias



Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-14 Thread Mike Galbraith
On Fri, 2017-07-14 at 15:36 +0200, Mike Galbraith wrote:
>  All DRM did was to slip a
> WARN_ON_ONCE() that nouveau triggers into a kernel module where such
> things no longer warn, they blow the box out of the water.

BTW, turn that irksome WARN_ON_ONCE() in drivers/gpu/drm/drm_vblank.c
into a WARN_ONCE(), and all is peachy, you get the warning, box lives.

---
 drivers/gpu/drm/drm_vblank.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

--- a/drivers/gpu/drm/drm_vblank.c
+++ b/drivers/gpu/drm/drm_vblank.c
@@ -605,7 +605,8 @@ bool drm_calc_vbltimestamp_from_scanoutp
 */
if (mode->crtc_clock == 0) {
DRM_DEBUG("crtc %u: Noop due to uninitialized mode.\n", pipe);
-   WARN_ON_ONCE(drm_drv_uses_atomic_modeset(dev));
+   WARN_ONCE(drm_drv_uses_atomic_modeset(dev), "%s: report me.\n",
+ dev->driver->name);
 
return false;
}


Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-14 Thread Mike Galbraith
On Fri, 2017-07-14 at 15:36 +0200, Mike Galbraith wrote:
>  All DRM did was to slip a
> WARN_ON_ONCE() that nouveau triggers into a kernel module where such
> things no longer warn, they blow the box out of the water.

BTW, turn that irksome WARN_ON_ONCE() in drivers/gpu/drm/drm_vblank.c
into a WARN_ONCE(), and all is peachy, you get the warning, box lives.

---
 drivers/gpu/drm/drm_vblank.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

--- a/drivers/gpu/drm/drm_vblank.c
+++ b/drivers/gpu/drm/drm_vblank.c
@@ -605,7 +605,8 @@ bool drm_calc_vbltimestamp_from_scanoutp
 */
if (mode->crtc_clock == 0) {
DRM_DEBUG("crtc %u: Noop due to uninitialized mode.\n", pipe);
-   WARN_ON_ONCE(drm_drv_uses_atomic_modeset(dev));
+   WARN_ONCE(drm_drv_uses_atomic_modeset(dev), "%s: report me.\n",
+ dev->driver->name);
 
return false;
}


Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-14 Thread Mike Galbraith
On Wed, 2017-07-12 at 07:37 -0400, Ilia Mirkin wrote:
> On Wed, Jul 12, 2017 at 7:25 AM, Mike Galbraith  wrote:
> > On Wed, 2017-07-12 at 11:55 +0200, Mike Galbraith wrote:
> >> On Tue, 2017-07-11 at 14:22 -0400, Ilia Mirkin wrote:
> >> >
> >> > Some display stuff did change for 4.13 for GM20x+ boards. If it's not
> >> > too much trouble, a bisect would be pretty useful.
> >>
> >> Bisection seemingly went fine, but the result is odd.
> >>
> >> e98c58e55f68f8785aebfab1f8c9a03d8de0afe1 is the first bad commit
> >
> > But it really really is bad.  Looking at gitk fork in the road leading
> > to it...
> >
> > 52d9d38c183b drm/sti:fix spelling mistake: "compoment" -> "component" - good
> > e4e818cc2d7c drm: make drm_panel.h self-contained - good
> > 9cf8f5802f39 drm: add missing declaration to drm_blend.h  - good
> >
> > Before the git highway splits, all is well.  The lane with commits
> > works fine at both ends, but e98c58e55f68 is busted.  Merge artifact?
> 
> Hmmm... that tree does not appear to have gotten a v4.12 backmerge at
> any point. The last backmerge from Linus as far as I can tell was
> v4.11-rc7. Could be an interaction with some out-of-tree change.

Ok, a network outage gave me time to go hunting.  Indeed it is a bad
interaction with the tree DRM merged into.  All DRM did was to slip a
WARN_ON_ONCE() that nouveau triggers into a kernel module where such
things no longer warn, they blow the box out of the water.  I made a
dinky testcase module (attached), and bisected to the real root

19d436268dde95389c616bb3819da73f0a8b28a8 is the first bad commit
commit 19d436268dde95389c616bb3819da73f0a8b28a8
Author: Peter Zijlstra 
Date:   Sat Feb 25 08:56:53 2017 +0100

debug: Add _ONCE() logic to report_bug()

Josh suggested moving the _ONCE logic inside the trap handler, using a
bit in the bug_entry::flags field, avoiding the need for the extra
variable.

Sadly this only works for WARN_ON_ONCE(), since the others have
printk() statements prior to triggering the trap.

Still, this saves a fair amount of text and some data:

      text      data   filename
  10682460   4530992   defconfig-build/vmlinux.orig
  10665111   4530096   defconfig-build/vmlinux.patched

Suggested-by: Josh Poimboeuf 
Signed-off-by: Peter Zijlstra (Intel) 
Cc: Andy Lutomirski 
Cc: Arnd Bergmann 
Cc: Borislav Petkov 
Cc: Brian Gerst 
Cc: Denys Vlasenko 
Cc: H. Peter Anvin 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Signed-off-by: Ingo Molnar 

:040000 040000 9f47f66ec4c234f6ee8e2a09e991c95fe47cf2c1 3e92aa9e77b39ed075ae2c3bdf041d92ef898f62 M	arch
:040000 040000 34f70b73d40c82533dd7df9b289106be69e2fa8d dd5d7248694a36b3e170f2dca5d9c4121535a990 M	include
:040000 040000 f6e627b0d378f0a00d2987fdd0c7b215306e6e3c b360d4ee2579744cce530184d7dab13493f73ee0 M	lib

---
 kernel/Makefile |2 ++
 kernel/foo.c|   15 +++
 2 files changed, 17 insertions(+)

--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -111,6 +111,8 @@ obj-$(CONFIG_MEMBARRIER) += membarrier.o
 
 obj-$(CONFIG_HAS_IOMEM) += memremap.o
 
+obj-m += foo.o
+
 $(obj)/configs.o: $(obj)/config_data.h
 
 targets += config_data.gz
--- /dev/null
+++ b/kernel/foo.c
@@ -0,0 +1,15 @@
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+static int __init foo_init(void)
+{
+	printk(KERN_INFO "foo: module loaded\n");
+	WARN_ON_ONCE(1);
+	return 0;
+}
+
+static void __exit foo_exit(void) { }
+
+module_init(foo_init);
+module_exit(foo_exit);
+MODULE_LICENSE("GPL");


Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-14 Thread Mike Galbraith
On Wed, 2017-07-12 at 07:37 -0400, Ilia Mirkin wrote:
> On Wed, Jul 12, 2017 at 7:25 AM, Mike Galbraith  wrote:
> > On Wed, 2017-07-12 at 11:55 +0200, Mike Galbraith wrote:
> >> On Tue, 2017-07-11 at 14:22 -0400, Ilia Mirkin wrote:
> >> >
> >> > Some display stuff did change for 4.13 for GM20x+ boards. If it's not
> >> > too much trouble, a bisect would be pretty useful.
> >>
> >> Bisection seemingly went fine, but the result is odd.
> >>
> >> e98c58e55f68f8785aebfab1f8c9a03d8de0afe1 is the first bad commit
> >
> > But it really really is bad.  Looking at gitk fork in the road leading
> > to it...
> >
> > 52d9d38c183b drm/sti:fix spelling mistake: "compoment" -> "component" - good
> > e4e818cc2d7c drm: make drm_panel.h self-contained - good
> > 9cf8f5802f39 drm: add missing declaration to drm_blend.h  - good
> >
> > Before the git highway splits, all is well.  The lane with commits
> > works fine at both ends, but e98c58e55f68 is busted.  Merge artifact?
> 
> Hmmm... that tree does not appear to have gotten a v4.12 backmerge at
> any point. The last backmerge from Linus as far as I can tell was
> v4.11-rc7. Could be an interaction with some out-of-tree change.

Ok, a network outage gave me time to go hunting.  Indeed it is a bad
interaction with the tree DRM merged into.  All DRM did was to slip a
WARN_ON_ONCE() that nouveau triggers into a kernel module where such
things no longer warn, they blow the box out of the water.  I made a
dinky testcase module (attached), and bisected to the real root

19d436268dde95389c616bb3819da73f0a8b28a8 is the first bad commit
commit 19d436268dde95389c616bb3819da73f0a8b28a8
Author: Peter Zijlstra 
Date:   Sat Feb 25 08:56:53 2017 +0100

debug: Add _ONCE() logic to report_bug()

Josh suggested moving the _ONCE logic inside the trap handler, using a
bit in the bug_entry::flags field, avoiding the need for the extra
variable.

Sadly this only works for WARN_ON_ONCE(), since the others have
printk() statements prior to triggering the trap.

Still, this saves a fair amount of text and some data:

      text      data   filename
  10682460   4530992   defconfig-build/vmlinux.orig
  10665111   4530096   defconfig-build/vmlinux.patched

Suggested-by: Josh Poimboeuf 
Signed-off-by: Peter Zijlstra (Intel) 
Cc: Andy Lutomirski 
Cc: Arnd Bergmann 
Cc: Borislav Petkov 
Cc: Brian Gerst 
Cc: Denys Vlasenko 
Cc: H. Peter Anvin 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Signed-off-by: Ingo Molnar 

:040000 040000 9f47f66ec4c234f6ee8e2a09e991c95fe47cf2c1 3e92aa9e77b39ed075ae2c3bdf041d92ef898f62 M	arch
:040000 040000 34f70b73d40c82533dd7df9b289106be69e2fa8d dd5d7248694a36b3e170f2dca5d9c4121535a990 M	include
:040000 040000 f6e627b0d378f0a00d2987fdd0c7b215306e6e3c b360d4ee2579744cce530184d7dab13493f73ee0 M	lib

---
 kernel/Makefile |2 ++
 kernel/foo.c|   15 +++
 2 files changed, 17 insertions(+)

--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -111,6 +111,8 @@ obj-$(CONFIG_MEMBARRIER) += membarrier.o
 
 obj-$(CONFIG_HAS_IOMEM) += memremap.o
 
+obj-m += foo.o
+
 $(obj)/configs.o: $(obj)/config_data.h
 
 targets += config_data.gz
--- /dev/null
+++ b/kernel/foo.c
@@ -0,0 +1,15 @@
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+static int __init foo_init(void)
+{
+	printk(KERN_INFO "foo: module loaded\n");
+	WARN_ON_ONCE(1);
+	return 0;
+}
+
+static void __exit foo_exit(void) { }
+
+module_init(foo_init);
+module_exit(foo_exit);
+MODULE_LICENSE("GPL");


Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-12 Thread Tobias Klausmann


On 7/12/17 7:19 PM, Mike Galbraith wrote:

On Wed, 2017-07-12 at 07:37 -0400, Ilia Mirkin wrote:

On Wed, Jul 12, 2017 at 7:25 AM, Mike Galbraith  wrote:

On Wed, 2017-07-12 at 11:55 +0200, Mike Galbraith wrote:

On Tue, 2017-07-11 at 14:22 -0400, Ilia Mirkin wrote:

Some display stuff did change for 4.13 for GM20x+ boards. If it's not
too much trouble, a bisect would be pretty useful.

Bisection seemingly went fine, but the result is odd.

e98c58e55f68f8785aebfab1f8c9a03d8de0afe1 is the first bad commit

But it really really is bad.  Looking at gitk fork in the road leading
to it...

52d9d38c183b drm/sti:fix spelling mistake: "compoment" -> "component" - good
e4e818cc2d7c drm: make drm_panel.h self-contained - good
9cf8f5802f39 drm: add missing declaration to drm_blend.h  - good

Before the git highway splits, all is well.  The lane with commits
works fine at both ends, but e98c58e55f68 is busted.  Merge artifact?

Hmmm... that tree does not appear to have gotten a v4.12 backmerge at
any point. The last backmerge from Linus as far as I can tell was
v4.11-rc7. Could be an interaction with some out-of-tree change.

FWIW, checking out the fingered commit then..

git log --oneline 52d9d38c183b..e98c58e55f68|grep nouveau and reverting
the lot helped not at all.

Checking out 6b7781b42dc9 and reverting the fingered commit did.  Given
the nouveau bits reverted are mostly the vblank changes, CC to Daniel,
maybe he'll know why both GTX 980 and GeForce 8600 GT get all upset.

Either I'm damn lucky, both of my nvidia equipped boxen going boom 100%
repeatably, or there are a lot of folks out there who haven't yet tried
suspend with our latest/greatest kernel.  I suspect the latter.

-Mike



I should have had a look at my inbox; it would have saved me a lot of work
bisecting. Yet I come to the same conclusion:


# first bad commit: [e98c58e55f68f8785aebfab1f8c9a03d8de0afe1] Merge tag 
'drm-misc-next-2017-05-16' of git://anongit.freedesktop.org/git/drm-misc 
into drm-next



I suspect it is some vblank change as it shows up in every trace I have
seen while bisecting, but that is just a wild guess...


Greetings,

Tobias



Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-12 Thread Tobias Klausmann


On 7/12/17 7:19 PM, Mike Galbraith wrote:

On Wed, 2017-07-12 at 07:37 -0400, Ilia Mirkin wrote:

On Wed, Jul 12, 2017 at 7:25 AM, Mike Galbraith  wrote:

On Wed, 2017-07-12 at 11:55 +0200, Mike Galbraith wrote:

On Tue, 2017-07-11 at 14:22 -0400, Ilia Mirkin wrote:

Some display stuff did change for 4.13 for GM20x+ boards. If it's not
too much trouble, a bisect would be pretty useful.

Bisection seemingly went fine, but the result is odd.

e98c58e55f68f8785aebfab1f8c9a03d8de0afe1 is the first bad commit

But it really really is bad.  Looking at gitk fork in the road leading
to it...

52d9d38c183b drm/sti:fix spelling mistake: "compoment" -> "component" - good
e4e818cc2d7c drm: make drm_panel.h self-contained - good
9cf8f5802f39 drm: add missing declaration to drm_blend.h  - good

Before the git highway splits, all is well.  The lane with commits
works fine at both ends, but e98c58e55f68 is busted.  Merge artifact?

Hmmm... that tree does not appear to have gotten a v4.12 backmerge at
any point. The last backmerge from Linus as far as I can tell was
v4.11-rc7. Could be an interaction with some out-of-tree change.

FWIW, checking out the fingered commit then..

git log --oneline 52d9d38c183b..e98c58e55f68|grep nouveau and reverting
the lot helped not at all.

Checking out 6b7781b42dc9 and reverting the fingered commit did.  Given
the nouveau bits reverted are mostly the vblank changes, CC to Daniel,
maybe he'll know why both GTX 980 and GeForce 8600 GT get all upset.

Either I'm damn lucky, both of my nvidia equipped boxen going boom 100%
repeatably, or there are a lot of folks out there who haven't yet tried
suspend with our latest/greatest kernel.  I suspect the latter.

-Mike



I should have had a look at my inbox; it would have saved me a lot of work
bisecting. Yet I come to the same conclusion:


# first bad commit: [e98c58e55f68f8785aebfab1f8c9a03d8de0afe1] Merge tag 
'drm-misc-next-2017-05-16' of git://anongit.freedesktop.org/git/drm-misc 
into drm-next



I suspect it is some vblank change as it shows up in every trace I have
seen while bisecting, but that is just a wild guess...


Greetings,

Tobias



Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-12 Thread Mike Galbraith
On Wed, 2017-07-12 at 07:37 -0400, Ilia Mirkin wrote:
> On Wed, Jul 12, 2017 at 7:25 AM, Mike Galbraith  wrote:
> > On Wed, 2017-07-12 at 11:55 +0200, Mike Galbraith wrote:
> >> On Tue, 2017-07-11 at 14:22 -0400, Ilia Mirkin wrote:
> >> >
> >> > Some display stuff did change for 4.13 for GM20x+ boards. If it's not
> >> > too much trouble, a bisect would be pretty useful.
> >>
> >> Bisection seemingly went fine, but the result is odd.
> >>
> >> e98c58e55f68f8785aebfab1f8c9a03d8de0afe1 is the first bad commit
> >
> > But it really really is bad.  Looking at gitk fork in the road leading
> > to it...
> >
> > 52d9d38c183b drm/sti:fix spelling mistake: "compoment" -> "component" - good
> > e4e818cc2d7c drm: make drm_panel.h self-contained - good
> > 9cf8f5802f39 drm: add missing declaration to drm_blend.h  - good
> >
> > Before the git highway splits, all is well.  The lane with commits
> > works fine at both ends, but e98c58e55f68 is busted.  Merge artifact?
> 
> Hmmm... that tree does not appear to have gotten a v4.12 backmerge at
> any point. The last backmerge from Linus as far as I can tell was
> v4.11-rc7. Could be an interaction with some out-of-tree change.

FWIW, checking out the fingered commit then..

git log --oneline 52d9d38c183b..e98c58e55f68|grep nouveau and reverting
the lot helped not at all.

Checking out 6b7781b42dc9 and reverting the fingered commit did.  Given
the nouveau bits reverted are mostly the vblank changes, CC to Daniel,
maybe he'll know why both GTX 980 and GeForce 8600 GT get all upset.

Either I'm damn lucky, both of my nvidia equipped boxen going boom 100%
repeatably, or there are a lot of folks out there who haven't yet tried
suspend with our latest/greatest kernel.  I suspect the latter.

-Mike



Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-12 Thread Mike Galbraith
On Wed, 2017-07-12 at 07:37 -0400, Ilia Mirkin wrote:
> On Wed, Jul 12, 2017 at 7:25 AM, Mike Galbraith  wrote:
> > On Wed, 2017-07-12 at 11:55 +0200, Mike Galbraith wrote:
> >> On Tue, 2017-07-11 at 14:22 -0400, Ilia Mirkin wrote:
> >> >
> >> > Some display stuff did change for 4.13 for GM20x+ boards. If it's not
> >> > too much trouble, a bisect would be pretty useful.
> >>
> >> Bisection seemingly went fine, but the result is odd.
> >>
> >> e98c58e55f68f8785aebfab1f8c9a03d8de0afe1 is the first bad commit
> >
> > But it really really is bad.  Looking at gitk fork in the road leading
> > to it...
> >
> > 52d9d38c183b drm/sti:fix spelling mistake: "compoment" -> "component" - good
> > e4e818cc2d7c drm: make drm_panel.h self-contained - good
> > 9cf8f5802f39 drm: add missing declaration to drm_blend.h  - good
> >
> > Before the git highway splits, all is well.  The lane with commits
> > works fine at both ends, but e98c58e55f68 is busted.  Merge artifact?
> 
> Hmmm... that tree does not appear to have gotten a v4.12 backmerge at
> any point. The last backmerge from Linus as far as I can tell was
> v4.11-rc7. Could be an interaction with some out-of-tree change.

FWIW, checking out the fingered commit then..

git log --oneline 52d9d38c183b..e98c58e55f68|grep nouveau and reverting
the lot helped not at all.

Checking out 6b7781b42dc9 and reverting the fingered commit did.  Given
the nouveau bits reverted are mostly the vblank changes, CC to Daniel,
maybe he'll know why both GTX 980 and GeForce 8600 GT get all upset.

Either I'm damn lucky, both of my nvidia equipped boxen going boom 100%
repeatably, or there are a lot of folks out there who haven't yet tried
suspend with our latest/greatest kernel.  I suspect the latter.

-Mike



Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-12 Thread Ilia Mirkin
On Wed, Jul 12, 2017 at 7:25 AM, Mike Galbraith  wrote:
> On Wed, 2017-07-12 at 11:55 +0200, Mike Galbraith wrote:
>> On Tue, 2017-07-11 at 14:22 -0400, Ilia Mirkin wrote:
>> >
>> > Some display stuff did change for 4.13 for GM20x+ boards. If it's not
>> > too much trouble, a bisect would be pretty useful.
>>
>> Bisection seemingly went fine, but the result is odd.
>>
>> e98c58e55f68f8785aebfab1f8c9a03d8de0afe1 is the first bad commit
>
> But it really really is bad.  Looking at gitk fork in the road leading
> to it...
>
> 52d9d38c183b drm/sti:fix spelling mistake: "compoment" -> "component" - good
> e4e818cc2d7c drm: make drm_panel.h self-contained - good
> 9cf8f5802f39 drm: add missing declaration to drm_blend.h  - good
>
> Before the git highway splits, all is well.  The lane with commits
> works fine at both ends, but e98c58e55f68 is busted.  Merge artifact?

Hmmm... that tree does not appear to have gotten a v4.12 backmerge at
any point. The last backmerge from Linus as far as I can tell was
v4.11-rc7. Could be an interaction with some out-of-tree change.


Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-12 Thread Ilia Mirkin
On Wed, Jul 12, 2017 at 7:25 AM, Mike Galbraith  wrote:
> On Wed, 2017-07-12 at 11:55 +0200, Mike Galbraith wrote:
>> On Tue, 2017-07-11 at 14:22 -0400, Ilia Mirkin wrote:
>> >
>> > Some display stuff did change for 4.13 for GM20x+ boards. If it's not
>> > too much trouble, a bisect would be pretty useful.
>>
>> Bisection seemingly went fine, but the result is odd.
>>
>> e98c58e55f68f8785aebfab1f8c9a03d8de0afe1 is the first bad commit
>
> But it really really is bad.  Looking at gitk fork in the road leading
> to it...
>
> 52d9d38c183b drm/sti:fix spelling mistake: "compoment" -> "component" - good
> e4e818cc2d7c drm: make drm_panel.h self-contained - good
> 9cf8f5802f39 drm: add missing declaration to drm_blend.h  - good
>
> Before the git highway splits, all is well.  The lane with commits
> works fine at both ends, but e98c58e55f68 is busted.  Merge artifact?

Hmmm... that tree does not appear to have gotten a v4.12 backmerge at
any point. The last backmerge from Linus as far as I can tell was
v4.11-rc7. Could be an interaction with some out-of-tree change.


Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-12 Thread Mike Galbraith
On Wed, 2017-07-12 at 11:55 +0200, Mike Galbraith wrote:
> On Tue, 2017-07-11 at 14:22 -0400, Ilia Mirkin wrote:
> > 
> > Some display stuff did change for 4.13 for GM20x+ boards. If it's not
> > too much trouble, a bisect would be pretty useful.
> 
> Bisection seemingly went fine, but the result is odd.
> 
> e98c58e55f68f8785aebfab1f8c9a03d8de0afe1 is the first bad commit

But it really really is bad.  Looking at gitk fork in the road leading
to it...

52d9d38c183b drm/sti:fix spelling mistake: "compoment" -> "component" - good
e4e818cc2d7c drm: make drm_panel.h self-contained - good
9cf8f5802f39 drm: add missing declaration to drm_blend.h  - good

Before the git highway splits, all is well.  The lane with commits
works fine at both ends, but e98c58e55f68 is busted.  Merge artifact?

-Mike


Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-12 Thread Mike Galbraith
On Wed, 2017-07-12 at 11:55 +0200, Mike Galbraith wrote:
> On Tue, 2017-07-11 at 14:22 -0400, Ilia Mirkin wrote:
> > 
> > Some display stuff did change for 4.13 for GM20x+ boards. If it's not
> > too much trouble, a bisect would be pretty useful.
> 
> Bisection seemingly went fine, but the result is odd.
> 
> e98c58e55f68f8785aebfab1f8c9a03d8de0afe1 is the first bad commit

But it really really is bad.  Looking at gitk fork in the road leading
to it...

52d9d38c183b drm/sti:fix spelling mistake: "compoment" -> "component" - good
e4e818cc2d7c drm: make drm_panel.h self-contained - good
9cf8f5802f39 drm: add missing declaration to drm_blend.h  - good

Before the git highway splits, all is well.  The lane with commits
works fine at both ends, but e98c58e55f68 is busted.  Merge artifact?

-Mike


Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-12 Thread Mike Galbraith
On Tue, 2017-07-11 at 14:22 -0400, Ilia Mirkin wrote:
> 
> Some display stuff did change for 4.13 for GM20x+ boards. If it's not
> too much trouble, a bisect would be pretty useful.

Bisection seemingly went fine, but the result is odd.

e98c58e55f68f8785aebfab1f8c9a03d8de0afe1 is the first bad commit

-Mike





Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-12 Thread Mike Galbraith
On Tue, 2017-07-11 at 14:22 -0400, Ilia Mirkin wrote:
> 
> Some display stuff did change for 4.13 for GM20x+ boards. If it's not
> too much trouble, a bisect would be pretty useful.

Bisection seemingly went fine, but the result is odd.

e98c58e55f68f8785aebfab1f8c9a03d8de0afe1 is the first bad commit

-Mike





Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-12 Thread Mike Galbraith
On Tue, 2017-07-11 at 20:53 +0200, Mike Galbraith wrote:
> On Tue, 2017-07-11 at 14:22 -0400, Ilia Mirkin wrote:
> 
> > Some display stuff did change for 4.13 for GM20x+ boards. If it's not
> > too much trouble, a bisect would be pretty useful.
> 
> Vacation -> back to work happens in the very early AM, so bisection
> will have to wait a bit.

Hm, my backup workstation (old GeForce 8600 GT box) has the same issue,
so perhaps I can bisect it as I work on backlog (multitasking: screw up
multiple tasks concurrently).

-Mike


Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-12 Thread Mike Galbraith
On Tue, 2017-07-11 at 20:53 +0200, Mike Galbraith wrote:
> On Tue, 2017-07-11 at 14:22 -0400, Ilia Mirkin wrote:
> 
> > Some display stuff did change for 4.13 for GM20x+ boards. If it's not
> > too much trouble, a bisect would be pretty useful.
> 
> Vacation -> back to work happens in the very early AM, so bisection
> will have to wait a bit.

Hm, my backup workstation (old GeForce 8600 GT box) has the same issue,
so perhaps I can bisect it as I work on backlog (multitasking: screw up
multiple tasks concurrently).

-Mike


Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-11 Thread Mike Galbraith
On Tue, 2017-07-11 at 14:22 -0400, Ilia Mirkin wrote:
> 
> OK, thanks. So in other words, a fairly standard desktop with a PCIe
> board plugged in. No funny business. (Laptops can create a ton of
> additional weirdness, which I assumed you had since you were talking
> about STR.)

Yup, garden variety deskside box.

> My best guess is that gf119_head_vblank_put either has a bogus head id
> (should be in the 0..3 range) which causes it to do an out-of-bounds
> read on MMIO space, or that the MMIO mapping has already been removed
> by the time nouveau_display_suspend runs. Adding Ben Skeggs for
> additional insight.
> 
> Some display stuff did change for 4.13 for GM20x+ boards. If it's not
> too much trouble, a bisect would be pretty useful.

Vacation -> back to work happens in the very early AM, so bisection
will have to wait a bit.

-Mike


Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-11 Thread Mike Galbraith
On Tue, 2017-07-11 at 14:22 -0400, Ilia Mirkin wrote:
> 
> OK, thanks. So in other words, a fairly standard desktop with a PCIe
> board plugged in. No funny business. (Laptops can create a ton of
> additional weirdness, which I assumed you had since you were talking
> about STR.)

Yup, garden variety deskside box.

> My best guess is that gf119_head_vblank_put either has a bogus head id
> (should be in the 0..3 range) which causes it to do an out-of-bounds
> read on MMIO space, or that the MMIO mapping has already been removed
> by the time nouveau_display_suspend runs. Adding Ben Skeggs for
> additional insight.
> 
> Some display stuff did change for 4.13 for GM20x+ boards. If it's not
> too much trouble, a bisect would be pretty useful.

Vacation -> back to work happens in the very early AM, so bisection
will have to wait a bit.

-Mike


Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-11 Thread Ilia Mirkin
On Tue, Jul 11, 2017 at 2:08 PM, Mike Galbraith  wrote:
> On Tue, 2017-07-11 at 13:51 -0400, Ilia Mirkin wrote:
>> Some details that may be useful in analysis of the bug:
>>
>> 1. lspci -nn -d 10de:
>
> 01:00.0 VGA compatible controller [0300]: NVIDIA Corporation GM204 [GeForce 
> GTX 980] [10de:13c0] (rev a1)
> 01:00.1 Audio device [0403]: NVIDIA Corporation GM204 High Definition Audio 
> Controller [10de:0fbb] (rev a1)
>
>> 2. What displays, if any, you have plugged into the NVIDIA board when
>> this happens?
>
> A Philips 273V, via DVI.
>
>> 3. Any boot parameters, esp relating to ACPI, PM, or related?
>
> None for those, what's there that will be unfamiliar to you are for
> patches that aren't applied.
>
> nortsched hpc_cpusets skew_tick=1 ftrace_dump_on_oops audit=0
> nodelayacct cgroup_disable=memory rtkthreads=1 rtworkqueues=2 panic=60
> ignore_loglevel crashkernel=256M,high

OK, thanks. So in other words, a fairly standard desktop with a PCIe
board plugged in. No funny business. (Laptops can create a ton of
additional weirdness, which I assumed you had since you were talking
about STR.)

My best guess is that gf119_head_vblank_put either has a bogus head id
(should be in the 0..3 range) which causes it to do an out-of-bounds
read on MMIO space, or that the MMIO mapping has already been removed
by the time nouveau_display_suspend runs. Adding Ben Skeggs for
additional insight.

Some display stuff did change for 4.13 for GM20x+ boards. If it's not
too much trouble, a bisect would be pretty useful.

Cheers,

  -ilia


Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-11 Thread Ilia Mirkin
On Tue, Jul 11, 2017 at 2:08 PM, Mike Galbraith  wrote:
> On Tue, 2017-07-11 at 13:51 -0400, Ilia Mirkin wrote:
>> Some details that may be useful in analysis of the bug:
>>
>> 1. lspci -nn -d 10de:
>
> 01:00.0 VGA compatible controller [0300]: NVIDIA Corporation GM204 [GeForce 
> GTX 980] [10de:13c0] (rev a1)
> 01:00.1 Audio device [0403]: NVIDIA Corporation GM204 High Definition Audio 
> Controller [10de:0fbb] (rev a1)
>
>> 2. What displays, if any, you have plugged into the NVIDIA board when
>> this happens?
>
> A Philips 273V, via DVI.
>
>> 3. Any boot parameters, esp relating to ACPI, PM, or related?
>
> None for those, what's there that will be unfamiliar to you are for
> patches that aren't applied.
>
> nortsched hpc_cpusets skew_tick=1 ftrace_dump_on_oops audit=0
> nodelayacct cgroup_disable=memory rtkthreads=1 rtworkqueues=2 panic=60
> ignore_loglevel crashkernel=256M,high

OK, thanks. So in other words, a fairly standard desktop with a PCIe
board plugged in. No funny business. (Laptops can create a ton of
additional weirdness, which I assumed you had since you were talking
about STR.)

My best guess is that gf119_head_vblank_put either has a bogus head id
(should be in the 0..3 range) which causes it to do an out-of-bounds
read on MMIO space, or that the MMIO mapping has already been removed
by the time nouveau_display_suspend runs. Adding Ben Skeggs for
additional insight.

Some display stuff did change for 4.13 for GM20x+ boards. If it's not
too much trouble, a bisect would be pretty useful.

Cheers,

  -ilia


Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-11 Thread Mike Galbraith
On Tue, 2017-07-11 at 13:51 -0400, Ilia Mirkin wrote:
> Some details that may be useful in analysis of the bug:
> 
> 1. lspci -nn -d 10de:

01:00.0 VGA compatible controller [0300]: NVIDIA Corporation GM204 [GeForce GTX 
980] [10de:13c0] (rev a1)
01:00.1 Audio device [0403]: NVIDIA Corporation GM204 High Definition Audio 
Controller [10de:0fbb] (rev a1)

> 2. What displays, if any, you have plugged into the NVIDIA board when
> this happens?

A Philips 273V, via DVI.

> 3. Any boot parameters, esp relating to ACPI, PM, or related?

None for those, what's there that will be unfamiliar to you are for
patches that aren't applied.

nortsched hpc_cpusets skew_tick=1 ftrace_dump_on_oops audit=0
nodelayacct cgroup_disable=memory rtkthreads=1 rtworkqueues=2 panic=60
ignore_loglevel crashkernel=256M,high

-Mike


Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-11 Thread Mike Galbraith
On Tue, 2017-07-11 at 13:51 -0400, Ilia Mirkin wrote:
> Some details that may be useful in analysis of the bug:
> 
> 1. lspci -nn -d 10de:

01:00.0 VGA compatible controller [0300]: NVIDIA Corporation GM204 [GeForce GTX 
980] [10de:13c0] (rev a1)
01:00.1 Audio device [0403]: NVIDIA Corporation GM204 High Definition Audio 
Controller [10de:0fbb] (rev a1)

> 2. What displays, if any, you have plugged into the NVIDIA board when
> this happens?

A Philips 273V, via DVI.

> 3. Any boot parameters, esp relating to ACPI, PM, or related?

None for those, what's there that will be unfamiliar to you are for
patches that aren't applied.

nortsched hpc_cpusets skew_tick=1 ftrace_dump_on_oops audit=0
nodelayacct cgroup_disable=memory rtkthreads=1 rtworkqueues=2 panic=60
ignore_loglevel crashkernel=256M,high

-Mike


Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-11 Thread Ilia Mirkin
Some details that may be useful in analysis of the bug:

1. lspci -nn -d 10de:
2. What displays, if any, you have plugged into the NVIDIA board when
this happens?
3. Any boot parameters, esp relating to ACPI, PM, or related?

Cheers,

  -ilia

On Tue, Jul 11, 2017 at 1:32 PM, Mike Galbraith  wrote:
> Greetings,
>
> I met $subject in master-rt post drm merge, but taking the config
> (attached) to virgin v4.12-10624-g9967468c0a10, it's reproducible.
>
>   KERNEL: vmlinux-4.12.0.g9967468-preempt.gz
> DUMPFILE: vmcore
> CPUS: 8
> DATE: Tue Jul 11 18:55:28 2017
>   UPTIME: 00:02:03
> LOAD AVERAGE: 3.43, 1.39, 0.52
>TASKS: 467
> NODENAME: homer
>  RELEASE: 4.12.0.g9967468-preempt
>  VERSION: #155 SMP PREEMPT Tue Jul 11 18:18:11 CEST 2017
>  MACHINE: x86_64  (3591 Mhz)
>   MEMORY: 16 GB
>PANIC: "BUG: unable to handle kernel paging request at a022990f"
>  PID: 4658
>  COMMAND: "kworker/u16:26"
> TASK: 8803c6068f80  [THREAD_INFO: 8803c6068f80]
>  CPU: 7
>STATE: TASK_RUNNING (PANIC)
>
> crash> bt
> PID: 4658   TASK: 8803c6068f80  CPU: 7   COMMAND: "kworker/u16:26"
>  #0 [c900039f76a0] machine_kexec at 810481fc
>  #1 [c900039f76f0] __crash_kexec at 81109e3a
>  #2 [c900039f77b0] crash_kexec at 8110adc9
>  #3 [c900039f77c8] oops_end at 8101d059
>  #4 [c900039f77e8] no_context at 81055ce5
>  #5 [c900039f7838] do_page_fault at 81056c5b
>  #6 [c900039f7860] page_fault at 81690a88
> [exception RIP: report_bug+93]
> RIP: 8167227d  RSP: c900039f7918  RFLAGS: 00010002
> RAX: a0229905  RBX: a020af0f  RCX: 0001
> RDX: 0907  RSI: a020af11  RDI: 98f6
> RBP: c900039f7a58   R8: 0001   R9: 03fc
> R10: 81a01906  R11: 8803f84711f8  R12: a02231fb
> R13: 0260  R14: 0004  R15: 0006
> ORIG_RAX:   CS: 0010  SS: 0018
>  #7 [c900039f7910] report_bug at 81672248
>  #8 [c900039f7938] fixup_bug at 8101af85
>  #9 [c900039f7950] do_trap at 8101b0d9
> #10 [c900039f79a0] do_error_trap at 8101b190
> #11 [c900039f7a50] invalid_op at 8169063e
> [exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335]
> RIP: a020af0f  RSP: c900039f7b00  RFLAGS: 00010086
> RAX: a04fa100  RBX: 8803f9550800  RCX: 0001
> RDX: a0228a58  RSI: 0001  RDI: a022321b
> RBP: c900039f7b80   R8:    R9: a020adc0
> R10: a048a1b0  R11: 8803f84711f8  R12: 0001
> R13: 8803f8471000  R14: c900039f7b94  R15: c900039f7bd0
> ORIG_RAX:   CS: 0010  SS: 0018
> #12 [c900039f7b18] gf119_head_vblank_put at a04422f9 [nouveau]
> #13 [c900039f7b88] drm_get_last_vbltimestamp at a020ad91 [drm]
> #14 [c900039f7ba8] drm_update_vblank_count at a020b3e1 [drm]
> #15 [c900039f7c10] drm_vblank_disable_and_save at a020bbe9 [drm]
> #16 [c900039f7c40] drm_crtc_vblank_off at a020c3c0 [drm]
> #17 [c900039f7cb0] nouveau_display_fini at a048a4d6 [nouveau]
> #18 [c900039f7ce0] nouveau_display_suspend at a048ac4f [nouveau]
> #19 [c900039f7d00] nouveau_do_suspend at a047e5ec [nouveau]
> #20 [c900039f7d38] nouveau_pmops_suspend at a047e77d [nouveau]
> #21 [c900039f7d50] pci_pm_suspend at 813b1ff0
> #22 [c900039f7d80] dpm_run_callback at 814c4dbd
> #23 [c900039f7db8] __device_suspend at 814c5a61
> #24 [c900039f7e30] async_suspend at 814c5cfa
> #25 [c900039f7e48] async_run_entry_fn at 81091683
> #26 [c900039f7e70] process_one_work at 810882bc
> #27 [c900039f7eb0] worker_thread at 8108854a
> #28 [c900039f7f10] kthread at 8108e387
> #29 [c900039f7f50] ret_from_fork at 8168fa85
> crash> gdb list *drm_calc_vbltimestamp_from_scanoutpos+335
> 0xa020af0f is in drm_calc_vbltimestamp_from_scanoutpos 
> (drivers/gpu/drm/drm_vblank.c:608).
> 603 /* If mode timing undefined, just return as no-op:
> 604  * Happens during initial modesetting of a crtc.
> 605  */
> 606 if (mode->crtc_clock == 0) {
> 607 DRM_DEBUG("crtc %u: Noop due to uninitialized 
> mode.\n", pipe);
> 608 WARN_ON_ONCE(drm_drv_uses_atomic_modeset(dev));
> 609
> 610 return false;
> 611 }
> 612
> crash> gdb list *report_bug+93
> 0x8167227d is in report_bug (lib/bug.c:177).
> 172 return BUG_TRAP_TYPE_WARN;
> 173
> 174

Re: [regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-11 Thread Ilia Mirkin
Some details that may be useful in analysis of the bug:

1. lspci -nn -d 10de:
2. What displays, if any, you have plugged into the NVIDIA board when
this happens?
3. Any boot parameters, esp relating to ACPI, PM, or related?

Cheers,

  -ilia

On Tue, Jul 11, 2017 at 1:32 PM, Mike Galbraith  wrote:
> Greetings,
>
> I met $subject in master-rt post drm merge, but taking the config
> (attached) to virgin v4.12-10624-g9967468c0a10, it's reproducible.
>
>   KERNEL: vmlinux-4.12.0.g9967468-preempt.gz
> DUMPFILE: vmcore
> CPUS: 8
> DATE: Tue Jul 11 18:55:28 2017
>   UPTIME: 00:02:03
> LOAD AVERAGE: 3.43, 1.39, 0.52
>TASKS: 467
> NODENAME: homer
>  RELEASE: 4.12.0.g9967468-preempt
>  VERSION: #155 SMP PREEMPT Tue Jul 11 18:18:11 CEST 2017
>  MACHINE: x86_64  (3591 Mhz)
>   MEMORY: 16 GB
>PANIC: "BUG: unable to handle kernel paging request at a022990f"
>  PID: 4658
>  COMMAND: "kworker/u16:26"
> TASK: 8803c6068f80  [THREAD_INFO: 8803c6068f80]
>  CPU: 7
>STATE: TASK_RUNNING (PANIC)
>
> crash> bt
> PID: 4658   TASK: 8803c6068f80  CPU: 7   COMMAND: "kworker/u16:26"
>  #0 [c900039f76a0] machine_kexec at 810481fc
>  #1 [c900039f76f0] __crash_kexec at 81109e3a
>  #2 [c900039f77b0] crash_kexec at 8110adc9
>  #3 [c900039f77c8] oops_end at 8101d059
>  #4 [c900039f77e8] no_context at 81055ce5
>  #5 [c900039f7838] do_page_fault at 81056c5b
>  #6 [c900039f7860] page_fault at 81690a88
> [exception RIP: report_bug+93]
> RIP: 8167227d  RSP: c900039f7918  RFLAGS: 00010002
> RAX: a0229905  RBX: a020af0f  RCX: 0001
> RDX: 0907  RSI: a020af11  RDI: 98f6
> RBP: c900039f7a58   R8: 0001   R9: 03fc
> R10: 81a01906  R11: 8803f84711f8  R12: a02231fb
> R13: 0260  R14: 0004  R15: 0006
> ORIG_RAX:   CS: 0010  SS: 0018
>  #7 [c900039f7910] report_bug at 81672248
>  #8 [c900039f7938] fixup_bug at 8101af85
>  #9 [c900039f7950] do_trap at 8101b0d9
> #10 [c900039f79a0] do_error_trap at 8101b190
> #11 [c900039f7a50] invalid_op at 8169063e
> [exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335]
> RIP: a020af0f  RSP: c900039f7b00  RFLAGS: 00010086
> RAX: a04fa100  RBX: 8803f9550800  RCX: 0001
> RDX: a0228a58  RSI: 0001  RDI: a022321b
> RBP: c900039f7b80   R8:    R9: a020adc0
> R10: a048a1b0  R11: 8803f84711f8  R12: 0001
> R13: 8803f8471000  R14: c900039f7b94  R15: c900039f7bd0
> ORIG_RAX:   CS: 0010  SS: 0018
> #12 [c900039f7b18] gf119_head_vblank_put at a04422f9 [nouveau]
> #13 [c900039f7b88] drm_get_last_vbltimestamp at a020ad91 [drm]
> #14 [c900039f7ba8] drm_update_vblank_count at a020b3e1 [drm]
> #15 [c900039f7c10] drm_vblank_disable_and_save at a020bbe9 [drm]
> #16 [c900039f7c40] drm_crtc_vblank_off at a020c3c0 [drm]
> #17 [c900039f7cb0] nouveau_display_fini at a048a4d6 [nouveau]
> #18 [c900039f7ce0] nouveau_display_suspend at a048ac4f [nouveau]
> #19 [c900039f7d00] nouveau_do_suspend at a047e5ec [nouveau]
> #20 [c900039f7d38] nouveau_pmops_suspend at a047e77d [nouveau]
> #21 [c900039f7d50] pci_pm_suspend at 813b1ff0
> #22 [c900039f7d80] dpm_run_callback at 814c4dbd
> #23 [c900039f7db8] __device_suspend at 814c5a61
> #24 [c900039f7e30] async_suspend at 814c5cfa
> #25 [c900039f7e48] async_run_entry_fn at 81091683
> #26 [c900039f7e70] process_one_work at 810882bc
> #27 [c900039f7eb0] worker_thread at 8108854a
> #28 [c900039f7f10] kthread at 8108e387
> #29 [c900039f7f50] ret_from_fork at 8168fa85
> crash> gdb list *drm_calc_vbltimestamp_from_scanoutpos+335
> 0xa020af0f is in drm_calc_vbltimestamp_from_scanoutpos 
> (drivers/gpu/drm/drm_vblank.c:608).
> 603 /* If mode timing undefined, just return as no-op:
> 604  * Happens during initial modesetting of a crtc.
> 605  */
> 606 if (mode->crtc_clock == 0) {
> 607 DRM_DEBUG("crtc %u: Noop due to uninitialized 
> mode.\n", pipe);
> 608 WARN_ON_ONCE(drm_drv_uses_atomic_modeset(dev));
> 609
> 610 return false;
> 611 }
> 612
> crash> gdb list *report_bug+93
> 0x8167227d is in report_bug (lib/bug.c:177).
> 172 return BUG_TRAP_TYPE_WARN;
> 173
> 174 /*
> 175  

[regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-11 Thread Mike Galbraith
Greetings,

I met $subject in master-rt post drm merge, but taking the config
(attached) to virgin v4.12-10624-g9967468c0a10, it's reproducible.

  KERNEL: vmlinux-4.12.0.g9967468-preempt.gz
DUMPFILE: vmcore
CPUS: 8
DATE: Tue Jul 11 18:55:28 2017
  UPTIME: 00:02:03
LOAD AVERAGE: 3.43, 1.39, 0.52
   TASKS: 467
NODENAME: homer
 RELEASE: 4.12.0.g9967468-preempt
 VERSION: #155 SMP PREEMPT Tue Jul 11 18:18:11 CEST 2017
 MACHINE: x86_64  (3591 Mhz)
  MEMORY: 16 GB
   PANIC: "BUG: unable to handle kernel paging request at a022990f"
 PID: 4658
 COMMAND: "kworker/u16:26"
TASK: 8803c6068f80  [THREAD_INFO: 8803c6068f80]
 CPU: 7
   STATE: TASK_RUNNING (PANIC)

crash> bt
PID: 4658   TASK: 8803c6068f80  CPU: 7   COMMAND: "kworker/u16:26"
 #0 [c900039f76a0] machine_kexec at 810481fc
 #1 [c900039f76f0] __crash_kexec at 81109e3a
 #2 [c900039f77b0] crash_kexec at 8110adc9
 #3 [c900039f77c8] oops_end at 8101d059
 #4 [c900039f77e8] no_context at 81055ce5
 #5 [c900039f7838] do_page_fault at 81056c5b
 #6 [c900039f7860] page_fault at 81690a88
[exception RIP: report_bug+93]
RIP: 8167227d  RSP: c900039f7918  RFLAGS: 00010002
RAX: a0229905  RBX: a020af0f  RCX: 0001
RDX: 0907  RSI: a020af11  RDI: 98f6
RBP: c900039f7a58   R8: 0001   R9: 03fc
R10: 81a01906  R11: 8803f84711f8  R12: a02231fb
R13: 0260  R14: 0004  R15: 0006
ORIG_RAX:   CS: 0010  SS: 0018
 #7 [c900039f7910] report_bug at 81672248
 #8 [c900039f7938] fixup_bug at 8101af85
 #9 [c900039f7950] do_trap at 8101b0d9
#10 [c900039f79a0] do_error_trap at 8101b190
#11 [c900039f7a50] invalid_op at 8169063e
[exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335]
RIP: a020af0f  RSP: c900039f7b00  RFLAGS: 00010086
RAX: a04fa100  RBX: 8803f9550800  RCX: 0001
RDX: a0228a58  RSI: 0001  RDI: a022321b
RBP: c900039f7b80   R8:    R9: a020adc0
R10: a048a1b0  R11: 8803f84711f8  R12: 0001
R13: 8803f8471000  R14: c900039f7b94  R15: c900039f7bd0
ORIG_RAX:   CS: 0010  SS: 0018
#12 [c900039f7b18] gf119_head_vblank_put at a04422f9 [nouveau]
#13 [c900039f7b88] drm_get_last_vbltimestamp at a020ad91 [drm]
#14 [c900039f7ba8] drm_update_vblank_count at a020b3e1 [drm]
#15 [c900039f7c10] drm_vblank_disable_and_save at a020bbe9 [drm]
#16 [c900039f7c40] drm_crtc_vblank_off at a020c3c0 [drm]
#17 [c900039f7cb0] nouveau_display_fini at a048a4d6 [nouveau]
#18 [c900039f7ce0] nouveau_display_suspend at a048ac4f [nouveau]
#19 [c900039f7d00] nouveau_do_suspend at a047e5ec [nouveau]
#20 [c900039f7d38] nouveau_pmops_suspend at a047e77d [nouveau]
#21 [c900039f7d50] pci_pm_suspend at 813b1ff0
#22 [c900039f7d80] dpm_run_callback at 814c4dbd
#23 [c900039f7db8] __device_suspend at 814c5a61
#24 [c900039f7e30] async_suspend at 814c5cfa
#25 [c900039f7e48] async_run_entry_fn at 81091683
#26 [c900039f7e70] process_one_work at 810882bc
#27 [c900039f7eb0] worker_thread at 8108854a
#28 [c900039f7f10] kthread at 8108e387
#29 [c900039f7f50] ret_from_fork at 8168fa85
crash> gdb list *drm_calc_vbltimestamp_from_scanoutpos+335
0xa020af0f is in drm_calc_vbltimestamp_from_scanoutpos 
(drivers/gpu/drm/drm_vblank.c:608).
603 /* If mode timing undefined, just return as no-op:
604  * Happens during initial modesetting of a crtc.
605  */
606 if (mode->crtc_clock == 0) {
607 DRM_DEBUG("crtc %u: Noop due to uninitialized mode.\n", 
pipe);
608 WARN_ON_ONCE(drm_drv_uses_atomic_modeset(dev));
609 
610 return false;
611 }
612 
crash> gdb list *report_bug+93
0x8167227d is in report_bug (lib/bug.c:177).
172 return BUG_TRAP_TYPE_WARN;
173 
174 /*
175  * Since this is the only store, concurrency is 
not an issue.
176  */
177 bug->flags |= BUGFLAG_DONE;
178 }
179 }
180 
181 if (warning) {
crash>

config.xz
Description: application/xz


[regression drm/noveau] suspend to ram -> BOOM: exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335

2017-07-11 Thread Mike Galbraith
Greetings,

I met $subject in master-rt post drm merge, but taking the config
(attached) to virgin v4.12-10624-g9967468c0a10, it's reproducible.

  KERNEL: vmlinux-4.12.0.g9967468-preempt.gz
DUMPFILE: vmcore
CPUS: 8
DATE: Tue Jul 11 18:55:28 2017
  UPTIME: 00:02:03
LOAD AVERAGE: 3.43, 1.39, 0.52
   TASKS: 467
NODENAME: homer
 RELEASE: 4.12.0.g9967468-preempt
 VERSION: #155 SMP PREEMPT Tue Jul 11 18:18:11 CEST 2017
 MACHINE: x86_64  (3591 Mhz)
  MEMORY: 16 GB
   PANIC: "BUG: unable to handle kernel paging request at a022990f"
 PID: 4658
 COMMAND: "kworker/u16:26"
TASK: 8803c6068f80  [THREAD_INFO: 8803c6068f80]
 CPU: 7
   STATE: TASK_RUNNING (PANIC)

crash> bt
PID: 4658   TASK: 8803c6068f80  CPU: 7   COMMAND: "kworker/u16:26"
 #0 [c900039f76a0] machine_kexec at 810481fc
 #1 [c900039f76f0] __crash_kexec at 81109e3a
 #2 [c900039f77b0] crash_kexec at 8110adc9
 #3 [c900039f77c8] oops_end at 8101d059
 #4 [c900039f77e8] no_context at 81055ce5
 #5 [c900039f7838] do_page_fault at 81056c5b
 #6 [c900039f7860] page_fault at 81690a88
[exception RIP: report_bug+93]
RIP: 8167227d  RSP: c900039f7918  RFLAGS: 00010002
RAX: a0229905  RBX: a020af0f  RCX: 0001
RDX: 0907  RSI: a020af11  RDI: 98f6
RBP: c900039f7a58   R8: 0001   R9: 03fc
R10: 81a01906  R11: 8803f84711f8  R12: a02231fb
R13: 0260  R14: 0004  R15: 0006
ORIG_RAX:   CS: 0010  SS: 0018
 #7 [c900039f7910] report_bug at 81672248
 #8 [c900039f7938] fixup_bug at 8101af85
 #9 [c900039f7950] do_trap at 8101b0d9
#10 [c900039f79a0] do_error_trap at 8101b190
#11 [c900039f7a50] invalid_op at 8169063e
[exception RIP: drm_calc_vbltimestamp_from_scanoutpos+335]
RIP: a020af0f  RSP: c900039f7b00  RFLAGS: 00010086
RAX: a04fa100  RBX: 8803f9550800  RCX: 0001
RDX: a0228a58  RSI: 0001  RDI: a022321b
RBP: c900039f7b80   R8:    R9: a020adc0
R10: a048a1b0  R11: 8803f84711f8  R12: 0001
R13: 8803f8471000  R14: c900039f7b94  R15: c900039f7bd0
ORIG_RAX:   CS: 0010  SS: 0018
#12 [c900039f7b18] gf119_head_vblank_put at a04422f9 [nouveau]
#13 [c900039f7b88] drm_get_last_vbltimestamp at a020ad91 [drm]
#14 [c900039f7ba8] drm_update_vblank_count at a020b3e1 [drm]
#15 [c900039f7c10] drm_vblank_disable_and_save at a020bbe9 [drm]
#16 [c900039f7c40] drm_crtc_vblank_off at a020c3c0 [drm]
#17 [c900039f7cb0] nouveau_display_fini at a048a4d6 [nouveau]
#18 [c900039f7ce0] nouveau_display_suspend at a048ac4f [nouveau]
#19 [c900039f7d00] nouveau_do_suspend at a047e5ec [nouveau]
#20 [c900039f7d38] nouveau_pmops_suspend at a047e77d [nouveau]
#21 [c900039f7d50] pci_pm_suspend at 813b1ff0
#22 [c900039f7d80] dpm_run_callback at 814c4dbd
#23 [c900039f7db8] __device_suspend at 814c5a61
#24 [c900039f7e30] async_suspend at 814c5cfa
#25 [c900039f7e48] async_run_entry_fn at 81091683
#26 [c900039f7e70] process_one_work at 810882bc
#27 [c900039f7eb0] worker_thread at 8108854a
#28 [c900039f7f10] kthread at 8108e387
#29 [c900039f7f50] ret_from_fork at 8168fa85
crash> gdb list *drm_calc_vbltimestamp_from_scanoutpos+335
0xa020af0f is in drm_calc_vbltimestamp_from_scanoutpos 
(drivers/gpu/drm/drm_vblank.c:608).
603 /* If mode timing undefined, just return as no-op:
604  * Happens during initial modesetting of a crtc.
605  */
606 if (mode->crtc_clock == 0) {
607 DRM_DEBUG("crtc %u: Noop due to uninitialized mode.\n", 
pipe);
608 WARN_ON_ONCE(drm_drv_uses_atomic_modeset(dev));
609 
610 return false;
611 }
612 
crash> gdb list *report_bug+93
0x8167227d is in report_bug (lib/bug.c:177).
172 return BUG_TRAP_TYPE_WARN;
173 
174 /*
175  * Since this is the only store, concurrency is 
not an issue.
176  */
177 bug->flags |= BUGFLAG_DONE;
178 }
179 }
180 
181 if (warning) {
crash>

config.xz
Description: application/xz