Re: [Nouveau] [PATCH v2] bl: fix backlight regression

2018-02-19 Thread Pierre Moreau
A few words about why this change is needed would be nice, as well as what
changed since the v1 (well, looking back to your original patch, absolutely
everything changed :-D).

On 2018-02-19 — 17:09, Karol Herbst wrote:
> fixes d9c0aadc5aa241df26ce8301d34a8418919fb5ae

The formatting does not follow the kernel guidelines, see
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/process/submitting-patches.rst?id=91ab883eb21325ad80f3473633f794c78ac87f51#n183
Also, you might want to use the commit hash from Linus’ tree, rather than from
Ben’s out-of-tree module.

> 
> Suggested-by: Ben Skeggs 
> Signed-off-by: Karol Herbst 
> ---
>  drm/nouveau/nouveau_backlight.c | 10 +-
>  1 file changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/drm/nouveau/nouveau_backlight.c b/drm/nouveau/nouveau_backlight.c
> index 380f3402..55172655 100644
> --- a/drm/nouveau/nouveau_backlight.c
> +++ b/drm/nouveau/nouveau_backlight.c
> @@ -134,7 +134,7 @@ nv50_get_intensity(struct backlight_device *bd)
>   struct nouveau_encoder *nv_encoder = bl_get_data(bd);
>   struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
>   struct nvif_object *device = >client.device.object;
> - int or = nv_encoder->or;
> + int or = ffs(nv_encoder->dcb->or) - 1;

Since you have that change in a few places, wouldn’t it make sense to have it
in an helper function, that given an nv_encoder object would give you back the
correct or value? The function wouldn’t be doing much, I agree, but it would be
less error-prone to forgetting the -1 or using ffs in some cases.

Pierre

>   u32 div = 1025;
>   u32 val;
>  
> @@ -149,7 +149,7 @@ nv50_set_intensity(struct backlight_device *bd)
>   struct nouveau_encoder *nv_encoder = bl_get_data(bd);
>   struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
>   struct nvif_object *device = >client.device.object;
> - int or = nv_encoder->or;
> + int or = ffs(nv_encoder->dcb->or) - 1;
>   u32 div = 1025;
>   u32 val = (bd->props.brightness * div) / 100;
>  
> @@ -170,7 +170,7 @@ nva3_get_intensity(struct backlight_device *bd)
>   struct nouveau_encoder *nv_encoder = bl_get_data(bd);
>   struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
>   struct nvif_object *device = >client.device.object;
> - int or = nv_encoder->or;
> + int or = ffs(nv_encoder->dcb->or) - 1;
>   u32 div, val;
>  
>   div  = nvif_rd32(device, NV50_PDISP_SOR_PWM_DIV(or));
> @@ -188,7 +188,7 @@ nva3_set_intensity(struct backlight_device *bd)
>   struct nouveau_encoder *nv_encoder = bl_get_data(bd);
>   struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
>   struct nvif_object *device = >client.device.object;
> - int or = nv_encoder->or;
> + int or = ffs(nv_encoder->dcb->or) - 1;
>   u32 div, val;
>  
>   div = nvif_rd32(device, NV50_PDISP_SOR_PWM_DIV(or));
> @@ -228,7 +228,7 @@ nv50_backlight_init(struct drm_connector *connector)
>   return -ENODEV;
>   }
>  
> - if (!nvif_rd32(device, NV50_PDISP_SOR_PWM_CTL(nv_encoder->or)))
> + if (!nvif_rd32(device, NV50_PDISP_SOR_PWM_CTL(ffs(nv_encoder->dcb->or) 
> - 1)))
>   return 0;
>  
>   if (drm->client.device.info.chipset <= 0xa0 ||
> -- 
> 2.14.3
> 
> ___
> Nouveau mailing list
> Nouveau@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/nouveau


signature.asc
Description: PGP signature
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH v2] bl: fix backlight regression

2018-02-19 Thread Karol Herbst
fixes d9c0aadc5aa241df26ce8301d34a8418919fb5ae

Suggested-by: Ben Skeggs 
Signed-off-by: Karol Herbst 
---
 drm/nouveau/nouveau_backlight.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drm/nouveau/nouveau_backlight.c b/drm/nouveau/nouveau_backlight.c
index 380f3402..55172655 100644
--- a/drm/nouveau/nouveau_backlight.c
+++ b/drm/nouveau/nouveau_backlight.c
@@ -134,7 +134,7 @@ nv50_get_intensity(struct backlight_device *bd)
struct nouveau_encoder *nv_encoder = bl_get_data(bd);
struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
struct nvif_object *device = >client.device.object;
-   int or = nv_encoder->or;
+   int or = ffs(nv_encoder->dcb->or) - 1;
u32 div = 1025;
u32 val;
 
@@ -149,7 +149,7 @@ nv50_set_intensity(struct backlight_device *bd)
struct nouveau_encoder *nv_encoder = bl_get_data(bd);
struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
struct nvif_object *device = >client.device.object;
-   int or = nv_encoder->or;
+   int or = ffs(nv_encoder->dcb->or) - 1;
u32 div = 1025;
u32 val = (bd->props.brightness * div) / 100;
 
@@ -170,7 +170,7 @@ nva3_get_intensity(struct backlight_device *bd)
struct nouveau_encoder *nv_encoder = bl_get_data(bd);
struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
struct nvif_object *device = >client.device.object;
-   int or = nv_encoder->or;
+   int or = ffs(nv_encoder->dcb->or) - 1;
u32 div, val;
 
div  = nvif_rd32(device, NV50_PDISP_SOR_PWM_DIV(or));
@@ -188,7 +188,7 @@ nva3_set_intensity(struct backlight_device *bd)
struct nouveau_encoder *nv_encoder = bl_get_data(bd);
struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
struct nvif_object *device = >client.device.object;
-   int or = nv_encoder->or;
+   int or = ffs(nv_encoder->dcb->or) - 1;
u32 div, val;
 
div = nvif_rd32(device, NV50_PDISP_SOR_PWM_DIV(or));
@@ -228,7 +228,7 @@ nv50_backlight_init(struct drm_connector *connector)
return -ENODEV;
}
 
-   if (!nvif_rd32(device, NV50_PDISP_SOR_PWM_CTL(nv_encoder->or)))
+   if (!nvif_rd32(device, NV50_PDISP_SOR_PWM_CTL(ffs(nv_encoder->dcb->or) 
- 1)))
return 0;
 
if (drm->client.device.info.chipset <= 0xa0 ||
-- 
2.14.3

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [Intel-gfx] [PATCH 0/5] Fix deadlock on runtime suspend in DRM drivers

2018-02-19 Thread Alex Deucher
On Mon, Feb 19, 2018 at 9:54 AM, Daniel Vetter  wrote:
> On Mon, Feb 19, 2018 at 03:47:42PM +0100, Lukas Wunner wrote:
>> On Mon, Feb 19, 2018 at 03:05:53PM +0100, Daniel Vetter wrote:
>> > On Mon, Feb 19, 2018 at 12:58:17PM +0100, Lukas Wunner wrote:
>> > > On Mon, Feb 19, 2018 at 12:34:43PM +0100, Daniel Vetter wrote:
>> > > > Well, userspace expects hotplug events, even when we runtime suspend
>> > > > stuff. Hence waking shit up with polling. Imo ignoring hotplug events 
>> > > > is a
>> > > > pretty serious policy decision which is ok in the context of
>> > > > vga_switcheroo, but not really as an automatic thing. E.g. usb also 
>> > > > wakes
>> > > > up if you plug something in, even with all the runtime pm stuff 
>> > > > enabled.
>> > > > Same for sata and everything else.
>> > >
>> > > On the MacBook Pro, the HPD pin of external DP and HDMI ports goes into
>> > > the gmux controller, which sends an interrupt on hotplug even if the GPU
>> > > is powered down.
>> > >
>> > > Apparently Optimus has a similar functionality, cf. 3a6536c51d5d.
>> >
>> > Yeah, for vga_switcheroo and gmux setups shutting down polling explicitly
>> > makes sense. I think ideally we'd stop polling in the gmux handler somehow
>> > (maybe by dropping the relevant DRM_CONNECTOR_POLL flags, or outright
>> > stopping it all). But not when runtime suspending the entire gpu (e.g.
>> > idle system that shuts down the screen and everything, before it decides
>> > a few minutes later to do a full system suspend).
>>
>> nouveau, radeon and amdgpu currently use runtime PM *only* on hybrid
>> graphics laptops.
>>
>> Should the drivers later be extended to also use runtime PM in other
>> scenarios (desktop machines, eGPUs), they can easily detect whether
>> to disable polling on runtime suspend by calling apple_gmux_present()
>> on Macs or the equivalent for Optimus/ATPX.
>
> Ah, then I think the current solution is ok (if not entirely clean imo,
> but that can be fixed up whenever it hurts). Implementing runtime pm for
> other cases is up to the driver authors really (probably more pressing
> when the gpu is on the same SoC).

On our APUs, we support fairly fine grained powergating so this mostly
happens auto-magically in hw; no need for runtimepm.  We haven't
supported native analog encoders in last 3 or 4 generations of display
hw, so polling is not much of an issue going forward.  On most
integrated platforms (e.g., laptops and all-in-ones), digital hotplug
is handled by the platform (we get an ACPI ATIF notification) so we
can wake the dGPU.

Alex

> -Daniel
>
>>
>> Thanks,
>>
>> Lukas
>> ___
>> dri-devel mailing list
>> dri-de...@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch
> ___
> dri-devel mailing list
> dri-de...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [Intel-gfx] [PATCH 0/5] Fix deadlock on runtime suspend in DRM drivers

2018-02-19 Thread Daniel Vetter
On Mon, Feb 19, 2018 at 03:47:42PM +0100, Lukas Wunner wrote:
> On Mon, Feb 19, 2018 at 03:05:53PM +0100, Daniel Vetter wrote:
> > On Mon, Feb 19, 2018 at 12:58:17PM +0100, Lukas Wunner wrote:
> > > On Mon, Feb 19, 2018 at 12:34:43PM +0100, Daniel Vetter wrote:
> > > > Well, userspace expects hotplug events, even when we runtime suspend
> > > > stuff. Hence waking shit up with polling. Imo ignoring hotplug events 
> > > > is a
> > > > pretty serious policy decision which is ok in the context of
> > > > vga_switcheroo, but not really as an automatic thing. E.g. usb also 
> > > > wakes
> > > > up if you plug something in, even with all the runtime pm stuff enabled.
> > > > Same for sata and everything else.
> > > 
> > > On the MacBook Pro, the HPD pin of external DP and HDMI ports goes into
> > > the gmux controller, which sends an interrupt on hotplug even if the GPU
> > > is powered down.
> > > 
> > > Apparently Optimus has a similar functionality, cf. 3a6536c51d5d.
> > 
> > Yeah, for vga_switcheroo and gmux setups shutting down polling explicitly
> > makes sense. I think ideally we'd stop polling in the gmux handler somehow
> > (maybe by dropping the relevant DRM_CONNECTOR_POLL flags, or outright
> > stopping it all). But not when runtime suspending the entire gpu (e.g.
> > idle system that shuts down the screen and everything, before it decides
> > a few minutes later to do a full system suspend).
> 
> nouveau, radeon and amdgpu currently use runtime PM *only* on hybrid
> graphics laptops.
> 
> Should the drivers later be extended to also use runtime PM in other
> scenarios (desktop machines, eGPUs), they can easily detect whether
> to disable polling on runtime suspend by calling apple_gmux_present()
> on Macs or the equivalent for Optimus/ATPX.

Ah, then I think the current solution is ok (if not entirely clean imo,
but that can be fixed up whenever it hurts). Implementing runtime pm for
other cases is up to the driver authors really (probably more pressing
when the gpu is on the same SoC).
-Daniel

> 
> Thanks,
> 
> Lukas
> ___
> dri-devel mailing list
> dri-de...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [Intel-gfx] [PATCH 0/5] Fix deadlock on runtime suspend in DRM drivers

2018-02-19 Thread Lukas Wunner
On Mon, Feb 19, 2018 at 03:05:53PM +0100, Daniel Vetter wrote:
> On Mon, Feb 19, 2018 at 12:58:17PM +0100, Lukas Wunner wrote:
> > On Mon, Feb 19, 2018 at 12:34:43PM +0100, Daniel Vetter wrote:
> > > Well, userspace expects hotplug events, even when we runtime suspend
> > > stuff. Hence waking shit up with polling. Imo ignoring hotplug events is a
> > > pretty serious policy decision which is ok in the context of
> > > vga_switcheroo, but not really as an automatic thing. E.g. usb also wakes
> > > up if you plug something in, even with all the runtime pm stuff enabled.
> > > Same for sata and everything else.
> > 
> > On the MacBook Pro, the HPD pin of external DP and HDMI ports goes into
> > the gmux controller, which sends an interrupt on hotplug even if the GPU
> > is powered down.
> > 
> > Apparently Optimus has a similar functionality, cf. 3a6536c51d5d.
> 
> Yeah, for vga_switcheroo and gmux setups shutting down polling explicitly
> makes sense. I think ideally we'd stop polling in the gmux handler somehow
> (maybe by dropping the relevant DRM_CONNECTOR_POLL flags, or outright
> stopping it all). But not when runtime suspending the entire gpu (e.g.
> idle system that shuts down the screen and everything, before it decides
> a few minutes later to do a full system suspend).

nouveau, radeon and amdgpu currently use runtime PM *only* on hybrid
graphics laptops.

Should the drivers later be extended to also use runtime PM in other
scenarios (desktop machines, eGPUs), they can easily detect whether
to disable polling on runtime suspend by calling apple_gmux_present()
on Macs or the equivalent for Optimus/ATPX.

Thanks,

Lukas
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [Intel-gfx] [PATCH 0/5] Fix deadlock on runtime suspend in DRM drivers

2018-02-19 Thread Daniel Vetter
On Mon, Feb 19, 2018 at 12:58:17PM +0100, Lukas Wunner wrote:
> On Mon, Feb 19, 2018 at 12:34:43PM +0100, Daniel Vetter wrote:
> > On Sun, Feb 11, 2018 at 10:38:28AM +0100, Lukas Wunner wrote:
> > > Fix a deadlock on hybrid graphics laptops that's been present since 2013:
> > > 
> > > DRM drivers poll connectors in 10 sec intervals.  The poll worker is
> > > stopped on ->runtime_suspend with cancel_delayed_work_sync().  However
> > > the poll worker invokes the DRM drivers' ->detect callbacks, which call
> > > pm_runtime_get_sync().  If the poll worker starts after runtime suspend
> > > has begun, pm_runtime_get_sync() will wait for runtime suspend to finish
> > > with the intention of runtime resuming the device afterwards.  The result
> > > is a circular wait between poll worker and autosuspend worker.
> > 
> > Don't shut down the poll worker when runtime suspending, that' doesn't
> > work. If you need the poll work, then that means waking up the gpu every
> > few seconds. If you don't need it, then make sure the DRM_CONNECTOR_POLL
> > flags are set correctly (you can update them at runtime, the poll worker
> > will pick that up).
> > 
> > That should fix the deadlock, and it's how we do it in i915 (where igt in
> > CI totally hammers the runtime pm support, and it seems to hold up).
> 
> It would fix the deadlock but it's not an option on dual GPU laptops.
> Power consumption of the discrete GPU is massive (9 W on my machine).
> 
> 
> > > i915, malidp and msm "solved" this issue by not stopping the poll worker
> > > on runtime suspend.  But this results in the GPU bouncing back and forth
> > > between D0 and D3 continuously.  That's a total no-go for GPUs which
> > > runtime suspend to D3cold since every suspend/resume cycle costs a
> > > significant amount of time and energy.  (i915 and malidp do not seem
> > > to acquire a runtime PM ref in the ->detect callbacks, which seems
> > > questionable.  msm however does and would also deadlock if it disabled
> > > the poll worker on runtime suspend.  cc += Archit, Liviu, intel-gfx)
> > 
> > Well, userspace expects hotplug events, even when we runtime suspend
> > stuff. Hence waking shit up with polling. Imo ignoring hotplug events is a
> > pretty serious policy decision which is ok in the context of
> > vga_switcheroo, but not really as an automatic thing. E.g. usb also wakes
> > up if you plug something in, even with all the runtime pm stuff enabled.
> > Same for sata and everything else.
> 
> On the MacBook Pro, the HPD pin of external DP and HDMI ports goes into
> the gmux controller, which sends an interrupt on hotplug even if the GPU
> is powered down.
> 
> Apparently Optimus has a similar functionality, cf. 3a6536c51d5d.

Yeah, for vga_switcheroo and gmux setups shutting down polling explicitly
makes sense. I think ideally we'd stop polling in the gmux handler somehow
(maybe by dropping the relevant DRM_CONNECTOR_POLL flags, or outright
stopping it all). But not when runtime suspending the entire gpu (e.g.
idle system that shuts down the screen and everything, before it decides
a few minutes later to do a full system suspend).

I also think that this approach would lead to cleaner code, having
explicit checks for the (locking) execution context all over the place
tends to result in regrets eventually ime.

> For the rare cases where an external VGA or DVI-A port is present, I guess
> it's reasonable to have the user wake up the machine manually.
> 
> I'm not sure why nouveau polls ports on my laptop, the GK107 only has an
> LVDS and three DP ports, need to investigate.

Yeah, that'd be good to figure out. The probe helpers should shut down the
worker if there's no connector that needs probing. We use that to
enable/disable the poll worker when there's a hotplug storm on the irq
line.

Once that's fixed we can perhaps also untangle the poll-vs-gmux story.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [Intel-gfx] [PATCH 0/5] Fix deadlock on runtime suspend in DRM drivers

2018-02-19 Thread Lukas Wunner
On Mon, Feb 19, 2018 at 12:48:04PM +0100, Daniel Vetter wrote:
> On Thu, Feb 15, 2018 at 06:38:44AM +0100, Lukas Wunner wrote:
> > On Wed, Feb 14, 2018 at 09:58:43AM -0500, Sean Paul wrote:
> > > On Wed, Feb 14, 2018 at 03:43:56PM +0100, Michel Dänzer wrote:
> > > > On 2018-02-14 03:08 PM, Sean Paul wrote:
> > > > > On Wed, Feb 14, 2018 at 10:26:35AM +0100, Maarten Lankhorst wrote:
> > > > >> Op 14-02-18 om 09:46 schreef Lukas Wunner:
> > > > >>> On Sun, Feb 11, 2018 at 10:38:28AM +0100, Lukas Wunner wrote:
> > > >  Fix a deadlock on hybrid graphics laptops that's been present 
> > > >  since 2013:
> > > > >>> This series has been reviewed, consent has been expressed by the 
> > > > >>> most
> > > > >>> interested parties, patch [1/5] which touches files outside 
> > > > >>> drivers/gpu
> > > > >>> has been acked and I've just out a v2 addressing the only objection
> > > > >>> raised.  My plan is thus to wait another two days for comments and,
> > > > >>> barring further objections, push to drm-misc this weekend.
> > > > >>>
> > > > >>> However I'm struggling with the decision whether to push to next or
> > > > >>> fixes.  The series is marked for stable, however the number of
> > > > >>> affected machines is limited and for an issue that's been present
> > > > >>> for 5 years it probably doesn't matter if it soaks another two 
> > > > >>> months
> > > > >>> in linux-next befor it gets backported.  Hence I tend to err on the
> > > > >>> side of caution and push to next, however a case could be made that
> > > > >>> fixes is more appropriate.
> > > > >>>
> > > > >>> I'm lacking experience making such decisions and would be interested
> > > > >>> to learn how you'd handle this.
> > > > >>
> > > > >> I would say fixes, it doesn't look particularly scary. :)
> > > > > 
> > > > > Agreed. If it's good enough for stable, it's good enough for -fixes!
> > > > 
> > > > It's not that simple, is it? Fast-tracking patches (some of which appear
> > > > to be untested) to stable without an immediate cause for urgency seems
> > > > risky to me.
> > > 
> > > /me should be more careful what he says
> > > 
> > > Given where we are in the release cycle, it's barely a fast track.
> > > If these go in -fixes, they'll get in -rc2 and will have plenty of
> > > time to bake. If we were at rc5, it might be a different story.
> > 
> > The patches are marked for stable though, so if they go in through
> > drm-misc-fixes, they may appear in stable kernels before 4.16-final
> > is out.  Greg picks up patches once they're in Linus' tree, though
> > often with a delay of a few days or weeks.  If they go in through
> > drm-misc-next, they're guaranteed not to appear in *any* release
> > before 4.16-final is out.
> > 
> > This allows for differentiation between no-brainer stable fixes that
> > can be sent immediately and scarier, but similarly important stable
> > fixes that should soak for a while.  I'm not sure which category
> > this series belongs to, though it's true what Maarten says, it's
> > not *that* grave a change.
> 
> If you're this concerned about them, then pls do _not_ put cc: stable on
> the patches. Instead get them merged through -fixes (or maybe even -next),
> and once they're sufficiently tested, send a mail to stable@ asking for
> ane explicit backport.

I'm not concerned about them, but would have erred on the side of caution.
However consensus seems to have been that they're sufficiently unscary to
push to -fixes.  Do you disagree with that decision, if so, why?  Can we
amend the dim docs to codify guidelines whether to push to -fixes or -next?
I allowed 1 week for comments, now you're returning from vacation and seem
to be unhappy, was 1 week too short?

Thanks,

Lukas
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [PATCH 0/5] Fix deadlock on runtime suspend in DRM drivers

2018-02-19 Thread Lukas Wunner
On Mon, Feb 19, 2018 at 12:34:43PM +0100, Daniel Vetter wrote:
> On Sun, Feb 11, 2018 at 10:38:28AM +0100, Lukas Wunner wrote:
> > Fix a deadlock on hybrid graphics laptops that's been present since 2013:
> > 
> > DRM drivers poll connectors in 10 sec intervals.  The poll worker is
> > stopped on ->runtime_suspend with cancel_delayed_work_sync().  However
> > the poll worker invokes the DRM drivers' ->detect callbacks, which call
> > pm_runtime_get_sync().  If the poll worker starts after runtime suspend
> > has begun, pm_runtime_get_sync() will wait for runtime suspend to finish
> > with the intention of runtime resuming the device afterwards.  The result
> > is a circular wait between poll worker and autosuspend worker.
> 
> Don't shut down the poll worker when runtime suspending, that' doesn't
> work. If you need the poll work, then that means waking up the gpu every
> few seconds. If you don't need it, then make sure the DRM_CONNECTOR_POLL
> flags are set correctly (you can update them at runtime, the poll worker
> will pick that up).
> 
> That should fix the deadlock, and it's how we do it in i915 (where igt in
> CI totally hammers the runtime pm support, and it seems to hold up).

It would fix the deadlock but it's not an option on dual GPU laptops.
Power consumption of the discrete GPU is massive (9 W on my machine).


> > i915, malidp and msm "solved" this issue by not stopping the poll worker
> > on runtime suspend.  But this results in the GPU bouncing back and forth
> > between D0 and D3 continuously.  That's a total no-go for GPUs which
> > runtime suspend to D3cold since every suspend/resume cycle costs a
> > significant amount of time and energy.  (i915 and malidp do not seem
> > to acquire a runtime PM ref in the ->detect callbacks, which seems
> > questionable.  msm however does and would also deadlock if it disabled
> > the poll worker on runtime suspend.  cc += Archit, Liviu, intel-gfx)
> 
> Well, userspace expects hotplug events, even when we runtime suspend
> stuff. Hence waking shit up with polling. Imo ignoring hotplug events is a
> pretty serious policy decision which is ok in the context of
> vga_switcheroo, but not really as an automatic thing. E.g. usb also wakes
> up if you plug something in, even with all the runtime pm stuff enabled.
> Same for sata and everything else.

On the MacBook Pro, the HPD pin of external DP and HDMI ports goes into
the gmux controller, which sends an interrupt on hotplug even if the GPU
is powered down.

Apparently Optimus has a similar functionality, cf. 3a6536c51d5d.

For the rare cases where an external VGA or DVI-A port is present, I guess
it's reasonable to have the user wake up the machine manually.

I'm not sure why nouveau polls ports on my laptop, the GK107 only has an
LVDS and three DP ports, need to investigate.

Thanks,

Lukas
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [Intel-gfx] [PATCH 0/5] Fix deadlock on runtime suspend in DRM drivers

2018-02-19 Thread Daniel Vetter
On Thu, Feb 15, 2018 at 06:38:44AM +0100, Lukas Wunner wrote:
> On Wed, Feb 14, 2018 at 09:58:43AM -0500, Sean Paul wrote:
> > On Wed, Feb 14, 2018 at 03:43:56PM +0100, Michel Dänzer wrote:
> > > On 2018-02-14 03:08 PM, Sean Paul wrote:
> > > > On Wed, Feb 14, 2018 at 10:26:35AM +0100, Maarten Lankhorst wrote:
> > > >> Op 14-02-18 om 09:46 schreef Lukas Wunner:
> > > >>> On Sun, Feb 11, 2018 at 10:38:28AM +0100, Lukas Wunner wrote:
> > >  Fix a deadlock on hybrid graphics laptops that's been present since 
> > >  2013:
> > > >>> This series has been reviewed, consent has been expressed by the most
> > > >>> interested parties, patch [1/5] which touches files outside 
> > > >>> drivers/gpu
> > > >>> has been acked and I've just out a v2 addressing the only objection
> > > >>> raised.  My plan is thus to wait another two days for comments and,
> > > >>> barring further objections, push to drm-misc this weekend.
> > > >>>
> > > >>> However I'm struggling with the decision whether to push to next or
> > > >>> fixes.  The series is marked for stable, however the number of
> > > >>> affected machines is limited and for an issue that's been present
> > > >>> for 5 years it probably doesn't matter if it soaks another two months
> > > >>> in linux-next befor it gets backported.  Hence I tend to err on the
> > > >>> side of caution and push to next, however a case could be made that
> > > >>> fixes is more appropriate.
> > > >>>
> > > >>> I'm lacking experience making such decisions and would be interested
> > > >>> to learn how you'd handle this.
> > > >>
> > > >> I would say fixes, it doesn't look particularly scary. :)
> > > > 
> > > > Agreed. If it's good enough for stable, it's good enough for -fixes!
> > > 
> > > It's not that simple, is it? Fast-tracking patches (some of which appear
> > > to be untested) to stable without an immediate cause for urgency seems
> > > risky to me.
> > 
> > /me should be more careful what he says
> > 
> > Given where we are in the release cycle, it's barely a fast track.
> > If these go in -fixes, they'll get in -rc2 and will have plenty of
> > time to bake. If we were at rc5, it might be a different story.
> 
> The patches are marked for stable though, so if they go in through
> drm-misc-fixes, they may appear in stable kernels before 4.16-final
> is out.  Greg picks up patches once they're in Linus' tree, though
> often with a delay of a few days or weeks.  If they go in through
> drm-misc-next, they're guaranteed not to appear in *any* release
> before 4.16-final is out.
> 
> This allows for differentiation between no-brainer stable fixes that
> can be sent immediately and scarier, but similarly important stable
> fixes that should soak for a while.  I'm not sure which category
> this series belongs to, though it's true what Maarten says, it's
> not *that* grave a change.

If you're this concerned about them, then pls do _not_ put cc: stable on
the patches. Instead get them merged through -fixes (or maybe even -next),
and once they're sufficiently tested, send a mail to stable@ asking for
ane explicit backport.

Stuff that's marked for stable must be obviuos and tested enough for
backporting right away (which doesn't seem to be the case here).
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [PATCH 0/5] Fix deadlock on runtime suspend in DRM drivers

2018-02-19 Thread Daniel Vetter
On Sun, Feb 11, 2018 at 10:38:28AM +0100, Lukas Wunner wrote:
> Fix a deadlock on hybrid graphics laptops that's been present since 2013:
> 
> DRM drivers poll connectors in 10 sec intervals.  The poll worker is
> stopped on ->runtime_suspend with cancel_delayed_work_sync().  However
> the poll worker invokes the DRM drivers' ->detect callbacks, which call
> pm_runtime_get_sync().  If the poll worker starts after runtime suspend
> has begun, pm_runtime_get_sync() will wait for runtime suspend to finish
> with the intention of runtime resuming the device afterwards.  The result
> is a circular wait between poll worker and autosuspend worker.

Don't shut down the poll worker when runtime suspending, that' doesn't
work. If you need the poll work, then that means waking up the gpu every
few seconds. If you don't need it, then make sure the DRM_CONNECTOR_POLL
flags are set correctly (you can update them at runtime, the poll worker
will pick that up).

That should fix the deadlock, and it's how we do it in i915 (where igt in
CI totally hammers the runtime pm support, and it seems to hold up).

And I guess we need to improve the poll worker docs about this.

> I'm seeing this deadlock so often it's not funny anymore. I've also found
> 3 nouveau bugzillas about it and 1 radeon bugzilla. (See patch [3/5] and
> [4/5].)
> 
> One theoretical solution would be to add a flag to the ->detect callback
> to tell it that it's running in the poll worker's context.  In that case
> it doesn't need to call pm_runtime_get_sync() because the poll worker is
> only enabled while runtime active and we know that ->runtime_suspend
> waits for it to finish.  But this approach would require touching every
> single DRM driver's ->detect hook.  Moreover the ->detect hook is called
> from numerous other places, both in the DRM library as well as in each
> driver, making it necessary to trace every possible call chain and check
> if it's coming from the poll worker or not.  I've tried to do that for
> nouveau (see the call sites listed in the commit message of patch [3/5])
> and concluded that this approach is a nightmare to implement.
> 
> Another reason for the infeasibility of this approach is that ->detect
> is called from within the DRM library without driver involvement, e.g.
> via DRM's sysfs interface.  In those cases, pm_runtime_get_sync() would
> have to be called by the DRM library, but the DRM library is supposed to
> stay generic, wheras runtime PM is driver-specific.
> 
> So instead, I've come up with this much simpler solution which gleans
> from the current task's flags if it's a workqueue worker, retrieves the
> work_struct and checks if it's the poll worker.  All that's needed is
> one small helper in the workqueue code and another small helper in the
> DRM library.  This solution lends itself nicely to stable-backporting.
> 
> The patches for radeon and amdgpu are compile-tested only, I only have a
> MacBook Pro with an Nvidia GK107 to test.  To test the patches, add an
> "msleep(12*1000);" at the top of the driver's ->runtime_suspend hook.
> This ensures that the poll worker runs after ->runtime_suspend has begun.
> Wait 12 sec after the GPU has begun runtime suspend, then check
> /sys/bus/pci/devices/:01:00.0/power/runtime_status.  Without this
> series, the status will be stuck at "suspending" and you'll get hung task
> errors in dmesg after a few minutes.
> 
> i915, malidp and msm "solved" this issue by not stopping the poll worker
> on runtime suspend.  But this results in the GPU bouncing back and forth
> between D0 and D3 continuously.  That's a total no-go for GPUs which
> runtime suspend to D3cold since every suspend/resume cycle costs a
> significant amount of time and energy.  (i915 and malidp do not seem
> to acquire a runtime PM ref in the ->detect callbacks, which seems
> questionable.  msm however does and would also deadlock if it disabled
> the poll worker on runtime suspend.  cc += Archit, Liviu, intel-gfx)

Well, userspace expects hotplug events, even when we runtime suspend
stuff. Hence waking shit up with polling. Imo ignoring hotplug events is a
pretty serious policy decision which is ok in the context of
vga_switcheroo, but not really as an automatic thing. E.g. usb also wakes
up if you plug something in, even with all the runtime pm stuff enabled.
Same for sata and everything else.
-Daniel

> Please review.  Thanks,
> 
> Lukas
> 
> Lukas Wunner (5):
>   workqueue: Allow retrieval of current task's work struct
>   drm: Allow determining if current task is output poll worker
>   drm/nouveau: Fix deadlock on runtime suspend
>   drm/radeon: Fix deadlock on runtime suspend
>   drm/amdgpu: Fix deadlock on runtime suspend
> 
>  drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | 58 +---
>  drivers/gpu/drm/drm_probe_helper.c | 14 +
>  drivers/gpu/drm/nouveau/nouveau_connector.c| 18 +--
>  drivers/gpu/drm/radeon/radeon_connectors.c | 74 
> 

Re: [Nouveau] [PATCH 1/7] PCI: Restore BARs on runtime resume despite being unbound

2018-02-19 Thread Rafael J. Wysocki
On Sun, Feb 18, 2018 at 9:38 AM, Lukas Wunner  wrote:
> PCI devices not bound to a driver are supposed to stay in D0 during
> runtime suspend.  But they may have a parent which is bound and can be
> transitioned to D3cold at runtime.  Once the parent goes to D3cold, the
> unbound child may go to D3cold as well.  When the child comes out of
> D3cold, its BARs are uninitialized and thus inaccessible when a driver
> tries to probe.
>
> One example are recent hybrid graphics laptops which cut power to the
> discrete GPU when the root port above it goes to ACPI power state D3.
> Users may provoke this by unbinding the GPU driver and allowing runtime
> PM on the GPU via sysfs:  The PM core will then treat the GPU as
> "suspended", which in turn allows the root port to runtime suspend,
> causing the power resources listed in its _PR3 object to be powered off.
> The GPU's BARs will be uninitialized when a driver later probes it.
>
> Another example are hybrid graphics laptops where the GPU itself (rather
> than the root port) is capable of runtime suspending to D3cold.  If the
> GPU's integrated HDA controller is not bound and the GPU's driver
> decides to runtime suspend to D3cold, the HDA controller's BARs will be
> uninitialized when a driver later probes it.
>
> Fix by restoring the BARs on runtime resume if the device is not bound.
> This is sufficient to fix the above-mentioned use cases.  Other use
> cases might require a full-blown pci_save_state() / pci_restore_state()
> or execution of fixups.  We can add that once use cases materialize,
> let's not inflate the code unnecessarily.
>
> Cc: Bjorn Helgaas 
> Cc: Rafael J. Wysocki 
> Signed-off-by: Lukas Wunner 

Reviewed-by: Rafael J. Wysocki 

> ---
>  drivers/pci/pci-driver.c | 8 ++--
>  drivers/pci/pci.c| 2 +-
>  drivers/pci/pci.h| 1 +
>  3 files changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
> index 3bed6beda051..51b11cbd48f6 100644
> --- a/drivers/pci/pci-driver.c
> +++ b/drivers/pci/pci-driver.c
> @@ -1277,10 +1277,14 @@ static int pci_pm_runtime_resume(struct device *dev)
>
> /*
>  * If pci_dev->driver is not set (unbound), the device should
> -* always remain in D0 regardless of the runtime PM status
> +* always remain in D0 regardless of the runtime PM status.
> +* But if its parent can go to D3cold, this device may have
> +* been in D3cold as well and require restoration of its BARs.
>  */
> -   if (!pci_dev->driver)
> +   if (!pci_dev->driver) {
> +   pci_restore_bars(pci_dev);
> return 0;
> +   }
>
> if (!pm || !pm->runtime_resume)
> return -ENOSYS;
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index f6a4dd10d9b0..f694650235f2 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -563,7 +563,7 @@ int pci_wait_for_pending(struct pci_dev *dev, int pos, 
> u16 mask)
>   * Restore the BAR values for a given device, so as to make it
>   * accessible by its driver.
>   */
> -static void pci_restore_bars(struct pci_dev *dev)
> +void pci_restore_bars(struct pci_dev *dev)
>  {
> int i;
>
> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> index fcd81911b127..29dc15bbe3bf 100644
> --- a/drivers/pci/pci.h
> +++ b/drivers/pci/pci.h
> @@ -83,6 +83,7 @@ void pci_allocate_cap_save_buffers(struct pci_dev *dev);
>  void pci_free_cap_save_buffers(struct pci_dev *dev);
>  bool pci_bridge_d3_possible(struct pci_dev *dev);
>  void pci_bridge_d3_update(struct pci_dev *dev);
> +void pci_restore_bars(struct pci_dev *dev);
>
>  static inline void pci_wakeup_event(struct pci_dev *dev)
>  {
> --
> 2.15.1
>
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [PATCH] drm/nouveau/bl: Fix oops on driver unbind

2018-02-19 Thread Pierre Moreau
My bad; I did test suspend/resume, but not unbinding. Shouldn’t that
happen on shutdown as well, as the driver gets unloaded? I don’t remember
seeing that issue there though.

On 2018-02-17 — 13:40, Lukas Wunner wrote:
> Unbinding nouveau on a dual GPU MacBook Pro oopses because we iterate
> over the bl_connectors list in nouveau_backlight_exit() but skipped
> initializing it in nouveau_backlight_init().  Stacktrace for posterity:
> 
> BUG: unable to handle kernel NULL pointer dereference at 0010
> IP: nouveau_backlight_exit+0x2b/0x70 [nouveau]
> nouveau_display_destroy+0x29/0x80 [nouveau]
> nouveau_drm_unload+0x65/0xe0 [nouveau]
> drm_dev_unregister+0x3c/0xe0 [drm]
> drm_put_dev+0x2e/0x60 [drm]
> nouveau_drm_device_remove+0x47/0x70 [nouveau]
> pci_device_remove+0x36/0xb0
> device_release_driver_internal+0x157/0x220
> driver_detach+0x39/0x70
> bus_remove_driver+0x51/0xd0
> pci_unregister_driver+0x2a/0xa0
> nouveau_drm_exit+0x15/0xfb0 [nouveau]
> SyS_delete_module+0x18c/0x290
> system_call_fast_compare_end+0xc/0x6f
> 
> Fixes: b53ac1ee12a3 ("drm/nouveau/bl: Do not register interface if Apple GMUX 
> detected")
> Cc: sta...@vger.kernel.org # v4.10+
> Cc: Pierre Moreau 
> Signed-off-by: Lukas Wunner 
> ---
> I reviewed the patch causing the oops but unfortunately missed this, sorry!
> 
>  drivers/gpu/drm/nouveau/nouveau_backlight.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c 
> b/drivers/gpu/drm/nouveau/nouveau_backlight.c
> index 380f340204e8..f56f60f695e1 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_backlight.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c
> @@ -268,13 +268,13 @@ nouveau_backlight_init(struct drm_device *dev)
>   struct nvif_device *device = >client.device;
>   struct drm_connector *connector;
>  
> + INIT_LIST_HEAD(>bl_connectors);
> +

We could instead have an early return in the exit function if
`apple_gmux_present()` or `drm->bl_connectors` is null, but your current fix
seems better.

Reviewed-by: Pierre Moreau 

Thank you for the fix!
Pierre

>   if (apple_gmux_present()) {
>   NV_INFO(drm, "Apple GMUX detected: not registering Nouveau 
> backlight interface\n");
>   return 0;
>   }
>  
> - INIT_LIST_HEAD(>bl_connectors);
> -
>   list_for_each_entry(connector, >mode_config.connector_list, head) {
>   if (connector->connector_type != DRM_MODE_CONNECTOR_LVDS &&
>   connector->connector_type != DRM_MODE_CONNECTOR_eDP)
> -- 
> 2.15.1
> 


signature.asc
Description: PGP signature
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/nouveau