On 2015/5/21 9:16, Rafael J. Wysocki wrote:
> On Wednesday, May 20, 2015 04:50:13 PM Fu, Zhonghui wrote:
>> On 2015/5/16 8:45, Rafael J. Wysocki wrote:
>>> On Wednesday, May 06, 2015 10:47:24 PM Fu, Zhonghui wrote:
>>>> Some system-hang occur only when multiple device PM methods
>>>> are running asynchronously. So should cancel the synchronization
>>>> of pm-trace, and make it suitable for asynchronous PM environment.
>>>>
>>>> Signed-off-by: Zhonghui Fu <[email protected]>
>>>> ---
>>>>  drivers/base/power/main.c |   53 
>>>> +++++++++++++++++++++-----------------------
>>>>  include/linux/pm-trace.h  |   24 ++++++++++++--------
>>>>  kernel/power/main.c       |    2 +
>>>>  3 files changed, 41 insertions(+), 38 deletions(-)
>>>>
>>>> diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
>>>> index 3d874ec..40daf48 100644
>>>> --- a/drivers/base/power/main.c
>>>> +++ b/drivers/base/power/main.c
>>>> @@ -476,9 +476,6 @@ static int device_resume_noirq(struct device *dev, 
>>>> pm_message_t state, bool asyn
>>>>    char *info = NULL;
>>>>    int error = 0;
>>>>  
>>>> -  TRACE_DEVICE(dev);
>>>> -  TRACE_RESUME(0);
>>>> -
>>>>    if (dev->power.syscore || dev->power.direct_complete)
>>>>            goto Out;
>>>>  
>>>> @@ -506,19 +503,21 @@ static int device_resume_noirq(struct device *dev, 
>>>> pm_message_t state, bool asyn
>>>>            callback = pm_noirq_op(dev->driver->pm, state);
>>>>    }
>>>>  
>>>> +  TRACE_DEVICE_START(dev);
>>>> +  TRACE_RESUME(0);
>>>> +
>>>>    error = dpm_run_callback(callback, dev, state, info);
>>>>    dev->power.is_noirq_suspended = false;
>>>>  
>>>> +  TRACE_DEVICE_END();
>>>>   Out:
>>>>    complete_all(&dev->power.completion);
>>>> -  TRACE_RESUME(error);
>>>>    return error;
>>>>  }
>>>>  
>>>>  static bool is_async(struct device *dev)
>>>>  {
>>>> -  return dev->power.async_suspend && pm_async_enabled
>>>> -          && !pm_trace_is_enabled();
>>>> +  return dev->power.async_suspend && pm_async_enabled;
>>>>  }
>>>>  
>>>>  static void async_resume_noirq(void *data, async_cookie_t cookie)
>>>> @@ -605,9 +604,6 @@ static int device_resume_early(struct device *dev, 
>>>> pm_message_t state, bool asyn
>>>>    char *info = NULL;
>>>>    int error = 0;
>>>>  
>>>> -  TRACE_DEVICE(dev);
>>>> -  TRACE_RESUME(0);
>>>> -
>>>>    if (dev->power.syscore || dev->power.direct_complete)
>>>>            goto Out;
>>>>  
>>>> @@ -635,12 +631,14 @@ static int device_resume_early(struct device *dev, 
>>>> pm_message_t state, bool asyn
>>>>            callback = pm_late_early_op(dev->driver->pm, state);
>>>>    }
>>>>  
>>>> +  TRACE_DEVICE_START(dev);
>>>> +  TRACE_RESUME(0);
>>>> +
>>>>    error = dpm_run_callback(callback, dev, state, info);
>>>>    dev->power.is_late_suspended = false;
>>>>  
>>>> +  TRACE_DEVICE_END();
>>>>   Out:
>>>> -  TRACE_RESUME(error);
>>>> -
>>>>    pm_runtime_enable(dev);
>>>>    complete_all(&dev->power.completion);
>>>>    return error;
>>>> @@ -734,9 +732,6 @@ static int device_resume(struct device *dev, 
>>>> pm_message_t state, bool async)
>>>>    int error = 0;
>>>>    DECLARE_DPM_WATCHDOG_ON_STACK(wd);
>>>>  
>>>> -  TRACE_DEVICE(dev);
>>>> -  TRACE_RESUME(0);
>>>> -
>>>>    if (dev->power.syscore)
>>>>            goto Complete;
>>>>  
>>>> @@ -801,9 +796,13 @@ static int device_resume(struct device *dev, 
>>>> pm_message_t state, bool async)
>>>>    }
>>>>  
>>>>   End:
>>>> +  TRACE_DEVICE_START(dev);
>>>> +  TRACE_RESUME(0);
>>>> +
>>>>    error = dpm_run_callback(callback, dev, state, info);
>>>>    dev->power.is_suspended = false;
>>>>  
>>>> +  TRACE_DEVICE_END();
>>>>   Unlock:
>>>>    device_unlock(dev);
>>>>    dpm_watchdog_clear(&wd);
>>>> @@ -811,8 +810,6 @@ static int device_resume(struct device *dev, 
>>>> pm_message_t state, bool async)
>>>>   Complete:
>>>>    complete_all(&dev->power.completion);
>>>>  
>>>> -  TRACE_RESUME(error);
>>>> -
>>>>    return error;
>>>>  }
>>>>  
>>>> @@ -1017,9 +1014,6 @@ static int __device_suspend_noirq(struct device 
>>>> *dev, pm_message_t state, bool a
>>>>    char *info = NULL;
>>>>    int error = 0;
>>>>  
>>>> -  TRACE_DEVICE(dev);
>>>> -  TRACE_SUSPEND(0);
>>>> -
>>>>    if (async_error)
>>>>            goto Complete;
>>>>  
>>>> @@ -1052,15 +1046,18 @@ static int __device_suspend_noirq(struct device 
>>>> *dev, pm_message_t state, bool a
>>>>            callback = pm_noirq_op(dev->driver->pm, state);
>>>>    }
>>>>  
>>>> +  TRACE_DEVICE_START(dev);
>>>> +  TRACE_SUSPEND(0);
>>>> +
>>>>    error = dpm_run_callback(callback, dev, state, info);
>>>>    if (!error)
>>>>            dev->power.is_noirq_suspended = true;
>>>>    else
>>>>            async_error = error;
>>>>  
>>>> +  TRACE_DEVICE_END();
>>>>  Complete:
>>>>    complete_all(&dev->power.completion);
>>>> -  TRACE_SUSPEND(error);
>>>>    return error;
>>>>  }
>>>>  
>>>> @@ -1161,9 +1158,6 @@ static int __device_suspend_late(struct device *dev, 
>>>> pm_message_t state, bool as
>>>>    char *info = NULL;
>>>>    int error = 0;
>>>>  
>>>> -  TRACE_DEVICE(dev);
>>>> -  TRACE_SUSPEND(0);
>>>> -
>>>>    __pm_runtime_disable(dev, false);
>>>>  
>>>>    if (async_error)
>>>> @@ -1198,14 +1192,17 @@ static int __device_suspend_late(struct device 
>>>> *dev, pm_message_t state, bool as
>>>>            callback = pm_late_early_op(dev->driver->pm, state);
>>>>    }
>>>>  
>>>> +  TRACE_DEVICE_START(dev);
>>>> +  TRACE_SUSPEND(0);
>>>> +
>>>>    error = dpm_run_callback(callback, dev, state, info);
>>>>    if (!error)
>>>>            dev->power.is_late_suspended = true;
>>>>    else
>>>>            async_error = error;
>>>>  
>>>> +  TRACE_DEVICE_END();
>>>>  Complete:
>>>> -  TRACE_SUSPEND(error);
>>>>    complete_all(&dev->power.completion);
>>>>    return error;
>>>>  }
>>>> @@ -1346,9 +1343,6 @@ static int __device_suspend(struct device *dev, 
>>>> pm_message_t state, bool async)
>>>>    int error = 0;
>>>>    DECLARE_DPM_WATCHDOG_ON_STACK(wd);
>>>>  
>>>> -  TRACE_DEVICE(dev);
>>>> -  TRACE_SUSPEND(0);
>>>> -
>>>>    dpm_wait_for_children(dev, async);
>>>>  
>>>>    if (async_error)
>>>> @@ -1428,8 +1422,12 @@ static int __device_suspend(struct device *dev, 
>>>> pm_message_t state, bool async)
>>>>            callback = pm_op(dev->driver->pm, state);
>>>>    }
>>>>  
>>>> +  TRACE_DEVICE_START(dev);
>>>> +  TRACE_SUSPEND(0);
>>>> +
>>>>    error = dpm_run_callback(callback, dev, state, info);
>>>>  
>>>> +  TRACE_DEVICE_END();
>>>>   End:
>>>>    if (!error) {
>>>>            struct device *parent = dev->parent;
>>>> @@ -1455,7 +1453,6 @@ static int __device_suspend(struct device *dev, 
>>>> pm_message_t state, bool async)
>>>>    if (error)
>>>>            async_error = error;
>>>>  
>>>> -  TRACE_SUSPEND(error);
>>>>    return error;
>>>>  }
>>>>  
>>>> diff --git a/include/linux/pm-trace.h b/include/linux/pm-trace.h
>>>> index ecbde7a..aa480b1 100644
>>>> --- a/include/linux/pm-trace.h
>>>> +++ b/include/linux/pm-trace.h
>>>> @@ -6,29 +6,33 @@
>>>>  #include <linux/types.h>
>>>>  
>>>>  extern int pm_trace_enabled;
>>>> -
>>>> -static inline int pm_trace_is_enabled(void)
>>>> -{
>>>> -       return pm_trace_enabled;
>>>> -}
>>>> +extern struct mutex pt_mutex;
>>>>  
>>>>  struct device;
>>>>  extern void set_trace_device(struct device *);
>>>>  extern void generate_pm_trace(const void *tracedata, unsigned int user);
>>>>  extern int show_trace_dev_match(char *buf, size_t size);
>>>>  
>>>> -#define TRACE_DEVICE(dev) do { \
>>>> +#define TRACE_DEVICE_START(dev) do { \
>>>>    if (pm_trace_enabled) \
>>>> +          mutex_lock(&pt_mutex); \
>>>>            set_trace_device(dev); \
>>>>    } while(0)
>>>>  
>>>> +#define TRACE_DEVICE_END() \
>>>> +do { \
>>>> +  if (pm_trace_enabled) { \
>>>> +          mutex_unlock(&pt_mutex); \
>>>> +  } \
>>>> +} while (0)
>>>> +
>>> Won't this serialize the whole thing again?
>> Yes, this mutex lock will ultimately serialize all PM operations. But, all 
>> device's PM operations are asynchronous each other at first. So, the PM 
>> operation order of all devices will vary in multiple suspend/resume. This 
>> can be similar to real to an extreme, and helpful to debugging.
> I see.  You're saying that callbacks will be serialized, but if they 
> originally
> would be asynchronous with respect to each other (they may run in parallel 
> IOW),
> their respective ordering may vary between suspend-resume cycles.
>
> The class of bugs you can catch this way is quite limited and the change is
> rather intrusive, so I'm with Pavel on that.
Sorry for late reply.

Although the improvement to PM-trace is limited, I still think it is helpful 
for debugging some special suspend/resume bugs.


Thanks,
Zhonghui
>
>

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to