Re: [Xen-devel] [PATCH v4 1/1] xen/time: do not decrease steal time after live migration on xen

2017-11-01 Thread kbuild test robot
Hi Dongli,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on xen-tip/linux-next]
[also build test ERROR on v4.14-rc7 next-20171018]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Dongli-Zhang/xen-time-do-not-decrease-steal-time-after-live-migration-on-xen/20171102-025719
base:   https://git.kernel.org/pub/scm/linux/kernel/git/xen/tip.git linux-next
config: arm64-allmodconfig (attached as .config)
compiler: aarch64-linux-gnu-gcc (Debian 6.1.1-9) 6.1.1 20160705
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=arm64 

All errors (new ones prefixed by >>):

   drivers/xen/time.c: In function 'xen_accumulate_runstate_time':
   drivers/xen/time.c:92:25: error: implicit declaration of function 'kcalloc' 
[-Werror=implicit-function-declaration]
  runstate_time_delta = kcalloc(num_possible_cpus(),
^~~
   drivers/xen/time.c:92:23: warning: assignment makes pointer from integer 
without a cast [-Wint-conversion]
  runstate_time_delta = kcalloc(num_possible_cpus(),
  ^
>> drivers/xen/time.c:102:31: error: implicit declaration of function 
>> 'kmalloc_array' [-Werror=implicit-function-declaration]
   runstate_time_delta[cpu] = kmalloc_array(RUNSTATE_max,
  ^
   drivers/xen/time.c:102:29: warning: assignment makes pointer from integer 
without a cast [-Wint-conversion]
   runstate_time_delta[cpu] = kmalloc_array(RUNSTATE_max,
^
   drivers/xen/time.c:141:5: error: implicit declaration of function 'kfree' 
[-Werror=implicit-function-declaration]
kfree(runstate_time_delta[cpu]);
^
   cc1: some warnings being treated as errors

vim +/kmalloc_array +102 drivers/xen/time.c

82  
83  void xen_accumulate_runstate_time(int action)
84  {
85  struct vcpu_runstate_info state;
86  int cpu, i;
87  
88  switch (action) {
89  case -1: /* backup runstate time before suspend */
90  WARN_ON_ONCE(unlikely(runstate_time_delta));
91  
  > 92  runstate_time_delta = kcalloc(num_possible_cpus(),
93
sizeof(*runstate_time_delta),
94GFP_KERNEL);
95  if (unlikely(!runstate_time_delta)) {
96  pr_alert("%s: failed to allocate 
runstate_time_delta\n",
97  __func__);
98  return;
99  }
   100  
   101  for_each_possible_cpu(cpu) {
 > 102  runstate_time_delta[cpu] = 
 > kmalloc_array(RUNSTATE_max,
   103
sizeof(**runstate_time_delta),
   104GFP_KERNEL);
   105  if (unlikely(!runstate_time_delta[cpu])) {
   106  pr_alert("%s: failed to allocate 
runstate_time_delta[%d]\n",
   107  __func__, cpu);
   108  action = 0;
   109  goto reclaim_mem;
   110  }
   111  
   112  xen_get_runstate_snapshot_cpu_delta(, 
cpu);
   113  memcpy(runstate_time_delta[cpu],
   114  state.time,
   115  RUNSTATE_max * 
sizeof(**runstate_time_delta));
   116  }
   117  break;
   118  
   119  case 0: /* backup runstate time after resume */
   120  if (unlikely(!runstate_time_delta)) {
   121  pr_alert("%s: cannot accumulate runstate time 
as runstate_time_delta is NULL\n",
   122  __func__);
   123  return;
   124  }
   125  
   126  for_each_possible_cpu(cpu) {
   127  for (i = 0; i < RUNSTATE_max; i++)
   128  per_cpu(old_runstate_time, cpu)[i] +=
   129  
runstate_time_delta[cpu][i];
   130  }
   131  break;
   132  
   133  default: /* do not accumulate runstate time for checkpointing */
   134  break;
   135  }
   136  
   137  reclaim_mem:
   138  if (action != -1 && runstate_time_delta) {
   139  for_each_possible_cpu(cpu) {
   140 

Re: [Xen-devel] [PATCH v4 1/1] xen/time: do not decrease steal time after live migration on xen

2017-10-30 Thread Juergen Gross
On 30/10/17 06:26, Dongli Zhang wrote:
> After guest live migration on xen, steal time in /proc/stat
> (cpustat[CPUTIME_STEAL]) might decrease because steal returned by
> xen_steal_lock() might be less than this_rq()->prev_steal_time which is
> derived from previous return value of xen_steal_clock().
> 
> For instance, steal time of each vcpu is 335 before live migration.
> 
> cpu  198 0 368 200064 1962 0 0 1340 0 0
> cpu0 38 0 81 50063 492 0 0 335 0 0
> cpu1 65 0 97 49763 634 0 0 335 0 0
> cpu2 38 0 81 50098 462 0 0 335 0 0
> cpu3 56 0 107 50138 374 0 0 335 0 0
> 
> After live migration, steal time is reduced to 312.
> 
> cpu  200 0 370 200330 1971 0 0 1248 0 0
> cpu0 38 0 82 50123 500 0 0 312 0 0
> cpu1 65 0 97 49832 634 0 0 312 0 0
> cpu2 39 0 82 50167 462 0 0 312 0 0
> cpu3 56 0 107 50207 374 0 0 312 0 0
> 
> Since runstate times are cumulative and cleared during xen live migration
> by xen hypervisor, the idea of this patch is to accumulate runstate times
> to global percpu variables before live migration suspend. Once guest VM is
> resumed, xen_get_runstate_snapshot_cpu() would always return the sum of new
> runstate times and previously accumulated times stored in global percpu
> variables.
> 
> Similar and more severe issue would impact prior linux 4.8-4.10 as
> discussed by Michael Las at
> https://0xstubs.org/debugging-a-flaky-cpu-steal-time-counter-on-a-paravirtualized-xen-guest,
> which would overflow steal time and lead to 100% st usage in top command
> for linux 4.8-4.10. A backport of this patch would fix that issue.
> 
> References: 
> https://0xstubs.org/debugging-a-flaky-cpu-steal-time-counter-on-a-paravirtualized-xen-guest
> Signed-off-by: Dongli Zhang 
> 
> ---
> Changed since v1:
>   * relocate modification to xen_get_runstate_snapshot_cpu
> 
> Changed since v2:
>   * accumulate runstate times before live migration
> 
> Changed since v3:
>   * do not accumulate times in the case of guest checkpointing
> 
> ---
>  drivers/xen/manage.c |  2 ++
>  drivers/xen/time.c   | 83 
> ++--
>  include/xen/interface/vcpu.h |  2 ++
>  include/xen/xen-ops.h|  1 +
>  4 files changed, 86 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
> index c425d03..3dc085d 100644
> --- a/drivers/xen/manage.c
> +++ b/drivers/xen/manage.c
> @@ -72,6 +72,7 @@ static int xen_suspend(void *data)
>   }
>  
>   gnttab_suspend();
> + xen_accumulate_runstate_time(-1);
>   xen_arch_pre_suspend();
>  
>   /*
> @@ -84,6 +85,7 @@ static int xen_suspend(void *data)
> : 0);
>  
>   xen_arch_post_suspend(si->cancelled);
> + xen_accumulate_runstate_time(si->cancelled);
>   gnttab_resume();
>  
>   if (!si->cancelled) {
> diff --git a/drivers/xen/time.c b/drivers/xen/time.c
> index ac5f23f..18e2b76 100644
> --- a/drivers/xen/time.c
> +++ b/drivers/xen/time.c
> @@ -19,6 +19,9 @@
>  /* runstate info updated by Xen */
>  static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
>  
> +static DEFINE_PER_CPU(u64[RUNSTATE_max], old_runstate_time);
> +static u64 **runstate_time_delta;
> +
>  /* return an consistent snapshot of 64-bit time/counter value */
>  static u64 get64(const u64 *p)
>  {
> @@ -47,8 +50,8 @@ static u64 get64(const u64 *p)
>   return ret;
>  }
>  
> -static void xen_get_runstate_snapshot_cpu(struct vcpu_runstate_info *res,
> -   unsigned int cpu)
> +static void xen_get_runstate_snapshot_cpu_delta(
> + struct vcpu_runstate_info *res, unsigned int cpu)
>  {
>   u64 state_time;
>   struct vcpu_runstate_info *state;
> @@ -66,6 +69,82 @@ static void xen_get_runstate_snapshot_cpu(struct 
> vcpu_runstate_info *res,
>(state_time & XEN_RUNSTATE_UPDATE));
>  }
>  
> +static void xen_get_runstate_snapshot_cpu(struct vcpu_runstate_info *res,
> +   unsigned int cpu)
> +{
> + int i;
> +
> + xen_get_runstate_snapshot_cpu_delta(res, cpu);
> +
> + for (i = 0; i < RUNSTATE_max; i++)
> + res->time[i] += per_cpu(old_runstate_time, cpu)[i];
> +}
> +
> +void xen_accumulate_runstate_time(int action)
> +{
> + struct vcpu_runstate_info state;
> + int cpu, i;
> +
> + switch (action) {
> + case -1: /* backup runstate time before suspend */
> + WARN_ON_ONCE(unlikely(runstate_time_delta));
> +
> + runstate_time_delta = kcalloc(num_possible_cpus(),
> +   sizeof(*runstate_time_delta),
> +   GFP_KERNEL);

You know the number of cpus, so you can just allocate an array of
struct vcpu_runstate_info:

struct vcpu_runstate_info *runstate_time_delta;

kcalloc(num_possible_cpus(), sizeof(*runstate_time_delta), GFP_KERNEL);

then ...

> + if 

[Xen-devel] [PATCH v4 1/1] xen/time: do not decrease steal time after live migration on xen

2017-10-29 Thread Dongli Zhang
After guest live migration on xen, steal time in /proc/stat
(cpustat[CPUTIME_STEAL]) might decrease because steal returned by
xen_steal_lock() might be less than this_rq()->prev_steal_time which is
derived from previous return value of xen_steal_clock().

For instance, steal time of each vcpu is 335 before live migration.

cpu  198 0 368 200064 1962 0 0 1340 0 0
cpu0 38 0 81 50063 492 0 0 335 0 0
cpu1 65 0 97 49763 634 0 0 335 0 0
cpu2 38 0 81 50098 462 0 0 335 0 0
cpu3 56 0 107 50138 374 0 0 335 0 0

After live migration, steal time is reduced to 312.

cpu  200 0 370 200330 1971 0 0 1248 0 0
cpu0 38 0 82 50123 500 0 0 312 0 0
cpu1 65 0 97 49832 634 0 0 312 0 0
cpu2 39 0 82 50167 462 0 0 312 0 0
cpu3 56 0 107 50207 374 0 0 312 0 0

Since runstate times are cumulative and cleared during xen live migration
by xen hypervisor, the idea of this patch is to accumulate runstate times
to global percpu variables before live migration suspend. Once guest VM is
resumed, xen_get_runstate_snapshot_cpu() would always return the sum of new
runstate times and previously accumulated times stored in global percpu
variables.

Similar and more severe issue would impact prior linux 4.8-4.10 as
discussed by Michael Las at
https://0xstubs.org/debugging-a-flaky-cpu-steal-time-counter-on-a-paravirtualized-xen-guest,
which would overflow steal time and lead to 100% st usage in top command
for linux 4.8-4.10. A backport of this patch would fix that issue.

References: 
https://0xstubs.org/debugging-a-flaky-cpu-steal-time-counter-on-a-paravirtualized-xen-guest
Signed-off-by: Dongli Zhang 

---
Changed since v1:
  * relocate modification to xen_get_runstate_snapshot_cpu

Changed since v2:
  * accumulate runstate times before live migration

Changed since v3:
  * do not accumulate times in the case of guest checkpointing

---
 drivers/xen/manage.c |  2 ++
 drivers/xen/time.c   | 83 ++--
 include/xen/interface/vcpu.h |  2 ++
 include/xen/xen-ops.h|  1 +
 4 files changed, 86 insertions(+), 2 deletions(-)

diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index c425d03..3dc085d 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -72,6 +72,7 @@ static int xen_suspend(void *data)
}
 
gnttab_suspend();
+   xen_accumulate_runstate_time(-1);
xen_arch_pre_suspend();
 
/*
@@ -84,6 +85,7 @@ static int xen_suspend(void *data)
: 0);
 
xen_arch_post_suspend(si->cancelled);
+   xen_accumulate_runstate_time(si->cancelled);
gnttab_resume();
 
if (!si->cancelled) {
diff --git a/drivers/xen/time.c b/drivers/xen/time.c
index ac5f23f..18e2b76 100644
--- a/drivers/xen/time.c
+++ b/drivers/xen/time.c
@@ -19,6 +19,9 @@
 /* runstate info updated by Xen */
 static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
 
+static DEFINE_PER_CPU(u64[RUNSTATE_max], old_runstate_time);
+static u64 **runstate_time_delta;
+
 /* return an consistent snapshot of 64-bit time/counter value */
 static u64 get64(const u64 *p)
 {
@@ -47,8 +50,8 @@ static u64 get64(const u64 *p)
return ret;
 }
 
-static void xen_get_runstate_snapshot_cpu(struct vcpu_runstate_info *res,
- unsigned int cpu)
+static void xen_get_runstate_snapshot_cpu_delta(
+   struct vcpu_runstate_info *res, unsigned int cpu)
 {
u64 state_time;
struct vcpu_runstate_info *state;
@@ -66,6 +69,82 @@ static void xen_get_runstate_snapshot_cpu(struct 
vcpu_runstate_info *res,
 (state_time & XEN_RUNSTATE_UPDATE));
 }
 
+static void xen_get_runstate_snapshot_cpu(struct vcpu_runstate_info *res,
+ unsigned int cpu)
+{
+   int i;
+
+   xen_get_runstate_snapshot_cpu_delta(res, cpu);
+
+   for (i = 0; i < RUNSTATE_max; i++)
+   res->time[i] += per_cpu(old_runstate_time, cpu)[i];
+}
+
+void xen_accumulate_runstate_time(int action)
+{
+   struct vcpu_runstate_info state;
+   int cpu, i;
+
+   switch (action) {
+   case -1: /* backup runstate time before suspend */
+   WARN_ON_ONCE(unlikely(runstate_time_delta));
+
+   runstate_time_delta = kcalloc(num_possible_cpus(),
+ sizeof(*runstate_time_delta),
+ GFP_KERNEL);
+   if (unlikely(!runstate_time_delta)) {
+   pr_alert("%s: failed to allocate runstate_time_delta\n",
+   __func__);
+   return;
+   }
+
+   for_each_possible_cpu(cpu) {
+   runstate_time_delta[cpu] = kmalloc_array(RUNSTATE_max,
+ sizeof(**runstate_time_delta),
+ GFP_KERNEL);
+