[tip:x86/urgent] x86/paravirt: Prevent rtc_cmos platform device init on PV guests

2015-12-19 Thread tip-bot for David Vrabel
Commit-ID:  d8c98a1d1488747625ad6044d423406e17e99b7a
Gitweb: http://git.kernel.org/tip/d8c98a1d1488747625ad6044d423406e17e99b7a
Author: David Vrabel 
AuthorDate: Fri, 11 Dec 2015 09:07:53 -0500
Committer:  Thomas Gleixner 
CommitDate: Sat, 19 Dec 2015 21:35:13 +0100

x86/paravirt: Prevent rtc_cmos platform device init on PV guests

Adding the rtc platform device in non-privileged Xen PV guests causes
an IRQ conflict because these guests do not have a legacy PIC and may
allocate irqs in the legacy range.

In a single VCPU Xen PV guest we should have:

/proc/interrupts:
   CPU0
  0:   4934  xen-percpu-virq  timer0
  1:  0  xen-percpu-ipi   spinlock0
  2:  0  xen-percpu-ipi   resched0
  3:  0  xen-percpu-ipi   callfunc0
  4:  0  xen-percpu-virq  debug0
  5:  0  xen-percpu-ipi   callfuncsingle0
  6:  0  xen-percpu-ipi   irqwork0
  7:321   xen-dyn-event xenbus
  8: 90   xen-dyn-event hvc_console
  ...

But hvc_console cannot get its interrupt because it is already in use
by rtc0 and the console does not work.

  genirq: Flags mismatch irq 8.  (hvc_console) vs.  (rtc0)

We can avoid this problem by realizing that unprivileged PV guests (both
Xen and lguest) are not supposed to have an rtc_cmos device, so
adding it is not necessary.

Privileged guests (i.e. Xen's dom0) do use it but they should not have
irq conflicts since they allocate irqs above legacy range (above
gsi_top, in fact).

Instead of explicitly testing whether the guest is privileged we can
extend pv_info structure to include information about guest's RTC
support.

Reported-and-tested-by: Sander Eikelenboom 
Signed-off-by: David Vrabel 
Signed-off-by: Boris Ostrovsky 
Cc: vkuzn...@redhat.com
Cc: xen-de...@lists.xenproject.org
Cc: konrad.w...@oracle.com
Cc: sta...@vger.kernel.org # 4.2+
Link: 
http://lkml.kernel.org/r/1449842873-2613-1-git-send-email-boris.ostrov...@oracle.com
Signed-off-by: Thomas Gleixner 
---
 arch/x86/include/asm/paravirt.h   | 6 ++
 arch/x86/include/asm/paravirt_types.h | 5 +
 arch/x86/include/asm/processor.h  | 1 +
 arch/x86/kernel/rtc.c | 3 +++
 arch/x86/lguest/boot.c| 1 +
 arch/x86/xen/enlighten.c  | 4 +++-
 6 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 10d0596..c759b3c 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -19,6 +19,12 @@ static inline int paravirt_enabled(void)
return pv_info.paravirt_enabled;
 }
 
+static inline int paravirt_has_feature(unsigned int feature)
+{
+   WARN_ON_ONCE(!pv_info.paravirt_enabled);
+   return (pv_info.features & feature);
+}
+
 static inline void load_sp0(struct tss_struct *tss,
 struct thread_struct *thread)
 {
diff --git a/arch/x86/include/asm/paravirt_types.h 
b/arch/x86/include/asm/paravirt_types.h
index 31247b5..3d44191 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -70,9 +70,14 @@ struct pv_info {
 #endif
 
int paravirt_enabled;
+   unsigned int features;/* valid only if paravirt_enabled is set */
const char *name;
 };
 
+#define paravirt_has(x) paravirt_has_feature(PV_SUPPORTED_##x)
+/* Supported features */
+#define PV_SUPPORTED_RTC(1<<0)
+
 struct pv_init_ops {
/*
 * Patch may replace one of the defined code sequences with
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 6752225..2d5a50c 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -472,6 +472,7 @@ static inline unsigned long current_top_of_stack(void)
 #else
 #define __cpuidnative_cpuid
 #define paravirt_enabled() 0
+#define paravirt_has(x)0
 
 static inline void load_sp0(struct tss_struct *tss,
struct thread_struct *thread)
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index cd96852..4af8d06 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -200,6 +200,9 @@ static __init int add_rtc_cmos(void)
}
 #endif
 
+   if (paravirt_enabled() && !paravirt_has(RTC))
+   return -ENODEV;
+
platform_device_register(&rtc_device);
dev_info(&rtc_device.dev,
 "registered platform RTC device (no PNP device found)\n");
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index a0d09f6..a43b2ea 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -1414,6 +1414,7 @@ __init void lguest_init(void)
pv_info.kernel_rpl = 1;
/* Everyone except Xen runs with this set. */
pv_info.shared_kernel_pmd = 1;
+   pv_info.features = 0;
 
/*
 * We set up all the lguest overrides for sensitive operations.  These
diff --gi

[tip:locking/core] locking/pvqspinlock, x86: Enable PV qspinlock for Xen

2015-05-08 Thread tip-bot for David Vrabel
Commit-ID:  e95e6f176c61dd0e7bd9fdfb4956df1f9bfe99d4
Gitweb: http://git.kernel.org/tip/e95e6f176c61dd0e7bd9fdfb4956df1f9bfe99d4
Author: David Vrabel 
AuthorDate: Fri, 24 Apr 2015 14:56:40 -0400
Committer:  Ingo Molnar 
CommitDate: Fri, 8 May 2015 12:37:18 +0200

locking/pvqspinlock, x86: Enable PV qspinlock for Xen

This patch adds the necessary Xen specific code to allow Xen to
support the CPU halting and kicking operations needed by the queue
spinlock PV code.

Signed-off-by: David Vrabel 
Signed-off-by: Waiman Long 
Signed-off-by: Peter Zijlstra (Intel) 
Cc: Andrew Morton 
Cc: Boris Ostrovsky 
Cc: Borislav Petkov 
Cc: Daniel J Blueman 
Cc: Douglas Hatch 
Cc: H. Peter Anvin 
Cc: Konrad Rzeszutek Wilk 
Cc: Linus Torvalds 
Cc: Oleg Nesterov 
Cc: Paolo Bonzini 
Cc: Paul E. McKenney 
Cc: Peter Zijlstra 
Cc: Raghavendra K T 
Cc: Rik van Riel 
Cc: Scott J Norton 
Cc: Thomas Gleixner 
Cc: virtualizat...@lists.linux-foundation.org
Cc: xen-de...@lists.xenproject.org
Link: 
http://lkml.kernel.org/r/1429901803-29771-12-git-send-email-waiman.l...@hp.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/xen/spinlock.c | 64 +
 kernel/Kconfig.locks|  2 +-
 2 files changed, 61 insertions(+), 5 deletions(-)

diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 956374c..af907a9 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -17,6 +17,56 @@
 #include "xen-ops.h"
 #include "debugfs.h"
 
+static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
+static DEFINE_PER_CPU(char *, irq_name);
+static bool xen_pvspin = true;
+
+#ifdef CONFIG_QUEUED_SPINLOCK
+
+#include 
+
+static void xen_qlock_kick(int cpu)
+{
+   xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
+}
+
+/*
+ * Halt the current CPU & release it back to the host
+ */
+static void xen_qlock_wait(u8 *byte, u8 val)
+{
+   int irq = __this_cpu_read(lock_kicker_irq);
+
+   /* If kicker interrupts not initialized yet, just spin */
+   if (irq == -1)
+   return;
+
+   /* clear pending */
+   xen_clear_irq_pending(irq);
+   barrier();
+
+   /*
+* We check the byte value after clearing pending IRQ to make sure
+* that we won't miss a wakeup event because of the clearing.
+*
+* The sync_clear_bit() call in xen_clear_irq_pending() is atomic.
+* So it is effectively a memory barrier for x86.
+*/
+   if (READ_ONCE(*byte) != val)
+   return;
+
+   /*
+* If an interrupt happens here, it will leave the wakeup irq
+* pending, which will cause xen_poll_irq() to return
+* immediately.
+*/
+
+   /* Block until irq becomes pending (or perhaps a spurious wakeup) */
+   xen_poll_irq(irq);
+}
+
+#else /* CONFIG_QUEUED_SPINLOCK */
+
 enum xen_contention_stat {
TAKEN_SLOW,
TAKEN_SLOW_PICKUP,
@@ -100,12 +150,9 @@ struct xen_lock_waiting {
__ticket_t want;
 };
 
-static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
-static DEFINE_PER_CPU(char *, irq_name);
 static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting);
 static cpumask_t waiting_cpus;
 
-static bool xen_pvspin = true;
 __visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
 {
int irq = __this_cpu_read(lock_kicker_irq);
@@ -217,6 +264,7 @@ static void xen_unlock_kick(struct arch_spinlock *lock, 
__ticket_t next)
}
}
 }
+#endif /* CONFIG_QUEUED_SPINLOCK */
 
 static irqreturn_t dummy_handler(int irq, void *dev_id)
 {
@@ -280,8 +328,16 @@ void __init xen_init_spinlocks(void)
return;
}
printk(KERN_DEBUG "xen: PV spinlocks enabled\n");
+#ifdef CONFIG_QUEUED_SPINLOCK
+   __pv_init_lock_hash();
+   pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
+   pv_lock_ops.queued_spin_unlock = 
PV_CALLEE_SAVE(__pv_queued_spin_unlock);
+   pv_lock_ops.wait = xen_qlock_wait;
+   pv_lock_ops.kick = xen_qlock_kick;
+#else
pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning);
pv_lock_ops.unlock_kick = xen_unlock_kick;
+#endif
 }
 
 /*
@@ -310,7 +366,7 @@ static __init int xen_parse_nopvspin(char *arg)
 }
 early_param("xen_nopvspin", xen_parse_nopvspin);
 
-#ifdef CONFIG_XEN_DEBUG_FS
+#if defined(CONFIG_XEN_DEBUG_FS) && !defined(CONFIG_QUEUED_SPINLOCK)
 
 static struct dentry *d_spin_debug;
 
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
index 4379eef..95dd758 100644
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
@@ -240,7 +240,7 @@ config ARCH_USE_QUEUED_SPINLOCK
 
 config QUEUED_SPINLOCK
def_bool y if ARCH_USE_QUEUED_SPINLOCK
-   depends on SMP && (!PARAVIRT_SPINLOCKS || !XEN)
+   depends on SMP
 
 config ARCH_USE_QUEUE_RWLOCK
bool
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/major

[tip:timers/core] x86: xen: Sync the wallclock when the system time is set

2013-06-28 Thread tip-bot for David Vrabel
Commit-ID:  5584880e44e49c587059801faa2a9f7d22619c48
Gitweb: http://git.kernel.org/tip/5584880e44e49c587059801faa2a9f7d22619c48
Author: David Vrabel 
AuthorDate: Thu, 27 Jun 2013 11:35:47 +0100
Committer:  Thomas Gleixner 
CommitDate: Fri, 28 Jun 2013 23:15:06 +0200

x86: xen: Sync the wallclock when the system time is set

Currently the Xen wallclock is only updated every 11 minutes if NTP is
synchronized to its clock source (using the sync_cmos_clock() work).
If a guest is started before NTP is synchronized it may see an
incorrect wallclock time.

Use the pvclock_gtod notifier chain to receive a notification when the
system time has changed and update the wallclock to match.

This chain is called on every timer tick and we want to avoid an extra
(expensive) hypercall on every tick.  Because dom0 has historically
never provided a very accurate wallclock and guests do not expect one,
we can do this simply: the wallclock is only updated if the clock was
set.

Signed-off-by: David Vrabel 
Cc: Konrad Rzeszutek Wilk 
Cc: John Stultz 
Cc: 
Link: 
http://lkml.kernel.org/r/1372329348-20841-5-git-send-email-david.vra...@citrix.com
Signed-off-by: Thomas Gleixner 
---
 arch/x86/xen/time.c | 28 
 1 file changed, 28 insertions(+)

diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index a1947ac..3364850 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -14,6 +14,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -212,6 +213,30 @@ static int xen_set_wallclock(const struct timespec *now)
return HYPERVISOR_dom0_op(&op);
 }
 
+static int xen_pvclock_gtod_notify(struct notifier_block *nb, unsigned long 
was_set,
+  void *priv)
+{
+   struct timespec now;
+   struct xen_platform_op op;
+
+   if (!was_set)
+   return NOTIFY_OK;
+
+   now = __current_kernel_time();
+
+   op.cmd = XENPF_settime;
+   op.u.settime.secs = now.tv_sec;
+   op.u.settime.nsecs = now.tv_nsec;
+   op.u.settime.system_time = xen_clocksource_read();
+
+   (void)HYPERVISOR_dom0_op(&op);
+   return NOTIFY_OK;
+}
+
+static struct notifier_block xen_pvclock_gtod_notifier = {
+   .notifier_call = xen_pvclock_gtod_notify,
+};
+
 static struct clocksource xen_clocksource __read_mostly = {
.name = "xen",
.rating = 400,
@@ -473,6 +498,9 @@ static void __init xen_time_init(void)
xen_setup_runstate_info(cpu);
xen_setup_timer(cpu);
xen_setup_cpu_clockevents();
+
+   if (xen_initial_domain())
+   pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
 }
 
 void __init xen_init_time_ops(void)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:timers/core] timekeeping: Indicate that clock was set in the pvclock gtod notifier

2013-06-28 Thread tip-bot for David Vrabel
Commit-ID:  780427f0e113b4c77dfff4d258c05a902cdb0eb9
Gitweb: http://git.kernel.org/tip/780427f0e113b4c77dfff4d258c05a902cdb0eb9
Author: David Vrabel 
AuthorDate: Thu, 27 Jun 2013 11:35:46 +0100
Committer:  Thomas Gleixner 
CommitDate: Fri, 28 Jun 2013 23:15:06 +0200

timekeeping: Indicate that clock was set in the pvclock gtod notifier

If the clock was set (stepped), set the action parameter to functions
in the pvclock gtod notifier chain to non-zero.  This allows the
callee to only do work if the clock was stepped.

This will be used on Xen as the synchronization of the Xen wallclock
to the control domain's (dom0) system time will be done with this
notifier and updating on every timer tick is unnecessary and too
expensive.

Signed-off-by: David Vrabel 
Cc: Konrad Rzeszutek Wilk 
Cc: John Stultz 
Cc: 
Link: 
http://lkml.kernel.org/r/1372329348-20841-4-git-send-email-david.vra...@citrix.com
Signed-off-by: Thomas Gleixner 
---
 include/linux/pvclock_gtod.h |  7 +++
 kernel/time/timekeeping.c| 30 ++
 2 files changed, 25 insertions(+), 12 deletions(-)

diff --git a/include/linux/pvclock_gtod.h b/include/linux/pvclock_gtod.h
index 0ca7582..a71d2db 100644
--- a/include/linux/pvclock_gtod.h
+++ b/include/linux/pvclock_gtod.h
@@ -3,6 +3,13 @@
 
 #include 
 
+/*
+ * The pvclock gtod notifier is called when the system time is updated
+ * and is used to keep guest time synchronized with host time.
+ *
+ * The 'action' parameter in the notifier function is false (0), or
+ * true (non-zero) if system time was stepped.
+ */
 extern int pvclock_gtod_register_notifier(struct notifier_block *nb);
 extern int pvclock_gtod_unregister_notifier(struct notifier_block *nb);
 
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index d8b23a9..846d0a1 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -29,6 +29,7 @@
 
 #define TK_CLEAR_NTP   (1 << 0)
 #define TK_MIRROR  (1 << 1)
+#define TK_CLOCK_WAS_SET   (1 << 2)
 
 static struct timekeeper timekeeper;
 static DEFINE_RAW_SPINLOCK(timekeeper_lock);
@@ -204,9 +205,9 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper 
*tk)
 
 static RAW_NOTIFIER_HEAD(pvclock_gtod_chain);
 
-static void update_pvclock_gtod(struct timekeeper *tk)
+static void update_pvclock_gtod(struct timekeeper *tk, bool was_set)
 {
-   raw_notifier_call_chain(&pvclock_gtod_chain, 0, tk);
+   raw_notifier_call_chain(&pvclock_gtod_chain, was_set, tk);
 }
 
 /**
@@ -220,7 +221,7 @@ int pvclock_gtod_register_notifier(struct notifier_block 
*nb)
 
raw_spin_lock_irqsave(&timekeeper_lock, flags);
ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb);
-   update_pvclock_gtod(tk);
+   update_pvclock_gtod(tk, true);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
return ret;
@@ -252,7 +253,7 @@ static void timekeeping_update(struct timekeeper *tk, 
unsigned int action)
ntp_clear();
}
update_vsyscall(tk);
-   update_pvclock_gtod(tk);
+   update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET);
 
if (action & TK_MIRROR)
memcpy(&shadow_timekeeper, &timekeeper, sizeof(timekeeper));
@@ -512,7 +513,7 @@ int do_settimeofday(const struct timespec *tv)
 
tk_set_xtime(tk, tv);
 
-   timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR);
+   timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
 
write_seqcount_end(&timekeeper_seq);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@ -556,7 +557,7 @@ int timekeeping_inject_offset(struct timespec *ts)
tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts));
 
 error: /* even if we error out, we forwarded the time, so call update */
-   timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR);
+   timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
 
write_seqcount_end(&timekeeper_seq);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@ -646,7 +647,7 @@ static int change_clocksource(void *data)
module_put(new->owner);
}
}
-   timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR);
+   timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
 
write_seqcount_end(&timekeeper_seq);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@ -887,7 +888,7 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
 
__timekeeping_inject_sleeptime(tk, delta);
 
-   timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR);
+   timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
 
write_seqcount_end(&timekeeper_seq);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@ -969,7 +970,7 @@ static void timekeeping_resume(void)
tk->cycle_last = clock->cycle_last = cycle_now;
tk->ntp_error = 0;
timekeeping_suspended = 0;
- 

[tip:timers/core] x86: xen: Sync the CMOS RTC as well as the Xen wallclock

2013-06-28 Thread tip-bot for David Vrabel
Commit-ID:  47433b8c9d7480a3eebd99df38e857ce85a37cee
Gitweb: http://git.kernel.org/tip/47433b8c9d7480a3eebd99df38e857ce85a37cee
Author: David Vrabel 
AuthorDate: Thu, 27 Jun 2013 11:35:48 +0100
Committer:  Thomas Gleixner 
CommitDate: Fri, 28 Jun 2013 23:15:07 +0200

x86: xen: Sync the CMOS RTC as well as the Xen wallclock

Adjustments to Xen's persistent clock via update_persistent_clock()
don't actually persist, as the Xen wallclock is a software only clock
and modifications to it do not modify the underlying CMOS RTC.

The x86_platform.set_wallclock hook is there to keep the hardware RTC
synchronized. On a guest this is pointless.

On Dom0 we can use the native implementation which actually updates the
hardware RTC, but we still need to keep the software emulation of RTC
for the guests up to date. The subscription to the pvclock_notifier
allows us to emulate this easily. The notifier is called at every tick
and when the clock was set.

Right now we only use that notifier when the clock was set, but due to
the fact that it is called periodically from the timekeeping update
code, we can utilize it to emulate the NTP driven drift compensation
of update_persistent_clock() for the Xen wall (software) clock.

Add an 11-minute periodic update to the pvclock_gtod notifier callback
to achieve that. The static variable 'next' which maintains that
11-minute update cycle is protected by the core code serialization, so
there is no need to add a Xen specific serialization mechanism.

[ tglx: Massaged changelog and added a few comments ]

Signed-off-by: David Vrabel 
Cc: Konrad Rzeszutek Wilk 
Cc: John Stultz 
Cc: 
Link: 
http://lkml.kernel.org/r/1372329348-20841-6-git-send-email-david.vra...@citrix.com
Signed-off-by: Thomas Gleixner 
---
 arch/x86/xen/time.c | 45 ++---
 1 file changed, 26 insertions(+), 19 deletions(-)

diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 3364850..7a5671b 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -199,37 +199,42 @@ static void xen_get_wallclock(struct timespec *now)
 
 static int xen_set_wallclock(const struct timespec *now)
 {
-   struct xen_platform_op op;
-
-   /* do nothing for domU */
-   if (!xen_initial_domain())
-   return -1;
-
-   op.cmd = XENPF_settime;
-   op.u.settime.secs = now->tv_sec;
-   op.u.settime.nsecs = now->tv_nsec;
-   op.u.settime.system_time = xen_clocksource_read();
-
-   return HYPERVISOR_dom0_op(&op);
+   return -1;
 }
 
-static int xen_pvclock_gtod_notify(struct notifier_block *nb, unsigned long 
was_set,
-  void *priv)
+static int xen_pvclock_gtod_notify(struct notifier_block *nb,
+  unsigned long was_set, void *priv)
 {
-   struct timespec now;
-   struct xen_platform_op op;
+   /* Protected by the calling core code serialization */
+   static struct timespec next_sync;
 
-   if (!was_set)
-   return NOTIFY_OK;
+   struct xen_platform_op op;
+   struct timespec now;
 
now = __current_kernel_time();
 
+   /*
+* We only take the expensive HV call when the clock was set
+* or when the 11 minutes RTC synchronization time elapsed.
+*/
+   if (!was_set && timespec_compare(&now, &next_sync) < 0)
+   return NOTIFY_OK;
+
op.cmd = XENPF_settime;
op.u.settime.secs = now.tv_sec;
op.u.settime.nsecs = now.tv_nsec;
op.u.settime.system_time = xen_clocksource_read();
 
(void)HYPERVISOR_dom0_op(&op);
+
+   /*
+* Move the next drift compensation time 11 minutes
+* ahead. That's emulating the sync_cmos_clock() update for
+* the hardware RTC.
+*/
+   next_sync = now;
+   next_sync.tv_sec += 11 * 60;
+
return NOTIFY_OK;
 }
 
@@ -513,7 +518,9 @@ void __init xen_init_time_ops(void)
 
x86_platform.calibrate_tsc = xen_tsc_khz;
x86_platform.get_wallclock = xen_get_wallclock;
-   x86_platform.set_wallclock = xen_set_wallclock;
+   /* Dom0 uses the native method to set the hardware RTC. */
+   if (!xen_initial_domain())
+   x86_platform.set_wallclock = xen_set_wallclock;
 }
 
 #ifdef CONFIG_XEN_PVHVM
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:timers/core] hrtimers: Support resuming with two or more CPUs online (but stopped)

2013-06-28 Thread tip-bot for David Vrabel
Commit-ID:  7c4c3a0f18ba57ea2a2985034532303d2929902a
Gitweb: http://git.kernel.org/tip/7c4c3a0f18ba57ea2a2985034532303d2929902a
Author: David Vrabel 
AuthorDate: Thu, 27 Jun 2013 11:35:44 +0100
Committer:  Thomas Gleixner 
CommitDate: Fri, 28 Jun 2013 23:15:06 +0200

hrtimers: Support resuming with two or more CPUs online (but stopped)

hrtimers_resume() only reprograms the timers for the current CPU as it
assumes that all other CPUs are offline at this point in the resume
process. If other CPUs are online then their timers will not be
corrected and they may fire at the wrong time.

When running as a Xen guest, this assumption is not true.  Non-boot
CPUs are only stopped with IRQs disabled instead of offlining them.
This is a performance optimization as disabling the CPUs would add an
unacceptable amount of additional downtime during a live migration (>
200 ms for a 4 VCPU guest).

hrtimers_resume() cannot call on_each_cpu(retrigger_next_event,...)
as the other CPUs will be stopped with IRQs disabled.  Instead, defer
the call to the next softirq.

[ tglx: Separated the xen change out ]

Signed-off-by: David Vrabel 
Cc: Konrad Rzeszutek Wilk  
Cc: John Stultz  
Cc: 
Link: 
http://lkml.kernel.org/r/1372329348-20841-2-git-send-email-david.vra...@citrix.com
Signed-off-by: Thomas Gleixner 
---
 kernel/hrtimer.c | 15 ---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index fd4b13b..e86827e 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -773,15 +773,24 @@ void clock_was_set(void)
 
 /*
  * During resume we might have to reprogram the high resolution timer
- * interrupt (on the local CPU):
+ * interrupt on all online CPUs.  However, all other CPUs will be
+ * stopped with IRQs interrupts disabled so the clock_was_set() call
+ * must be deferred to the softirq.
+ *
+ * The one-shot timer has already been programmed to fire immediately
+ * (see tick_resume_oneshot()) and this interrupt will trigger the
+ * softirq to run early enough to correctly reprogram the timers on
+ * all CPUs.
  */
 void hrtimers_resume(void)
 {
+   struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
+
WARN_ONCE(!irqs_disabled(),
  KERN_INFO "hrtimers_resume() called with IRQs enabled!");
 
-   retrigger_next_event(NULL);
-   timerfd_clock_was_set();
+   cpu_base->clock_was_set = 1;
+   __raise_softirq_irqoff(HRTIMER_SOFTIRQ);
 }
 
 static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:timers/core] timekeeping: Pass flags instead of multiple bools to timekeeping_update()

2013-06-28 Thread tip-bot for David Vrabel
Commit-ID:  04397fe94ad65289884b9862b6a0c722ececaadf
Gitweb: http://git.kernel.org/tip/04397fe94ad65289884b9862b6a0c722ececaadf
Author: David Vrabel 
AuthorDate: Thu, 27 Jun 2013 11:35:45 +0100
Committer:  Thomas Gleixner 
CommitDate: Fri, 28 Jun 2013 23:15:06 +0200

timekeeping: Pass flags instead of multiple bools to timekeeping_update()

Instead of passing multiple bools to timekeeping_update(), define
flags and use a single 'action' parameter.  It is then more obvious
what each timekeeping_update() call does.

Signed-off-by: David Vrabel 
Cc: Konrad Rzeszutek Wilk 
Cc: John Stultz 
Cc: 
Link: 
http://lkml.kernel.org/r/1372329348-20841-3-git-send-email-david.vra...@citrix.com
Signed-off-by: Thomas Gleixner 
---
 kernel/time/timekeeping.c | 21 -
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 838fc07..d8b23a9 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -27,6 +27,9 @@
 #include "ntp_internal.h"
 #include "timekeeping_internal.h"
 
+#define TK_CLEAR_NTP   (1 << 0)
+#define TK_MIRROR  (1 << 1)
+
 static struct timekeeper timekeeper;
 static DEFINE_RAW_SPINLOCK(timekeeper_lock);
 static seqcount_t timekeeper_seq;
@@ -242,16 +245,16 @@ int pvclock_gtod_unregister_notifier(struct 
notifier_block *nb)
 EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier);
 
 /* must hold timekeeper_lock */
-static void timekeeping_update(struct timekeeper *tk, bool clearntp, bool 
mirror)
+static void timekeeping_update(struct timekeeper *tk, unsigned int action)
 {
-   if (clearntp) {
+   if (action & TK_CLEAR_NTP) {
tk->ntp_error = 0;
ntp_clear();
}
update_vsyscall(tk);
update_pvclock_gtod(tk);
 
-   if (mirror)
+   if (action & TK_MIRROR)
memcpy(&shadow_timekeeper, &timekeeper, sizeof(timekeeper));
 }
 
@@ -509,7 +512,7 @@ int do_settimeofday(const struct timespec *tv)
 
tk_set_xtime(tk, tv);
 
-   timekeeping_update(tk, true, true);
+   timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR);
 
write_seqcount_end(&timekeeper_seq);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@ -553,7 +556,7 @@ int timekeeping_inject_offset(struct timespec *ts)
tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts));
 
 error: /* even if we error out, we forwarded the time, so call update */
-   timekeeping_update(tk, true, true);
+   timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR);
 
write_seqcount_end(&timekeeper_seq);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@ -643,7 +646,7 @@ static int change_clocksource(void *data)
module_put(new->owner);
}
}
-   timekeeping_update(tk, true, true);
+   timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR);
 
write_seqcount_end(&timekeeper_seq);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@ -884,7 +887,7 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
 
__timekeeping_inject_sleeptime(tk, delta);
 
-   timekeeping_update(tk, true, true);
+   timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR);
 
write_seqcount_end(&timekeeper_seq);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@ -966,7 +969,7 @@ static void timekeeping_resume(void)
tk->cycle_last = clock->cycle_last = cycle_now;
tk->ntp_error = 0;
timekeeping_suspended = 0;
-   timekeeping_update(tk, false, true);
+   timekeeping_update(tk, TK_MIRROR);
write_seqcount_end(&timekeeper_seq);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
@@ -1419,7 +1422,7 @@ static void update_wall_time(void)
 * updating.
 */
memcpy(real_tk, tk, sizeof(*tk));
-   timekeeping_update(real_tk, false, false);
+   timekeeping_update(real_tk, 0);
write_seqcount_end(&timekeeper_seq);
 out:
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:timers/core] xen: Remove clock_was_set() call in the resume path

2013-06-28 Thread tip-bot for David Vrabel
Commit-ID:  0eb071651474952c8b6daecd36b378e2d01be22c
Gitweb: http://git.kernel.org/tip/0eb071651474952c8b6daecd36b378e2d01be22c
Author: David Vrabel 
AuthorDate: Thu, 27 Jun 2013 11:35:44 +0100
Committer:  Thomas Gleixner 
CommitDate: Fri, 28 Jun 2013 23:15:06 +0200

xen: Remove clock_was_set() call in the resume path

Commit 359cdd3f866 (xen: maintain clock offset over save/restore) added
a clock_was_set() call into the xen resume code to propagate the
system time changes. With the modified hrtimer resume code, which
makes sure that all CPUs are notified, this call is no longer necessary.

[ tglx: Separated it from the hrtimer change ]

Signed-off-by: David Vrabel 
Cc: Konrad Rzeszutek Wilk  
Cc: John Stultz  
Cc: 
Link: 
http://lkml.kernel.org/r/1372329348-20841-2-git-send-email-david.vra...@citrix.com
Signed-off-by: Thomas Gleixner 

---
 drivers/xen/manage.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index 412b96c..421da85 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -166,9 +166,6 @@ out_resume:
 
dpm_resume_end(si.cancelled ? PMSG_THAW : PMSG_RESTORE);
 
-   /* Make sure timer events get retriggered on all CPUs */
-   clock_was_set();
-
 out_thaw:
 #ifdef CONFIG_PREEMPT
thaw_processes();
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:perf/core] x86: Allow tracing of functions in arch/x86/kernel/rtc.c

2012-10-24 Thread tip-bot for David Vrabel
Commit-ID:  ce37f400336a34bb6e72c4700f9dcc2a41ff7163
Gitweb: http://git.kernel.org/tip/ce37f400336a34bb6e72c4700f9dcc2a41ff7163
Author: David Vrabel 
AuthorDate: Mon, 8 Oct 2012 13:07:30 +0100
Committer:  Ingo Molnar 
CommitDate: Wed, 24 Oct 2012 13:14:22 +0200

x86: Allow tracing of functions in arch/x86/kernel/rtc.c

Move native_read_tsc() to tsc.c to allow profiling to be
re-enabled for rtc.c.

Signed-off-by: David Vrabel 
Cc: Peter Zijlstra 
Cc: Frederic Weisbecker 
Cc: Steven Rostedt 
Link: 
http://lkml.kernel.org/r/1349698050-6560-1-git-send-email-david.vra...@citrix.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/kernel/Makefile |1 -
 arch/x86/kernel/rtc.c|6 --
 arch/x86/kernel/tsc.c|6 ++
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 91ce48f..9fd5eed 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -9,7 +9,6 @@ CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
 ifdef CONFIG_FUNCTION_TRACER
 # Do not profile debug and lowlevel utilities
 CFLAGS_REMOVE_tsc.o = -pg
-CFLAGS_REMOVE_rtc.o = -pg
 CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
 CFLAGS_REMOVE_pvclock.o = -pg
 CFLAGS_REMOVE_kvmclock.o = -pg
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 4929c1b..801602b 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -195,12 +195,6 @@ void read_persistent_clock(struct timespec *ts)
ts->tv_nsec = 0;
 }
 
-unsigned long long native_read_tsc(void)
-{
-   return __native_read_tsc();
-}
-EXPORT_SYMBOL(native_read_tsc);
-
 
 static struct resource rtc_resources[] = {
[0] = {
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index cfa5d4f..06ccb50 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -77,6 +77,12 @@ unsigned long long
 sched_clock(void) __attribute__((alias("native_sched_clock")));
 #endif
 
+unsigned long long native_read_tsc(void)
+{
+   return __native_read_tsc();
+}
+EXPORT_SYMBOL(native_read_tsc);
+
 int check_tsc_unstable(void)
 {
return tsc_unstable;
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/