[tip:x86/urgent] x86/paravirt: Prevent rtc_cmos platform device init on PV guests
Commit-ID: d8c98a1d1488747625ad6044d423406e17e99b7a Gitweb: http://git.kernel.org/tip/d8c98a1d1488747625ad6044d423406e17e99b7a Author: David Vrabel AuthorDate: Fri, 11 Dec 2015 09:07:53 -0500 Committer: Thomas Gleixner CommitDate: Sat, 19 Dec 2015 21:35:13 +0100 x86/paravirt: Prevent rtc_cmos platform device init on PV guests Adding the rtc platform device in non-privileged Xen PV guests causes an IRQ conflict because these guests do not have legacy PIC and may allocate irqs in the legacy range. In a single VCPU Xen PV guest we should have: /proc/interrupts: CPU0 0: 4934 xen-percpu-virq timer0 1: 0 xen-percpu-ipi spinlock0 2: 0 xen-percpu-ipi resched0 3: 0 xen-percpu-ipi callfunc0 4: 0 xen-percpu-virq debug0 5: 0 xen-percpu-ipi callfuncsingle0 6: 0 xen-percpu-ipi irqwork0 7:321 xen-dyn-event xenbus 8: 90 xen-dyn-event hvc_console ... But hvc_console cannot get its interrupt because it is already in use by rtc0 and the console does not work. genirq: Flags mismatch irq 8. (hvc_console) vs. (rtc0) We can avoid this problem by realizing that unprivileged PV guests (both Xen and lguests) are not supposed to have rtc_cmos device and so adding it is not necessary. Privileged guests (i.e. Xen's dom0) do use it but they should not have irq conflicts since they allocate irqs above legacy range (above gsi_top, in fact). Instead of explicitly testing whether the guest is privileged we can extend pv_info structure to include information about guest's RTC support. Reported-and-tested-by: Sander Eikelenboom Signed-off-by: David Vrabel Signed-off-by: Boris Ostrovsky Cc: vkuzn...@redhat.com Cc: xen-de...@lists.xenproject.org Cc: konrad.w...@oracle.com Cc: sta...@vger.kernel.org # 4.2+ Link: http://lkml.kernel.org/r/1449842873-2613-1-git-send-email-boris.ostrov...@oracle.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/paravirt.h | 6 ++ arch/x86/include/asm/paravirt_types.h | 5 + arch/x86/include/asm/processor.h | 1 + arch/x86/kernel/rtc.c | 3 +++ arch/x86/lguest/boot.c| 1 + arch/x86/xen/enlighten.c | 4 +++- 6 files changed, 19 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 10d0596..c759b3c 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -19,6 +19,12 @@ static inline int paravirt_enabled(void) return pv_info.paravirt_enabled; } +static inline int paravirt_has_feature(unsigned int feature) +{ + WARN_ON_ONCE(!pv_info.paravirt_enabled); + return (pv_info.features & feature); +} + static inline void load_sp0(struct tss_struct *tss, struct thread_struct *thread) { diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 31247b5..3d44191 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -70,9 +70,14 @@ struct pv_info { #endif int paravirt_enabled; + unsigned int features;/* valid only if paravirt_enabled is set */ const char *name; }; +#define paravirt_has(x) paravirt_has_feature(PV_SUPPORTED_##x) +/* Supported features */ +#define PV_SUPPORTED_RTC(1<<0) + struct pv_init_ops { /* * Patch may replace one of the defined code sequences with diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 6752225..2d5a50c 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -472,6 +472,7 @@ static inline unsigned long current_top_of_stack(void) #else #define __cpuidnative_cpuid #define paravirt_enabled() 0 +#define paravirt_has(x)0 static inline void load_sp0(struct tss_struct *tss, struct thread_struct *thread) diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index cd96852..4af8d06 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -200,6 +200,9 @@ static __init int add_rtc_cmos(void) } #endif + if (paravirt_enabled() && !paravirt_has(RTC)) + return -ENODEV; + platform_device_register(&rtc_device); dev_info(&rtc_device.dev, "registered platform RTC device (no PNP device found)\n"); diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index a0d09f6..a43b2ea 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -1414,6 +1414,7 @@ __init void lguest_init(void) pv_info.kernel_rpl = 1; /* Everyone except Xen runs with this set. */ pv_info.shared_kernel_pmd = 1; + pv_info.features = 0; /* * We set up all the lguest overrides for sensitive operations. These diff --gi
[tip:locking/core] locking/pvqspinlock, x86: Enable PV qspinlock for Xen
Commit-ID: e95e6f176c61dd0e7bd9fdfb4956df1f9bfe99d4 Gitweb: http://git.kernel.org/tip/e95e6f176c61dd0e7bd9fdfb4956df1f9bfe99d4 Author: David Vrabel AuthorDate: Fri, 24 Apr 2015 14:56:40 -0400 Committer: Ingo Molnar CommitDate: Fri, 8 May 2015 12:37:18 +0200 locking/pvqspinlock, x86: Enable PV qspinlock for Xen This patch adds the necessary Xen specific code to allow Xen to support the CPU halting and kicking operations needed by the queue spinlock PV code. Signed-off-by: David Vrabel Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Daniel J Blueman Cc: Douglas Hatch Cc: H. Peter Anvin Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Paolo Bonzini Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Raghavendra K T Cc: Rik van Riel Cc: Scott J Norton Cc: Thomas Gleixner Cc: virtualizat...@lists.linux-foundation.org Cc: xen-de...@lists.xenproject.org Link: http://lkml.kernel.org/r/1429901803-29771-12-git-send-email-waiman.l...@hp.com Signed-off-by: Ingo Molnar --- arch/x86/xen/spinlock.c | 64 + kernel/Kconfig.locks| 2 +- 2 files changed, 61 insertions(+), 5 deletions(-) diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 956374c..af907a9 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -17,6 +17,56 @@ #include "xen-ops.h" #include "debugfs.h" +static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; +static DEFINE_PER_CPU(char *, irq_name); +static bool xen_pvspin = true; + +#ifdef CONFIG_QUEUED_SPINLOCK + +#include + +static void xen_qlock_kick(int cpu) +{ + xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); +} + +/* + * Halt the current CPU & release it back to the host + */ +static void xen_qlock_wait(u8 *byte, u8 val) +{ + int irq = __this_cpu_read(lock_kicker_irq); + + /* If kicker interrupts not initialized yet, just spin */ + if (irq == -1) + return; + + /* clear pending */ + xen_clear_irq_pending(irq); + barrier(); + + /* +* We check the byte value after clearing pending IRQ to make sure +* that we won't miss a wakeup event because of the clearing. +* +* The sync_clear_bit() call in xen_clear_irq_pending() is atomic. +* So it is effectively a memory barrier for x86. +*/ + if (READ_ONCE(*byte) != val) + return; + + /* +* If an interrupt happens here, it will leave the wakeup irq +* pending, which will cause xen_poll_irq() to return +* immediately. +*/ + + /* Block until irq becomes pending (or perhaps a spurious wakeup) */ + xen_poll_irq(irq); +} + +#else /* CONFIG_QUEUED_SPINLOCK */ + enum xen_contention_stat { TAKEN_SLOW, TAKEN_SLOW_PICKUP, @@ -100,12 +150,9 @@ struct xen_lock_waiting { __ticket_t want; }; -static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; -static DEFINE_PER_CPU(char *, irq_name); static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting); static cpumask_t waiting_cpus; -static bool xen_pvspin = true; __visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want) { int irq = __this_cpu_read(lock_kicker_irq); @@ -217,6 +264,7 @@ static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next) } } } +#endif /* CONFIG_QUEUED_SPINLOCK */ static irqreturn_t dummy_handler(int irq, void *dev_id) { @@ -280,8 +328,16 @@ void __init xen_init_spinlocks(void) return; } printk(KERN_DEBUG "xen: PV spinlocks enabled\n"); +#ifdef CONFIG_QUEUED_SPINLOCK + __pv_init_lock_hash(); + pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath; + pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock); + pv_lock_ops.wait = xen_qlock_wait; + pv_lock_ops.kick = xen_qlock_kick; +#else pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning); pv_lock_ops.unlock_kick = xen_unlock_kick; +#endif } /* @@ -310,7 +366,7 @@ static __init int xen_parse_nopvspin(char *arg) } early_param("xen_nopvspin", xen_parse_nopvspin); -#ifdef CONFIG_XEN_DEBUG_FS +#if defined(CONFIG_XEN_DEBUG_FS) && !defined(CONFIG_QUEUED_SPINLOCK) static struct dentry *d_spin_debug; diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks index 4379eef..95dd758 100644 --- a/kernel/Kconfig.locks +++ b/kernel/Kconfig.locks @@ -240,7 +240,7 @@ config ARCH_USE_QUEUED_SPINLOCK config QUEUED_SPINLOCK def_bool y if ARCH_USE_QUEUED_SPINLOCK - depends on SMP && (!PARAVIRT_SPINLOCKS || !XEN) + depends on SMP config ARCH_USE_QUEUE_RWLOCK bool -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/major
[tip:timers/core] x86: xen: Sync the wallclock when the system time is set
Commit-ID: 5584880e44e49c587059801faa2a9f7d22619c48 Gitweb: http://git.kernel.org/tip/5584880e44e49c587059801faa2a9f7d22619c48 Author: David Vrabel AuthorDate: Thu, 27 Jun 2013 11:35:47 +0100 Committer: Thomas Gleixner CommitDate: Fri, 28 Jun 2013 23:15:06 +0200 x86: xen: Sync the wallclock when the system time is set Currently the Xen wallclock is only updated every 11 minutes if NTP is synchronized to its clock source (using the sync_cmos_clock() work). If a guest is started before NTP is synchronized it may see an incorrect wallclock time. Use the pvclock_gtod notifier chain to receive a notification when the system time has changed and update the wallclock to match. This chain is called on every timer tick and we want to avoid an extra (expensive) hypercall on every tick. Because dom0 has historically never provided a very accurate wallclock and guests do not expect one, we can do this simply: the wallclock is only updated if the clock was set. Signed-off-by: David Vrabel Cc: Konrad Rzeszutek Wilk Cc: John Stultz Cc: Link: http://lkml.kernel.org/r/1372329348-20841-5-git-send-email-david.vra...@citrix.com Signed-off-by: Thomas Gleixner --- arch/x86/xen/time.c | 28 1 file changed, 28 insertions(+) diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index a1947ac..3364850 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -212,6 +213,30 @@ static int xen_set_wallclock(const struct timespec *now) return HYPERVISOR_dom0_op(&op); } +static int xen_pvclock_gtod_notify(struct notifier_block *nb, unsigned long was_set, + void *priv) +{ + struct timespec now; + struct xen_platform_op op; + + if (!was_set) + return NOTIFY_OK; + + now = __current_kernel_time(); + + op.cmd = XENPF_settime; + op.u.settime.secs = now.tv_sec; + op.u.settime.nsecs = now.tv_nsec; + op.u.settime.system_time = xen_clocksource_read(); + + (void)HYPERVISOR_dom0_op(&op); + return NOTIFY_OK; +} + +static struct notifier_block xen_pvclock_gtod_notifier = { + .notifier_call = xen_pvclock_gtod_notify, +}; + static struct clocksource xen_clocksource __read_mostly = { .name = "xen", .rating = 400, @@ -473,6 +498,9 @@ static void __init xen_time_init(void) xen_setup_runstate_info(cpu); xen_setup_timer(cpu); xen_setup_cpu_clockevents(); + + if (xen_initial_domain()) + pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier); } void __init xen_init_time_ops(void) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[tip:timers/core] timekeeping: Indicate that clock was set in the pvclock gtod notifier
Commit-ID: 780427f0e113b4c77dfff4d258c05a902cdb0eb9 Gitweb: http://git.kernel.org/tip/780427f0e113b4c77dfff4d258c05a902cdb0eb9 Author: David Vrabel AuthorDate: Thu, 27 Jun 2013 11:35:46 +0100 Committer: Thomas Gleixner CommitDate: Fri, 28 Jun 2013 23:15:06 +0200 timekeeping: Indicate that clock was set in the pvclock gtod notifier If the clock was set (stepped), set the action parameter to functions in the pvclock gtod notifier chain to non-zero. This allows the callee to only do work if the clock was stepped. This will be used on Xen as the synchronization of the Xen wallclock to the control domain's (dom0) system time will be done with this notifier and updating on every timer tick is unnecessary and too expensive. Signed-off-by: David Vrabel Cc: Konrad Rzeszutek Wilk Cc: John Stultz Cc: Link: http://lkml.kernel.org/r/1372329348-20841-4-git-send-email-david.vra...@citrix.com Signed-off-by: Thomas Gleixner --- include/linux/pvclock_gtod.h | 7 +++ kernel/time/timekeeping.c| 30 ++ 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/include/linux/pvclock_gtod.h b/include/linux/pvclock_gtod.h index 0ca7582..a71d2db 100644 --- a/include/linux/pvclock_gtod.h +++ b/include/linux/pvclock_gtod.h @@ -3,6 +3,13 @@ #include +/* + * The pvclock gtod notifier is called when the system time is updated + * and is used to keep guest time synchronized with host time. + * + * The 'action' parameter in the notifier function is false (0), or + * true (non-zero) if system time was stepped. + */ extern int pvclock_gtod_register_notifier(struct notifier_block *nb); extern int pvclock_gtod_unregister_notifier(struct notifier_block *nb); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index d8b23a9..846d0a1 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -29,6 +29,7 @@ #define TK_CLEAR_NTP (1 << 0) #define TK_MIRROR (1 << 1) +#define TK_CLOCK_WAS_SET (1 << 2) static struct timekeeper timekeeper; static DEFINE_RAW_SPINLOCK(timekeeper_lock); @@ -204,9 +205,9 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) static RAW_NOTIFIER_HEAD(pvclock_gtod_chain); -static void update_pvclock_gtod(struct timekeeper *tk) +static void update_pvclock_gtod(struct timekeeper *tk, bool was_set) { - raw_notifier_call_chain(&pvclock_gtod_chain, 0, tk); + raw_notifier_call_chain(&pvclock_gtod_chain, was_set, tk); } /** @@ -220,7 +221,7 @@ int pvclock_gtod_register_notifier(struct notifier_block *nb) raw_spin_lock_irqsave(&timekeeper_lock, flags); ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb); - update_pvclock_gtod(tk); + update_pvclock_gtod(tk, true); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); return ret; @@ -252,7 +253,7 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action) ntp_clear(); } update_vsyscall(tk); - update_pvclock_gtod(tk); + update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET); if (action & TK_MIRROR) memcpy(&shadow_timekeeper, &timekeeper, sizeof(timekeeper)); @@ -512,7 +513,7 @@ int do_settimeofday(const struct timespec *tv) tk_set_xtime(tk, tv); - timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR); + timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); write_seqcount_end(&timekeeper_seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); @@ -556,7 +557,7 @@ int timekeeping_inject_offset(struct timespec *ts) tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts)); error: /* even if we error out, we forwarded the time, so call update */ - timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR); + timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); write_seqcount_end(&timekeeper_seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); @@ -646,7 +647,7 @@ static int change_clocksource(void *data) module_put(new->owner); } } - timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR); + timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); write_seqcount_end(&timekeeper_seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); @@ -887,7 +888,7 @@ void timekeeping_inject_sleeptime(struct timespec *delta) __timekeeping_inject_sleeptime(tk, delta); - timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR); + timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); write_seqcount_end(&timekeeper_seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); @@ -969,7 +970,7 @@ static void timekeeping_resume(void) tk->cycle_last = clock->cycle_last = cycle_now; tk->ntp_error = 0; timekeeping_suspended = 0; -
[tip:timers/core] x86: xen: Sync the CMOS RTC as well as the Xen wallclock
Commit-ID: 47433b8c9d7480a3eebd99df38e857ce85a37cee Gitweb: http://git.kernel.org/tip/47433b8c9d7480a3eebd99df38e857ce85a37cee Author: David Vrabel AuthorDate: Thu, 27 Jun 2013 11:35:48 +0100 Committer: Thomas Gleixner CommitDate: Fri, 28 Jun 2013 23:15:07 +0200 x86: xen: Sync the CMOS RTC as well as the Xen wallclock Adjustments to Xen's persistent clock via update_persistent_clock() don't actually persist, as the Xen wallclock is a software only clock and modifications to it do not modify the underlying CMOS RTC. The x86_platform.set_wallclock hook is there to keep the hardware RTC synchronized. On a guest this is pointless. On Dom0 we can use the native implementaion which actually updates the hardware RTC, but we still need to keep the software emulation of RTC for the guests up to date. The subscription to the pvclock_notifier allows us to emulate this easily. The notifier is called at every tick and when the clock was set. Right now we only use that notifier when the clock was set, but due to the fact that it is called periodically from the timekeeping update code, we can utilize it to emulate the NTP driven drift compensation of update_persistant_clock() for the Xen wall (software) clock. Add a 11 minutes periodic update to the pvclock_gtod notifier callback to achieve that. The static variable 'next' which maintains that 11 minutes update cycle is protected by the core code serialization so there is no need to add a Xen specific serialization mechanism. [ tglx: Massaged changelog and added a few comments ] Signed-off-by: David Vrabel Cc: Konrad Rzeszutek Wilk Cc: John Stultz Cc: Link: http://lkml.kernel.org/r/1372329348-20841-6-git-send-email-david.vra...@citrix.com Signed-off-by: Thomas Gleixner --- arch/x86/xen/time.c | 45 ++--- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 3364850..7a5671b 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -199,37 +199,42 @@ static void xen_get_wallclock(struct timespec *now) static int xen_set_wallclock(const struct timespec *now) { - struct xen_platform_op op; - - /* do nothing for domU */ - if (!xen_initial_domain()) - return -1; - - op.cmd = XENPF_settime; - op.u.settime.secs = now->tv_sec; - op.u.settime.nsecs = now->tv_nsec; - op.u.settime.system_time = xen_clocksource_read(); - - return HYPERVISOR_dom0_op(&op); + return -1; } -static int xen_pvclock_gtod_notify(struct notifier_block *nb, unsigned long was_set, - void *priv) +static int xen_pvclock_gtod_notify(struct notifier_block *nb, + unsigned long was_set, void *priv) { - struct timespec now; - struct xen_platform_op op; + /* Protected by the calling core code serialization */ + static struct timespec next_sync; - if (!was_set) - return NOTIFY_OK; + struct xen_platform_op op; + struct timespec now; now = __current_kernel_time(); + /* +* We only take the expensive HV call when the clock was set +* or when the 11 minutes RTC synchronization time elapsed. +*/ + if (!was_set && timespec_compare(&now, &next_sync) < 0) + return NOTIFY_OK; + op.cmd = XENPF_settime; op.u.settime.secs = now.tv_sec; op.u.settime.nsecs = now.tv_nsec; op.u.settime.system_time = xen_clocksource_read(); (void)HYPERVISOR_dom0_op(&op); + + /* +* Move the next drift compensation time 11 minutes +* ahead. That's emulating the sync_cmos_clock() update for +* the hardware RTC. +*/ + next_sync = now; + next_sync.tv_sec += 11 * 60; + return NOTIFY_OK; } @@ -513,7 +518,9 @@ void __init xen_init_time_ops(void) x86_platform.calibrate_tsc = xen_tsc_khz; x86_platform.get_wallclock = xen_get_wallclock; - x86_platform.set_wallclock = xen_set_wallclock; + /* Dom0 uses the native method to set the hardware RTC. */ + if (!xen_initial_domain()) + x86_platform.set_wallclock = xen_set_wallclock; } #ifdef CONFIG_XEN_PVHVM -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[tip:timers/core] hrtimers: Support resuming with two or more CPUs online (but stopped)
Commit-ID: 7c4c3a0f18ba57ea2a2985034532303d2929902a Gitweb: http://git.kernel.org/tip/7c4c3a0f18ba57ea2a2985034532303d2929902a Author: David Vrabel AuthorDate: Thu, 27 Jun 2013 11:35:44 +0100 Committer: Thomas Gleixner CommitDate: Fri, 28 Jun 2013 23:15:06 +0200 hrtimers: Support resuming with two or more CPUs online (but stopped) hrtimers_resume() only reprograms the timers for the current CPU as it assumes that all other CPUs are offline at this point in the resume process. If other CPUs are online then their timers will not be corrected and they may fire at the wrong time. When running as a Xen guest, this assumption is not true. Non-boot CPUs are only stopped with IRQs disabled instead of offlining them. This is a performance optimization as disabling the CPUs would add an unacceptable amount of additional downtime during a live migration (> 200 ms for a 4 VCPU guest). hrtimers_resume() cannot call on_each_cpu(retrigger_next_event,...) as the other CPUs will be stopped with IRQs disabled. Instead, defer the call to the next softirq. [ tglx: Separated the xen change out ] Signed-off-by: David Vrabel Cc: Konrad Rzeszutek Wilk Cc: John Stultz Cc: Link: http://lkml.kernel.org/r/1372329348-20841-2-git-send-email-david.vra...@citrix.com Signed-off-by: Thomas Gleixner --- kernel/hrtimer.c | 15 --- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index fd4b13b..e86827e 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -773,15 +773,24 @@ void clock_was_set(void) /* * During resume we might have to reprogram the high resolution timer - * interrupt (on the local CPU): + * interrupt on all online CPUs. However, all other CPUs will be + * stopped with IRQs interrupts disabled so the clock_was_set() call + * must be deferred to the softirq. + * + * The one-shot timer has already been programmed to fire immediately + * (see tick_resume_oneshot()) and this interrupt will trigger the + * softirq to run early enough to correctly reprogram the timers on + * all CPUs. */ void hrtimers_resume(void) { + struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); + WARN_ONCE(!irqs_disabled(), KERN_INFO "hrtimers_resume() called with IRQs enabled!"); - retrigger_next_event(NULL); - timerfd_clock_was_set(); + cpu_base->clock_was_set = 1; + __raise_softirq_irqoff(HRTIMER_SOFTIRQ); } static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[tip:timers/core] timekeeping: Pass flags instead of multiple bools to timekeeping_update()
Commit-ID: 04397fe94ad65289884b9862b6a0c722ececaadf Gitweb: http://git.kernel.org/tip/04397fe94ad65289884b9862b6a0c722ececaadf Author: David Vrabel AuthorDate: Thu, 27 Jun 2013 11:35:45 +0100 Committer: Thomas Gleixner CommitDate: Fri, 28 Jun 2013 23:15:06 +0200 timekeeping: Pass flags instead of multiple bools to timekeeping_update() Instead of passing multiple bools to timekeeping_updated(), define flags and use a single 'action' parameter. It is then more obvious what each timekeeping_update() call does. Signed-off-by: David Vrabel Cc: Konrad Rzeszutek Wilk Cc: John Stultz Cc: Link: http://lkml.kernel.org/r/1372329348-20841-3-git-send-email-david.vra...@citrix.com Signed-off-by: Thomas Gleixner --- kernel/time/timekeeping.c | 21 - 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 838fc07..d8b23a9 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -27,6 +27,9 @@ #include "ntp_internal.h" #include "timekeeping_internal.h" +#define TK_CLEAR_NTP (1 << 0) +#define TK_MIRROR (1 << 1) + static struct timekeeper timekeeper; static DEFINE_RAW_SPINLOCK(timekeeper_lock); static seqcount_t timekeeper_seq; @@ -242,16 +245,16 @@ int pvclock_gtod_unregister_notifier(struct notifier_block *nb) EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier); /* must hold timekeeper_lock */ -static void timekeeping_update(struct timekeeper *tk, bool clearntp, bool mirror) +static void timekeeping_update(struct timekeeper *tk, unsigned int action) { - if (clearntp) { + if (action & TK_CLEAR_NTP) { tk->ntp_error = 0; ntp_clear(); } update_vsyscall(tk); update_pvclock_gtod(tk); - if (mirror) + if (action & TK_MIRROR) memcpy(&shadow_timekeeper, &timekeeper, sizeof(timekeeper)); } @@ -509,7 +512,7 @@ int do_settimeofday(const struct timespec *tv) tk_set_xtime(tk, tv); - timekeeping_update(tk, true, true); + timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR); write_seqcount_end(&timekeeper_seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); @@ -553,7 +556,7 @@ int timekeeping_inject_offset(struct timespec *ts) tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts)); error: /* even if we error out, we forwarded the time, so call update */ - timekeeping_update(tk, true, true); + timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR); write_seqcount_end(&timekeeper_seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); @@ -643,7 +646,7 @@ static int change_clocksource(void *data) module_put(new->owner); } } - timekeeping_update(tk, true, true); + timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR); write_seqcount_end(&timekeeper_seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); @@ -884,7 +887,7 @@ void timekeeping_inject_sleeptime(struct timespec *delta) __timekeeping_inject_sleeptime(tk, delta); - timekeeping_update(tk, true, true); + timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR); write_seqcount_end(&timekeeper_seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); @@ -966,7 +969,7 @@ static void timekeeping_resume(void) tk->cycle_last = clock->cycle_last = cycle_now; tk->ntp_error = 0; timekeeping_suspended = 0; - timekeeping_update(tk, false, true); + timekeeping_update(tk, TK_MIRROR); write_seqcount_end(&timekeeper_seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); @@ -1419,7 +1422,7 @@ static void update_wall_time(void) * updating. */ memcpy(real_tk, tk, sizeof(*tk)); - timekeeping_update(real_tk, false, false); + timekeeping_update(real_tk, 0); write_seqcount_end(&timekeeper_seq); out: raw_spin_unlock_irqrestore(&timekeeper_lock, flags); -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[tip:timers/core] xen: Remove clock_was_set() call in the resume path
Commit-ID: 0eb071651474952c8b6daecd36b378e2d01be22c Gitweb: http://git.kernel.org/tip/0eb071651474952c8b6daecd36b378e2d01be22c Author: David Vrabel AuthorDate: Thu, 27 Jun 2013 11:35:44 +0100 Committer: Thomas Gleixner CommitDate: Fri, 28 Jun 2013 23:15:06 +0200 xen: Remove clock_was_set() call in the resume path commit 359cdd3f866(xen: maintain clock offset over save/restore) added a clock_was_set() call into the xen resume code to propagate the system time changes. With the modified hrtimer resume code, which makes sure that all cpus are notified this call is not longer necessary. [ tglx: Separated it from the hrtimer change ] Signed-off-by: David Vrabel Cc: Konrad Rzeszutek Wilk Cc: John Stultz Cc: Link: http://lkml.kernel.org/r/1372329348-20841-2-git-send-email-david.vra...@citrix.com Signed-off-by: Thomas Gleixner --- drivers/xen/manage.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 412b96c..421da85 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -166,9 +166,6 @@ out_resume: dpm_resume_end(si.cancelled ? PMSG_THAW : PMSG_RESTORE); - /* Make sure timer events get retriggered on all CPUs */ - clock_was_set(); - out_thaw: #ifdef CONFIG_PREEMPT thaw_processes(); -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[tip:perf/core] x86: Allow tracing of functions in arch/x86/kernel /rtc.c
Commit-ID: ce37f400336a34bb6e72c4700f9dcc2a41ff7163 Gitweb: http://git.kernel.org/tip/ce37f400336a34bb6e72c4700f9dcc2a41ff7163 Author: David Vrabel AuthorDate: Mon, 8 Oct 2012 13:07:30 +0100 Committer: Ingo Molnar CommitDate: Wed, 24 Oct 2012 13:14:22 +0200 x86: Allow tracing of functions in arch/x86/kernel/rtc.c Move native_read_tsc() to tsc.c to allow profiling to be re-enabled for rtc.c. Signed-off-by: David Vrabel Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1349698050-6560-1-git-send-email-david.vra...@citrix.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile |1 - arch/x86/kernel/rtc.c|6 -- arch/x86/kernel/tsc.c|6 ++ 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 91ce48f..9fd5eed 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -9,7 +9,6 @@ CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) ifdef CONFIG_FUNCTION_TRACER # Do not profile debug and lowlevel utilities CFLAGS_REMOVE_tsc.o = -pg -CFLAGS_REMOVE_rtc.o = -pg CFLAGS_REMOVE_paravirt-spinlocks.o = -pg CFLAGS_REMOVE_pvclock.o = -pg CFLAGS_REMOVE_kvmclock.o = -pg diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 4929c1b..801602b 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -195,12 +195,6 @@ void read_persistent_clock(struct timespec *ts) ts->tv_nsec = 0; } -unsigned long long native_read_tsc(void) -{ - return __native_read_tsc(); -} -EXPORT_SYMBOL(native_read_tsc); - static struct resource rtc_resources[] = { [0] = { diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index cfa5d4f..06ccb50 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -77,6 +77,12 @@ unsigned long long sched_clock(void) __attribute__((alias("native_sched_clock"))); #endif +unsigned long long native_read_tsc(void) +{ + return __native_read_tsc(); +} +EXPORT_SYMBOL(native_read_tsc); + int check_tsc_unstable(void) { return tsc_unstable; -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/