Paul E. McKenney wrote:
> On Thu, Jan 14, 2010 at 01:11:33AM +0100, Jan Kiszka wrote:
>> Paul E. McKenney wrote:
>>> On Tue, Jan 12, 2010 at 09:28:15AM +0100, Jan Kiszka wrote:
>>>> If so, I will try to write something like this in the next few days.
>>>> I will surely appreciate your review afterwards!
>>> Sounds good!
>>>
>> Here we go, find the commit inlined below. You may be primarily
>> interested in kvm_synchronize_sched_expedited() and the helper thread
>> function kvm_rcu_sync_thread(). As I use dedicated threads, I simplified
>> some parts compared to upstream. Light testing indicates that it works.
>>
>> The full set of changes (specifically interesting for anyone who wants
>> to test it) is available on kernel.org. There is now also a
>> commit [1] included for CPU hotplug support, but it's untested as I don't
>> have the required hardware at hand.
> 
> This patch is only for backporting, right?  In mainline, you should be
> able to simply call synchronize_srcu_expedited().

Right. kvm-kmod [1] is like compat-wireless: take the latest module
sources from the kernel, patch them a bit (that's what the sync script
does), wrap them a bit, and you are able to build and run the result on
older kernels.

So I was forced to provide synchronize_srcu_expedited for kernels <= 2.6.31.
Actually, I wrap 2.6.32 as well to have a version that includes your
latest optimization.

> 
>> Thanks in advance,
>> Jan
>>
>> [1] http://git.kernel.org/?p=virt/kvm/kvm-kmod.git;a=commitdiff;h=81a3ddcd8ea343a9570f3164bb31160ff4be0ab7
>>
>> --------->
>>
>> From fe13d0785586623176879a87949ad626dace192c Mon Sep 17 00:00:00 2001
>> From: Jan Kiszka <[email protected]>
>> Date: Wed, 13 Jan 2010 09:32:50 +0100
>> Subject: [PATCH] Compat support for synchronize_srcu_expedited
>>
>> This service was first introduced in 2.6.32 and further optimized in
>> 2.6.33. KVM makes use of it to replace the rw-sem-based slot lock
>> with SRCU.
>>
>> TODO: Add CPU-hotplug support to the sync kthreads.
> 
> Oh.  Never mind my complaint about lack of CPU-hotplug support below.  ;-)
> 
> The function migration_call() in kernel/sched.c has the code you would
> want to harvest for this.  And re-reading your text above, perhaps you
> already have done so.

Yep (stolen from softirq.c):

From 81a3ddcd8ea343a9570f3164bb31160ff4be0ab7 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <[email protected]>
Date: Thu, 14 Jan 2010 00:26:07 +0100
Subject: [PATCH] Add CPU hotplug support for kvmsrcsync thread

Signed-off-by: Jan Kiszka <[email protected]>
---
 srcu.c |   64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 62 insertions(+), 2 deletions(-)

diff --git a/srcu.c b/srcu.c
index 985d627..841394d 100644
--- a/srcu.c
+++ b/srcu.c
@@ -399,13 +399,66 @@ void kvm_synchronize_srcu_expedited(struct srcu_struct *sp)
 }
 EXPORT_SYMBOL_GPL(kvm_synchronize_srcu_expedited);
 
+static struct sched_param sync_thread_param = {
+       .sched_priority = MAX_RT_PRIO-1
+};
+
+#ifdef CONFIG_HOTPLUG_CPU
+#include <linux/cpumask.h>
+
+static int cpu_callback(struct notifier_block *nfb, unsigned long action,
+                       void *hcpu)
+{
+       int hotcpu = (unsigned long)hcpu;
+       struct task_struct *p;
+
+       switch (action) {
+       case CPU_UP_PREPARE:
+       case CPU_UP_PREPARE_FROZEN:
+               p = kthread_create(kvm_rcu_sync_thread, hcpu,
+                                  "kvmsrcusync/%d", hotcpu);
+               if (IS_ERR(p)) {
+                       printk(KERN_ERR "kvm: kvmsrcsync for %d failed\n",
+                              hotcpu);
+                       return NOTIFY_BAD;
+               }
+               kthread_bind(p, hotcpu);
+               sched_setscheduler(p, SCHED_FIFO, &sync_thread_param);
+               per_cpu(sync_thread, hotcpu) = p;
+               break;
+       case CPU_ONLINE:
+       case CPU_ONLINE_FROZEN:
+               wake_up_process(per_cpu(sync_thread, hotcpu));
+               break;
+       case CPU_UP_CANCELED:
+       case CPU_UP_CANCELED_FROZEN:
+               if (!per_cpu(sync_thread, hotcpu))
+                       break;
+               /* Unbind so it can run.  Fall thru. */
+               kthread_bind(per_cpu(sync_thread, hotcpu),
+                            cpumask_any(cpu_online_mask));
+       case CPU_DEAD:
+       case CPU_DEAD_FROZEN:
+               p = per_cpu(sync_thread, hotcpu);
+               per_cpu(sync_thread, hotcpu) = NULL;
+               kthread_stop(p);
+               break;
+       }
+       return NOTIFY_OK;
+}
+
+static struct notifier_block cpu_nfb = {
+       .notifier_call = cpu_callback
+};
+#endif /* CONFIG_HOTPLUG_CPU */
+
 int kvm_init_srcu(void)
 {
-       struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
        struct task_struct *p;
        int cpu;
        int err;
 
+       get_online_cpus();
        for_each_online_cpu(cpu) {
                p = kthread_create(kvm_rcu_sync_thread, (void *)(long)cpu,
                                   "kvmsrcusync/%d", cpu);
@@ -413,13 +466,19 @@ int kvm_init_srcu(void)
                        goto error_out;
 
                kthread_bind(p, cpu);
-               sched_setscheduler(p, SCHED_FIFO, &param);
+               sched_setscheduler(p, SCHED_FIFO, &sync_thread_param);
                per_cpu(sync_thread, cpu) = p;
                wake_up_process(p);
        }
+#ifdef CONFIG_HOTPLUG_CPU
+       register_cpu_notifier(&cpu_nfb);
+#endif /* CONFIG_HOTPLUG_CPU */
+       put_online_cpus();
+
        return 0;
 
 error_out:
+       put_online_cpus();
        printk(KERN_ERR "kvm: kvmsrcsync for %d failed\n", cpu);
        err = PTR_ERR(p);
        kvm_exit_srcu();
@@ -430,6 +489,7 @@ void kvm_exit_srcu(void)
 {
        int cpu;
 
+       unregister_cpu_notifier(&cpu_nfb);
        for_each_online_cpu(cpu)
                if (per_cpu(sync_thread, cpu))
                        kthread_stop(per_cpu(sync_thread, cpu));
-- 
1.6.0.2

> 
> Anyway, looks good.  There are some questions interspersed below to make
> sure I understand the situation.  There is one cut-and-paste spelling
> error and another cut-and-paste C-standard violation, the latter being
> my fault, and as far as I know, not a problem in practice.
> 
>> Signed-off-by: Jan Kiszka <[email protected]>
>> ---
>>  external-module-compat-comm.h |   17 ++
>>  srcu.c                        |  382 ++++++++++++++++++++++++++++++-----------
>>  sync                          |   27 ++-
>>  3 files changed, 316 insertions(+), 110 deletions(-)
>>
>> diff --git a/external-module-compat-comm.h b/external-module-compat-comm.h
>> index ca57567..eabf8a1 100644
>> --- a/external-module-compat-comm.h
>> +++ b/external-module-compat-comm.h
>> @@ -1053,3 +1053,20 @@ static inline void kvm_fire_urn(void)
>>  static inline void kvm_fire_urn(void) {}
>>  
>>  #endif /* CONFIG_USER_RETURN_NOTIFIER */
>> +
>> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33)
>> +
>> +#ifdef CONFIG_SMP
>> +void kvm_synchronize_srcu_expedited(struct srcu_struct *sp);
>> +#else
>> +static inline void kvm_synchronize_srcu_expedited(struct srcu_struct *sp) { }
>> +#endif
>> +
>> +#else
>> +
>> +#define kvm_synchronize_srcu_expedited synchronize_srcu_expedited
>> +
>> +#endif
>> +
>> +int kvm_init_srcu(void);
>> +void kvm_exit_srcu(void);
>> diff --git a/srcu.c b/srcu.c
> 
> Are we in kernel/srcu.c or a kvm-specific file?

We are in kvm-kmod/srcu.c, the wrapping code for srcu on older kernels
(used to work down to 2.6.16).

> 
> My guess is that we are in kernel/srcu.c for a backport-only patch, but
> have to ask!
> 
>> index e9734bc..985d627 100644
>> --- a/srcu.c
>> +++ b/srcu.c
>> @@ -24,8 +24,6 @@
>>   *
>>   */
>>  
>> -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
>> -
>>  #include <linux/module.h>
>>  #include <linux/mutex.h>
>>  #include <linux/percpu.h>
>> @@ -35,6 +33,118 @@
>>  #include <linux/slab.h>
>>  #include <linux/smp.h>
>>  #include <linux/srcu.h>
>> +#include <linux/kthread.h>
>> +
>> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) || \
>> +    (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33) && defined(CONFIG_SMP))
>> +
>> +/*
>> + * srcu_readers_active_idx -- returns approximate number of readers
>> + *  active on the specified rank of per-CPU counters.
>> + */
>> +
>> +static int srcu_readers_active_idx(struct srcu_struct *sp, int idx)
>> +{
>> +    int cpu;
>> +    int sum;
>> +
>> +    sum = 0;
>> +    for_each_possible_cpu(cpu)
>> +            sum += per_cpu_ptr(sp->per_cpu_ref, cpu)->c[idx];
>> +    return sum;
>> +}
>> +
>> +/*
>> + * Helper function for synchronize_srcu() and synchronize_srcu_expedited().
>> + */
>> +static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void))
>> +{
>> +    int idx;
>> +
>> +    idx = sp->completed;
>> +    mutex_lock(&sp->mutex);
>> +
>> +    /*
>> +     * Check to see if someone else did the work for us while we were
>> +     * waiting to acquire the lock.  We need -two- advances of
>> +     * the counter, not just one.  If there was but one, we might have
>> +     * shown up -after- our helper's first synchronize_sched(), thus
>> +     * having failed to prevent CPU-reordering races with concurrent
>> +     * srcu_read_unlock()s on other CPUs (see comment below).  So we
>> +     * either (1) wait for two or (2) supply the second ourselves.
>> +     */
>> +
>> +    if ((sp->completed - idx) >= 2) {
>> +            mutex_unlock(&sp->mutex);
>> +            return;
>> +    }
>> +
>> +    sync_func();  /* Force memory barrier on all CPUs. */
>> +
>> +    /*
>> +     * The preceding synchronize_sched() ensures that any CPU that
>> +     * sees the new value of sp->completed will also see any preceding
>> +     * changes to data structures made by this CPU.  This prevents
>> +     * some other CPU from reordering the accesses in its SRCU
>> +     * read-side critical section to precede the corresponding
>> +     * srcu_read_lock() -- ensuring that such references will in
>> +     * fact be protected.
>> +     *
>> +     * So it is now safe to do the flip.
>> +     */
>> +
>> +    idx = sp->completed & 0x1;
>> +    sp->completed++;
>> +
>> +    sync_func();  /* Force memory barrier on all CPUs. */
>> +
>> +    /*
>> +     * At this point, because of the preceding synchronize_sched(),
>> +     * all srcu_read_lock() calls using the old counters have completed.
>> +     * Their corresponding critical sections might well be still
>> +     * executing, but the srcu_read_lock() primitives themselves
>> +     * will have finished executing.
>> +     */
>> +
>> +    while (srcu_readers_active_idx(sp, idx))
>> +            schedule_timeout_interruptible(1);
>> +
>> +    sync_func();  /* Force memory barrier on all CPUs. */
>> +
>> +    /*
>> +     * The preceding synchronize_sched() forces all srcu_read_unlock()
>> +     * primitives that were executing concurrently with the preceding
>> +     * for_each_possible_cpu() loop to have completed by this point.
>> +     * More importantly, it also forces the corresponding SRCU read-side
>> +     * critical sections to have also completed, and the corresponding
>> +     * references to SRCU-protected data items to be dropped.
>> +     *
>> +     * Note:
>> +     *
>> +     *      Despite what you might think at first glance, the
>> +     *      preceding synchronize_sched() -must- be within the
>> +     *      critical section ended by the following mutex_unlock().
>> +     *      Otherwise, a task taking the early exit can race
>> +     *      with a srcu_read_unlock(), which might have executed
>> +     *      just before the preceding srcu_readers_active() check,
>> +     *      and whose CPU might have reordered the srcu_read_unlock()
>> +     *      with the preceding critical section.  In this case, there
>> +     *      is nothing preventing the synchronize_sched() task that is
>> +     *      taking the early exit from freeing a data structure that
>> +     *      is still being referenced (out of order) by the task
>> +     *      doing the srcu_read_unlock().
>> +     *
>> +     *      Alternatively, the comparison with "2" on the early exit
>> +     *      could be changed to "3", but this increases synchronize_srcu()
>> +     *      latency for bulk loads.  So the current code is preferred.
>> +     */
>> +
>> +    mutex_unlock(&sp->mutex);
>> +}
>> +
>> +#endif
>> +
>> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
>>  
>>  #undef kvm_init_srcu_struct
>>  #undef kvm_cleanup_srcu_struct
>> @@ -42,6 +152,11 @@
>>  #undef kvm_srcu_read_unlock
>>  #undef kvm_synchronize_srcu
>>  #undef kvm_srcu_batches_completed
>> +
>> +static int srcu_readers_active_idx(struct srcu_struct *sp, int idx);
>> +static void
>> +__synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void));
>> +
>>  /**
>>   * init_srcu_struct - initialize a sleep-RCU structure
>>   * @sp: structure to initialize.
>> @@ -58,22 +173,6 @@ int kvm_init_srcu_struct(struct srcu_struct *sp)
>>      return (sp->per_cpu_ref ? 0 : -ENOMEM);
>>  }
>>  
>> -/*
>> - * srcu_readers_active_idx -- returns approximate number of readers
>> - *  active on the specified rank of per-CPU counters.
>> - */
>> -
>> -static int srcu_readers_active_idx(struct srcu_struct *sp, int idx)
>> -{
>> -    int cpu;
>> -    int sum;
>> -
>> -    sum = 0;
>> -    for_each_possible_cpu(cpu)
>> -            sum += per_cpu_ptr(sp->per_cpu_ref, cpu)->c[idx];
>> -    return sum;
>> -}
>> -
>>  /**
>>   * srcu_readers_active - returns approximate number of readers.
>>   * @sp: which srcu_struct to count active readers (holding srcu_read_lock).
>> @@ -154,94 +253,14 @@ void kvm_srcu_read_unlock(struct srcu_struct *sp, int idx)
>>   * synchronizing concurrent updates.  Can block; must be called from
>>   * process context.
>>   *
>> - * Note that it is illegal to call synchornize_srcu() from the corresponding
>> + * Note that it is illegal to call synchronize_srcu() from the corresponding
>>   * SRCU read-side critical section; doing so will result in deadlock.
>>   * However, it is perfectly legal to call synchronize_srcu() on one
>>   * srcu_struct from some other srcu_struct's read-side critical section.
>>   */
>>  void kvm_synchronize_srcu(struct srcu_struct *sp)
>>  {
>> -    int idx;
>> -
>> -    idx = sp->completed;
>> -    mutex_lock(&sp->mutex);
>> -
>> -    /*
>> -     * Check to see if someone else did the work for us while we were
>> -     * waiting to acquire the lock.  We need -two- advances of
>> -     * the counter, not just one.  If there was but one, we might have
>> -     * shown up -after- our helper's first synchronize_sched(), thus
>> -     * having failed to prevent CPU-reordering races with concurrent
>> -     * srcu_read_unlock()s on other CPUs (see comment below).  So we
>> -     * either (1) wait for two or (2) supply the second ourselves.
>> -     */
>> -
>> -    if ((sp->completed - idx) >= 2) {
>> -            mutex_unlock(&sp->mutex);
>> -            return;
>> -    }
>> -
>> -    synchronize_sched();  /* Force memory barrier on all CPUs. */
>> -
>> -    /*
>> -     * The preceding synchronize_sched() ensures that any CPU that
>> -     * sees the new value of sp->completed will also see any preceding
>> -     * changes to data structures made by this CPU.  This prevents
>> -     * some other CPU from reordering the accesses in its SRCU
>> -     * read-side critical section to precede the corresponding
>> -     * srcu_read_lock() -- ensuring that such references will in
>> -     * fact be protected.
>> -     *
>> -     * So it is now safe to do the flip.
>> -     */
>> -
>> -    idx = sp->completed & 0x1;
>> -    sp->completed++;
>> -
>> -    synchronize_sched();  /* Force memory barrier on all CPUs. */
>> -
>> -    /*
>> -     * At this point, because of the preceding synchronize_sched(),
>> -     * all srcu_read_lock() calls using the old counters have completed.
>> -     * Their corresponding critical sections might well be still
>> -     * executing, but the srcu_read_lock() primitives themselves
>> -     * will have finished executing.
>> -     */
>> -
>> -    while (srcu_readers_active_idx(sp, idx))
>> -            schedule_timeout_interruptible(1);
>> -
>> -    synchronize_sched();  /* Force memory barrier on all CPUs. */
>> -
>> -    /*
>> -     * The preceding synchronize_sched() forces all srcu_read_unlock()
>> -     * primitives that were executing concurrently with the preceding
>> -     * for_each_possible_cpu() loop to have completed by this point.
>> -     * More importantly, it also forces the corresponding SRCU read-side
>> -     * critical sections to have also completed, and the corresponding
>> -     * references to SRCU-protected data items to be dropped.
>> -     *
>> -     * Note:
>> -     *
>> -     *      Despite what you might think at first glance, the
>> -     *      preceding synchronize_sched() -must- be within the
>> -     *      critical section ended by the following mutex_unlock().
>> -     *      Otherwise, a task taking the early exit can race
>> -     *      with a srcu_read_unlock(), which might have executed
>> -     *      just before the preceding srcu_readers_active() check,
>> -     *      and whose CPU might have reordered the srcu_read_unlock()
>> -     *      with the preceding critical section.  In this case, there
>> -     *      is nothing preventing the synchronize_sched() task that is
>> -     *      taking the early exit from freeing a data structure that
>> -     *      is still being referenced (out of order) by the task
>> -     *      doing the srcu_read_unlock().
>> -     *
>> -     *      Alternatively, the comparison with "2" on the early exit
>> -     *      could be changed to "3", but this increases synchronize_srcu()
>> -     *      latency for bulk loads.  So the current code is preferred.
>> -     */
>> -
>> -    mutex_unlock(&sp->mutex);
>> +    __synchronize_srcu(sp, synchronize_sched);
>>  }
> 
> Hmmm...  Why not simply "synchronize_srcu()"?  Is this to allow a smaller
> piece of code to cover all the combinations of Linux kernel versions?

This is for kernels that do not have synchronize_srcu at all.
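Both variants then funnel through the shared helper, and only the
grace-period primitive passed in differs (this is straight from the
patch above):

	void kvm_synchronize_srcu(struct srcu_struct *sp)
	{
		__synchronize_srcu(sp, synchronize_sched);
	}

	void kvm_synchronize_srcu_expedited(struct srcu_struct *sp)
	{
		__synchronize_srcu(sp, kvm_synchronize_sched_expedited);
	}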

> 
>>  /**
>> @@ -265,3 +284,166 @@ EXPORT_SYMBOL_GPL(kvm_synchronize_srcu);
>>  EXPORT_SYMBOL_GPL(kvm_srcu_batches_completed);
>>  
>>  #endif
>> +
>> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33) && defined(CONFIG_SMP)
>> +
>> +struct sync_req {
>> +    struct list_head list;
>> +    bool pending;
>> +    bool success;
>> +    struct completion done;
>> +};
>> +
>> +static DEFINE_PER_CPU(struct sync_req, sync_req);
>> +static DEFINE_PER_CPU(struct task_struct *, sync_thread);
>> +static DEFINE_MUTEX(rcu_sched_expedited_mutex);
>> +
>> +static long synchronize_sched_expedited_count;
> 
> Sigh!!!  You copied one of my theoretical bugs.
> 
> According to the C standard, overflowing a signed integer is undefined
> (though it has been a -long- time since I have seen hardware that
> did anything other than wrap like you would expect).  My todo list
> includes converting these to "unsigned long".  I can do certainly add
> this instance to my list, but I would of course be quite happy if you
> made the appropriate adjustments.

Will do! :)
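
Something like this, I suppose (untested sketch on top of the patch
above: the unsigned counter makes wrap-around well defined, and the
cast keeps the "did another expedited grace period elapse?" test
working across the wrap):

-static long synchronize_sched_expedited_count;
+static unsigned long synchronize_sched_expedited_count;
@@
-	long snap;
+	unsigned long snap;
@@
-	if (ACCESS_ONCE(synchronize_sched_expedited_count) - snap > 0) {
+	/* unsigned subtraction wraps cleanly; cast recovers the ordering */
+	if ((long)(ACCESS_ONCE(synchronize_sched_expedited_count)
+		   - snap) > 0) {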

> 
> (This effort got preempted by applying lockdep to RCU, given that Thomas
> Gleixner found some non-theoretical RCU-usage bugs.)
> 
>> +static int kvm_rcu_sync_thread(void *data)
>> +{
>> +    int badcpu;
>> +    int cpu = (long)data;
>> +    struct sync_req *req = &per_cpu(sync_req, cpu);
>> +
>> +    set_current_state(TASK_INTERRUPTIBLE);
>> +    while (!kthread_should_stop()) {
>> +            if (!req->pending) {
>> +                    schedule();
>> +                    set_current_state(TASK_INTERRUPTIBLE);
>> +                    continue;
>> +            }
>> +            req->pending = false;
>> +
>> +            preempt_disable();
>> +            badcpu = smp_processor_id();
>> +            if (likely(cpu == badcpu)) {
>> +                    req->success = true;
>> +            } else {
>> +                    req->success = false;
>> +                    WARN_ONCE(1, "kvm_rcu_sync_thread() on CPU %d, "
>> +                              "expected %d\n", badcpu, cpu);
>> +            }
>> +            preempt_enable();
>> +
>> +            complete(&req->done);
>> +    }
>> +    __set_current_state(TASK_RUNNING);
>> +
>> +    return 0;
>> +}
>> +
>> +static void kvm_synchronize_sched_expedited(void)
>> +{
>> +    int cpu;
>> +    bool need_full_sync = 0;
>> +    struct sync_req *req;
>> +    long snap;
>> +    int trycount = 0;
>> +
>> +    smp_mb();  /* ensure prior mod happens before capturing snap. */
>> +    snap = ACCESS_ONCE(synchronize_sched_expedited_count) + 1;
>> +    get_online_cpus();
>> +    while (!mutex_trylock(&rcu_sched_expedited_mutex)) {
>> +            put_online_cpus();
>> +            if (trycount++ < 10)
>> +                    udelay(trycount * num_online_cpus());
>> +            else {
>> +                    synchronize_sched();
>> +                    return;
>> +            }
>> +            if (ACCESS_ONCE(synchronize_sched_expedited_count) - snap > 0) {
>> +                    smp_mb(); /* ensure test happens before caller kfree */
>> +                    return;
>> +            }
>> +            get_online_cpus();
>> +    }
>> +    for_each_online_cpu(cpu) {
>> +            req = &per_cpu(sync_req, cpu);
>> +            init_completion(&req->done);
>> +            smp_wmb();
>> +            req->pending = true;
>> +            wake_up_process(per_cpu(sync_thread, cpu));
>> +    }
>> +    for_each_online_cpu(cpu) {
>> +            req = &per_cpu(sync_req, cpu);
>> +            wait_for_completion(&req->done);
>> +            if (unlikely(!req->success))
>> +                    need_full_sync = 1;
>> +    }
>> +    synchronize_sched_expedited_count++;
>> +    mutex_unlock(&rcu_sched_expedited_mutex);
>> +    put_online_cpus();
>> +    if (need_full_sync)
>> +            synchronize_sched();
>> +}
>> +
>> +/**
>> + * synchronize_srcu_expedited - like synchronize_srcu, but less patient
> 
> "kvm_synchronize_srcu_expedited", right?  ;-)

Yes. But it's actually synchronize_srcu_expedited, as we patch all
call sites in the kvm module code to avoid namespace conflicts on
2.6.32.
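
The net effect is roughly this (a sketch, not the literal script
output; kvm->srcu stands in for the real call sites):

	/* in the imported kvm sources, after the sync script ran: */
	kvm_synchronize_srcu_expedited(&kvm->srcu);

	/* external-module-compat-comm.h then resolves the name: on
	 * < 2.6.33 it is kvm-kmod's own implementation from srcu.c,
	 * while on >= 2.6.33 it maps back to the kernel's primitive via
	 * #define kvm_synchronize_srcu_expedited synchronize_srcu_expedited
	 */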

> 
>> + * @sp: srcu_struct with which to synchronize.
>> + *
>> + * Flip the completed counter, and wait for the old count to drain to zero.
>> + * As with classic RCU, the updater must use some separate means of
>> + * synchronizing concurrent updates.  Can block; must be called from
>> + * process context.
>> + *
>> + * Note that it is illegal to call synchronize_srcu_expedited()
>> + * from the corresponding SRCU read-side critical section; doing so
>> + * will result in deadlock.  However, it is perfectly legal to call
>> + * synchronize_srcu_expedited() on one srcu_struct from some other
>> + * srcu_struct's read-side critical section.
>> + */
>> +void kvm_synchronize_srcu_expedited(struct srcu_struct *sp)
>> +{
>> +    __synchronize_srcu(sp, kvm_synchronize_sched_expedited);
>> +}
>> +EXPORT_SYMBOL_GPL(kvm_synchronize_srcu_expedited);
>> +
>> +int kvm_init_srcu(void)
>> +{
>> +    struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
>> +    struct task_struct *p;
>> +    int cpu;
>> +    int err;
>> +
>> +    for_each_online_cpu(cpu) {
>> +            p = kthread_create(kvm_rcu_sync_thread, (void *)(long)cpu,
>> +                               "kvmsrcusync/%d", cpu);
>> +            if (IS_ERR(p))
>> +                    goto error_out;
>> +
>> +            kthread_bind(p, cpu);
>> +            sched_setscheduler(p, SCHED_FIFO, &param);
>> +            per_cpu(sync_thread, cpu) = p;
>> +            wake_up_process(p);
>> +    }
>> +    return 0;
>> +
>> +error_out:
>> +    printk(KERN_ERR "kvm: kvmsrcsync for %d failed\n", cpu);
>> +    err = PTR_ERR(p);
>> +    kvm_exit_srcu();
>> +    return err;
>> +}
> 
> CPU hotplug?
> 
>> +void kvm_exit_srcu(void)
>> +{
>> +    int cpu;
>> +
>> +    for_each_online_cpu(cpu)
>> +            if (per_cpu(sync_thread, cpu))
>> +                    kthread_stop(per_cpu(sync_thread, cpu));
>> +}
>> +
>> +#else
>> +
>> +int kvm_init_srcu(void)
>> +{
>> +    return 0;
>> +}
>> +
>> +void kvm_exit_srcu(void)
>> +{
>> +}
>> +
>> +#endif
> 
> I do not understand what follows -- a KVM-specific test suite or some
> such?

That's the patch script which mangles the sources when importing them
from the kernel into kvm-kmod. E.g. we patch the
synchronize_srcu_expedited call sites to redirect them to our
implementation.

> 
>> diff --git a/sync b/sync
>> index 0eaff31..d67469e 100755
>> --- a/sync
>> +++ b/sync
>> @@ -40,8 +40,9 @@ def __hack(data):
>>          'do_machine_check eventfd_signal get_desc_base get_desc_limit '
>>          'vma_kernel_pagesize native_read_tsc user_return_notifier '
>>          'user_return_notifier_register user_return_notifier_unregister '
>> +        'synchronize_srcu_expedited '
>>          )
>> -    anon_inodes = anon_inodes_exit = False
>> +    kvm_init = kvm_exit = False
>>      mce = False
>>      result = []
>>      pr_fmt = ''
>> @@ -58,24 +59,30 @@ def __hack(data):
>>              pr_fmt = sub(r'#define pr_fmt\([^)]*\) ("[^"]*").*', r'\1', line) + ' '
>>              line = ''
>>          line = sub(r'pr_debug\(([^),]*)', r'pr_debug(' + pr_fmt + r'\1', line)
>> -        if match(r'^int kvm_init\('): anon_inodes = True
>> -        if match(r'r = kvm_arch_init\(opaque\);') and anon_inodes:
>> +        if match(r'^int kvm_init\('): kvm_init = True
>> +        if match(r'r = kvm_arch_init\(opaque\);') and kvm_init:
>>              w('\tr = kvm_init_anon_inodes();')
>>              w('\tif (r)')
>>              w('\t\treturn r;\n')
>> +            w('\tr = kvm_init_srcu();')
>> +            w('\tif (r)')
>> +            w('\t\tgoto cleanup_anon_inodes;\n')
>>              w('\tpreempt_notifier_sys_init();')
>>              w('\thrtimer_kallsyms_resolve();\n')
>> -        if match(r'return 0;') and anon_inodes:
>> +        if match(r'return 0;') and kvm_init:
>>              w('\tprintk("loaded kvm module (%s)\\n");\n' % (version,))
>> -        if match(r'return r;') and anon_inodes:
>> -            w('\tkvm_exit_anon_inodes();')
>> +        if match(r'return r;') and kvm_init:
>>              w('\tpreempt_notifier_sys_exit();')
>> -            anon_inodes = False
>> -        if match(r'^void kvm_exit'): anon_inodes_exit = True
>> -        if match(r'\}') and anon_inodes_exit:
>> +            w('\tkvm_exit_srcu();')
>> +            w('cleanup_anon_inodes:')
>>              w('\tkvm_exit_anon_inodes();')
>> +            kvm_init = False
>> +        if match(r'^void kvm_exit'): kvm_exit = True
>> +        if match(r'\}') and kvm_exit:
>>              w('\tpreempt_notifier_sys_exit();')
>> -            anon_inodes_exit = False
>> +            w('\tkvm_exit_srcu();')
>> +            w('\tkvm_exit_anon_inodes();')
>> +            kvm_exit = False
>>          if match(r'^int kvm_arch_init'): kvm_arch_init = True
>>          if match(r'\btsc_khz\b') and kvm_arch_init:
>>              line = sub(r'\btsc_khz\b', 'kvm_tsc_khz', line)
>> -- 
>> 1.6.0.2
>>
> 
> 

Thanks for the review!
Jan

[1] http://www.linux-kvm.org/page/Getting_the_kvm_kernel_modules
