irq: Fix a race condition between vector assigning and cleanup

tip-bot for Jiang Liu Thu, 10 Dec 2015 10:42:26 -0800

Commit-ID:  41c7518a5d14543fa4aa1b5b9994ac26b38c0406
Gitweb:     http://git.kernel.org/tip/41c7518a5d14543fa4aa1b5b9994ac26b38c0406
Author:     Jiang Liu <jiang....@linux.intel.com>
AuthorDate: Mon, 30 Nov 2015 16:09:29 +0800
Committer:  Thomas Gleixner <t...@linutronix.de>
CommitDate: Thu, 10 Dec 2015 19:32:07 +0100


x86/irq: Fix a race condition between vector assigning and cleanup

Joe Lawrence reported a use-after-release issue related to the x86 IRQ
management code. Please refer to the following link for more
information: http://lkml.kernel.org/r/5653b688.4050...@stratus.com

Thomas pointed out that it's caused by a race condition between
__assign_irq_vector() and __send_cleanup_vector(). Based on Thomas'
draft patch, we solve this race condition by:
1) Using move_in_progress to signal that an IRQ cleanup IPI is needed
2) Using old_domain to save the old CPU mask for IRQ cleanup
3) Using vector_lock to protect move_in_progress and old_domain

This bugfix patch also gets rid of the atomic (GFP_ATOMIC) cpumask
allocation in __send_cleanup_vector().

Fixes: a782a7e46bb5 "x86/irq: Store irq descriptor in vector array"
Reported-and-tested-by: Joe Lawrence <joe.lawre...@stratus.com>
Signed-off-by: Jiang Liu <jiang....@linux.intel.com>
Cc: sta...@vger.kernel.org
Link: http://lkml.kernel.org/r/1448870970-1461-4-git-send-email-jiang....@linux.intel.com
Signed-off-by: Thomas Gleixner <t...@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 77 +++++++++++++++++++------------------------
 1 file changed, 34 insertions(+), 43 deletions(-)

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 57934ef..b63d6f8 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -117,9 +117,9 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
        static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
        static int current_offset = VECTOR_OFFSET_START % 16;
        int cpu, err;
-       unsigned int dest = d->cfg.dest_apicid;
+       unsigned int dest;
 
-       if (d->move_in_progress)
+       if (cpumask_intersects(d->old_domain, cpu_online_mask))
                return -EBUSY;
 
        /* Only try and allocate irqs on cpus that are present */
@@ -144,13 +144,12 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
                        cpumask_and(used_cpumask, d->domain, vector_cpumask);
                        err = apic->cpu_mask_to_apicid_and(mask, used_cpumask,
                                                           &dest);
-                       if (err)
-                               break;
-                       cpumask_andnot(d->old_domain, d->domain,
-                                      vector_cpumask);
-                       d->move_in_progress =
-                          cpumask_intersects(d->old_domain, cpu_online_mask);
-                       cpumask_copy(d->domain, used_cpumask);
+                       if (!err) {
+                               cpumask_andnot(d->old_domain, d->domain,
+                                              vector_cpumask);
+                               cpumask_copy(d->domain, used_cpumask);
+                               d->cfg.dest_apicid = dest;
+                       }
                        break;
                }
 
@@ -183,14 +182,12 @@ next:
                /* Found one! */
                current_vector = vector;
                current_offset = offset;
-               if (d->cfg.vector) {
+               if (d->cfg.vector)
                        cpumask_copy(d->old_domain, d->domain);
-                       d->move_in_progress =
-                          cpumask_intersects(d->old_domain, cpu_online_mask);
-               }
+               d->cfg.vector = vector;
+               d->cfg.dest_apicid = dest;
                for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask)
                        per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq);
-               d->cfg.vector = vector;
                cpumask_copy(d->domain, vector_cpumask);
                err = 0;
                break;
@@ -198,7 +195,8 @@ next:
 
        if (!err) {
                /* cache destination APIC IDs into cfg->dest_apicid */
-               d->cfg.dest_apicid = dest;
+               cpumask_and(d->old_domain, d->old_domain, cpu_online_mask);
+               d->move_in_progress = !cpumask_empty(d->old_domain);
        }
 
        return err;
@@ -230,7 +228,7 @@ static int assign_irq_vector_policy(int irq, int node,
 
 static void clear_irq_vector(int irq, struct apic_chip_data *data)
 {
-       struct irq_desc *desc;
+       struct irq_desc *desc = irq_to_desc(irq);
        int cpu, vector = data->cfg.vector;
 
        BUG_ON(!vector);
@@ -239,10 +237,6 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
        data->cfg.vector = 0;
        cpumask_clear(data->domain);
 
-       if (likely(!data->move_in_progress))
-               return;
-
-       desc = irq_to_desc(irq);
        for_each_cpu_and(cpu, data->old_domain, cpu_online_mask) {
                for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
                     vector++) {
@@ -424,10 +418,13 @@ static void __setup_vector_irq(int cpu)
                struct irq_data *idata = irq_desc_get_irq_data(desc);
 
                data = apic_chip_data(idata);
-               if (!data || !cpumask_test_cpu(cpu, data->domain))
-                       continue;
-               vector = data->cfg.vector;
-               per_cpu(vector_irq, cpu)[vector] = desc;
+               if (data) {
+                       cpumask_clear_cpu(cpu, data->old_domain);
+                       if (cpumask_test_cpu(cpu, data->domain)) {
+                               vector = data->cfg.vector;
+                               per_cpu(vector_irq, cpu)[vector] = desc;
+                       }
+               }
        }
        /* Mark the free vectors */
        for (vector = 0; vector < NR_VECTORS; ++vector) {
@@ -509,20 +506,17 @@ static struct irq_chip lapic_controller = {
 #ifdef CONFIG_SMP
 static void __send_cleanup_vector(struct apic_chip_data *data)
 {
-       cpumask_var_t cleanup_mask;
-
-       if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
-               unsigned int i;
+       unsigned long flags;
 
-               for_each_cpu_and(i, data->old_domain, cpu_online_mask)
-                       apic->send_IPI_mask(cpumask_of(i),
-                                           IRQ_MOVE_CLEANUP_VECTOR);
-       } else {
-               cpumask_and(cleanup_mask, data->old_domain, cpu_online_mask);
-               apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-               free_cpumask_var(cleanup_mask);
-       }
+       raw_spin_lock_irqsave(&vector_lock, flags);
+       if (!data->move_in_progress)
+               goto out_unlock;
        data->move_in_progress = 0;
+       cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
+       if (!cpumask_empty(data->old_domain))
+               apic->send_IPI_mask(data->old_domain, IRQ_MOVE_CLEANUP_VECTOR);
+out_unlock:
+       raw_spin_unlock_irqrestore(&vector_lock, flags);
 }
 
 void send_cleanup_vector(struct irq_cfg *cfg)
@@ -566,14 +560,10 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
                        goto unlock;
 
                /*
-                * Check if the irq migration is in progress. If so, we
-                * haven't received the cleanup request yet for this irq.
+                * Nothing to cleanup if this cpu is not set
+                * in the old_domain mask.
                 */
-               if (data->move_in_progress)
-                       goto unlock;
-
-               if (vector == data->cfg.vector &&
-                   cpumask_test_cpu(me, data->domain))
+               if (!cpumask_test_cpu(me, data->old_domain))
                        goto unlock;
 
                irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
@@ -589,6 +579,7 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
                        goto unlock;
                }
                __this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
+               cpumask_clear_cpu(me, data->old_domain);
 unlock:
                raw_spin_unlock(&desc->lock);
        }
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[tip:x86/urgent] x86/irq: Fix a race condition between vector assigning and cleanup

Reply via email to