init_espfix_ap() allocates pages with GFP_KERNEL, but it is called before
local IRQs are enabled, so the lockdep subsystem triggers a warning.

So allocate them on the boot CPU side, while the target CPU is being
brought up by the primary CPU, and hand them over to the secondary CPU.

Also use alloc_pages_node() with the secondary CPU's node, to make sure
the espfix stack is NUMA-local to the CPU that is going to use it.

Signed-off-by: Zhu Guihua <[email protected]>
---
 arch/x86/kernel/espfix_64.c | 21 +++++++++++++--------
 arch/x86/kernel/smpboot.c   | 14 +++++++-------
 2 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
index 1fb0e00..ce95676 100644
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -141,12 +141,12 @@ void init_espfix_ap(int cpu)
        pud_t pud, *pud_p;
        pmd_t pmd, *pmd_p;
        pte_t pte, *pte_p;
-       int n;
+       int n, node;
        void *stack_page;
        pteval_t ptemask;
 
        /* We only have to do this once... */
-       if (likely(this_cpu_read(espfix_stack)))
+       if (likely(per_cpu(espfix_stack, cpu)))
                return;         /* Already initialized */
 
        addr = espfix_base_addr(cpu);
@@ -164,12 +164,15 @@ void init_espfix_ap(int cpu)
        if (stack_page)
                goto unlock_done;
 
+       node = cpu_to_node(cpu);
        ptemask = __supported_pte_mask;
 
        pud_p = &espfix_pud_page[pud_index(addr)];
        pud = *pud_p;
        if (!pud_present(pud)) {
-               pmd_p = (pmd_t *)__get_free_page(PGALLOC_GFP);
+               struct page *page = alloc_pages_node(node, PGALLOC_GFP, 0);
+
+               pmd_p = (pmd_t *)page_address(page);
                pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask));
                paravirt_alloc_pmd(&init_mm, __pa(pmd_p) >> PAGE_SHIFT);
                for (n = 0; n < ESPFIX_PUD_CLONES; n++)
@@ -179,7 +182,9 @@ void init_espfix_ap(int cpu)
        pmd_p = pmd_offset(&pud, addr);
        pmd = *pmd_p;
        if (!pmd_present(pmd)) {
-               pte_p = (pte_t *)__get_free_page(PGALLOC_GFP);
+               struct page *page = alloc_pages_node(node, PGALLOC_GFP, 0);
+
+               pte_p = (pte_t *)page_address(page);
                pmd = __pmd(__pa(pte_p) | (PGTABLE_PROT & ptemask));
                paravirt_alloc_pte(&init_mm, __pa(pte_p) >> PAGE_SHIFT);
                for (n = 0; n < ESPFIX_PMD_CLONES; n++)
@@ -187,7 +192,7 @@ void init_espfix_ap(int cpu)
        }
 
        pte_p = pte_offset_kernel(&pmd, addr);
-       stack_page = (void *)__get_free_page(GFP_KERNEL);
+       stack_page = page_address(alloc_pages_node(node, GFP_KERNEL, 0));
        pte = __pte(__pa(stack_page) | (__PAGE_KERNEL_RO & ptemask));
        for (n = 0; n < ESPFIX_PTE_CLONES; n++)
                set_pte(&pte_p[n*PTE_STRIDE], pte);
@@ -198,7 +203,7 @@ void init_espfix_ap(int cpu)
 unlock_done:
        mutex_unlock(&espfix_init_mutex);
 done:
-       this_cpu_write(espfix_stack, addr);
-       this_cpu_write(espfix_waddr, (unsigned long)stack_page
-                      + (addr & ~PAGE_MASK));
+       per_cpu(espfix_stack, cpu) = addr;
+       per_cpu(espfix_waddr, cpu) = (unsigned long)stack_page
+                                     + (addr & ~PAGE_MASK);
 }
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 6f5abac..0bd8c1d 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -239,13 +239,6 @@ static void notrace start_secondary(void *unused)
        check_tsc_sync_target();
 
        /*
-        * Enable the espfix hack for this CPU
-        */
-#ifdef CONFIG_X86_ESPFIX64
-       init_espfix_ap(smp_processor_id());
-#endif
-
-       /*
         * We need to hold vector_lock so there the set of online cpus
         * does not change while we are assigning vectors to cpus.  Holding
         * this lock ensures we don't half assign or remove an irq from a cpu.
@@ -854,6 +847,13 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
        initial_code = (unsigned long)start_secondary;
        stack_start  = idle->thread.sp;
 
+       /*
+        * Enable the espfix hack for this CPU
+       */
+#ifdef CONFIG_X86_ESPFIX64
+       init_espfix_ap(cpu);
+#endif
+
        /* So we see what's up */
        announce_cpu(cpu, apicid);
 
-- 
1.9.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to