On Thu, 2021-01-21 at 15:42 +0000, David Woodhouse wrote:
> [    2.289283] BUG: kernel NULL pointer dereference, address: 0000000000000000
> [    2.289283] #PF: supervisor write access in kernel mode
> [    2.289283] #PF: error_code(0x0002) - not-present page
> [    2.289283] PGD 0 P4D 0 
> [    2.289283] Oops: 0002 [#1] SMP PTI
> [    2.289283] CPU: 32 PID: 0 Comm: swapper/32 Not tainted 5.10.0+ #745
> [    2.289283] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 
> 1.14.0-1.fc33 04/01/2014
> [    2.289283] RIP: 0010:init_x2apic_ldr+0xa0/0xb0


OK... in alloc_clustermask() for each CPU we were preallocating a
cluster_mask and storing it in the global cluster_hotplug_mask.

Then later, in init_x2apic_ldr() as each CPU came up, we consumed the
preallocated cluster_mask and set cluster_hotplug_mask back to NULL.

That doesn't parallelise well :)

So... ditch the global variable, and let alloc_clustermask() install the
appropriate cluster_mask *directly* into the target CPU's per_cpu data
before that CPU is brought up. And since we already have to calculate the
logical APIC ID in order to derive the cluster ID, we might as well set
x86_cpu_to_logical_apicid at the same time.

Now all that init_x2apic_ldr() actually *does* on the target CPU is set
that CPU's bit in the pre-existing cluster_mask.

To reduce the number of loops over all (present or online) CPUs, I've
made it set the per_cpu cluster_mask for *all* CPUs in the cluster in
one pass at boot time. I think the case for later hotplug is also sane;
will have to test that.

But at least it now passes the qemu boot test that it was failing earlier...
 
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c 
b/arch/x86/kernel/apic/x2apic_cluster.c
index b0889c48a2ac..74bb4cae8b5b 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -18,7 +18,6 @@ struct cluster_mask {
 static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
 static DEFINE_PER_CPU(cpumask_var_t, ipi_mask);
 static DEFINE_PER_CPU(struct cluster_mask *, cluster_masks);
-static struct cluster_mask *cluster_hotplug_mask;
 
 static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 {
@@ -98,54 +97,61 @@ static u32 x2apic_calc_apicid(unsigned int cpu)
 static void init_x2apic_ldr(void)
 {
        struct cluster_mask *cmsk = this_cpu_read(cluster_masks);
-       u32 cluster, apicid = apic_read(APIC_LDR);
-       unsigned int cpu;
 
-       this_cpu_write(x86_cpu_to_logical_apicid, apicid);
+       BUG_ON(!cmsk);
 
-       if (cmsk)
-               goto update;
-
-       cluster = apicid >> 16;
-       for_each_online_cpu(cpu) {
-               cmsk = per_cpu(cluster_masks, cpu);
-               /* Matching cluster found. Link and update it. */
-               if (cmsk && cmsk->clusterid == cluster)
-                       goto update;
-       }
-       cmsk = cluster_hotplug_mask;
-       cmsk->clusterid = cluster;
-       cluster_hotplug_mask = NULL;
-update:
-       this_cpu_write(cluster_masks, cmsk);
        cpumask_set_cpu(smp_processor_id(), &cmsk->mask);
 }
 
-static int alloc_clustermask(unsigned int cpu, int node)
+static int alloc_clustermask(unsigned int cpu, u32 cluster, int node)
 {
+       struct cluster_mask *cmsk = NULL;
+       u32 apicid;
+
        if (per_cpu(cluster_masks, cpu))
                return 0;
-       /*
-        * If a hotplug spare mask exists, check whether it's on the right
-        * node. If not, free it and allocate a new one.
+
+       /* For the hotplug case, don't always allocate a new one */
+       for_each_online_cpu(cpu) {
+               apicid = apic->cpu_present_to_apicid(cpu);
+               if (apicid != BAD_APICID && apicid >> 4 == cluster) {
+                       cmsk = per_cpu(cluster_masks, cpu);
+                       if (cmsk)
+                               break;
+               }
+       }
+       if (!cmsk)
+               cmsk = kzalloc_node(sizeof(*cmsk), GFP_KERNEL, node);
+       if (!cmsk)
+               return -ENOMEM;
+
+       cmsk->node = node;
+       cmsk->clusterid = cluster;
+
+        /*
+        * As an optimisation during boot, set the cluster_mask for *all*
+        * present CPUs at once, which will include 'cpu'.
         */
-       if (cluster_hotplug_mask) {
-               if (cluster_hotplug_mask->node == node)
-                       return 0;
-               kfree(cluster_hotplug_mask);
+       if (system_state < SYSTEM_RUNNING) {
+               for_each_present_cpu(cpu) {
+                       u32 apicid = apic->cpu_present_to_apicid(cpu);
+                       if (apicid != BAD_APICID && apicid >> 4 == cluster)
+                               per_cpu(cluster_masks, cpu) = cmsk;
+               }
        }
 
-       cluster_hotplug_mask = kzalloc_node(sizeof(*cluster_hotplug_mask),
-                                           GFP_KERNEL, node);
-       if (!cluster_hotplug_mask)
-               return -ENOMEM;
-       cluster_hotplug_mask->node = node;
        return 0;
 }
 
 static int x2apic_prepare_cpu(unsigned int cpu)
 {
-       if (alloc_clustermask(cpu, cpu_to_node(cpu)) < 0)
+       u32 phys_apicid = apic->cpu_present_to_apicid(cpu);
+       u32 cluster = phys_apicid >> 4;
+       u32 logical_apicid = (cluster << 16) | (1 << (phys_apicid & 0xf));
+
+       per_cpu(x86_cpu_to_logical_apicid, cpu) = logical_apicid;
+
+       if (alloc_clustermask(cpu, cluster, cpu_to_node(cpu)) < 0)
                return -ENOMEM;
        if (!zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL))
                return -ENOMEM;

Attachment: smime.p7s
Description: S/MIME cryptographic signature

Reply via email to