This speeds up smp again, by storing the struct processor
in a percpu area and avoiding an expensive cpu_number every call
of current_processor(), as well as getting the cpu_number by
an offset into the percpu area. Untested on 64 bit
and work remains to use other percpu arrays.
TESTED: (NCPUS=8) -smp 1 boots to login shell ~2x slower than uniprocessor
TESTED: (NCPUS=8) -smp 4 boots to INIT but hangs there
TESTED: (NCPUS=1) uniprocessor is a bit faster than normal
---
i386/Makefrag.am| 2 +
i386/i386/cpu_number.c | 8 +++-
i386/i386/cpu_number.h | 13 --
i386/i386/fpu.c | 2 +-
i386/i386/gdt.c | 20 --
i386/i386/gdt.h | 11 +++---
i386/i386/i386asm.sym | 8 +---
i386/i386/locore.S | 21 ++
i386/i386/mp_desc.c | 3 +-
i386/i386/percpu.c | 30 ++
i386/i386/percpu.h | 88 +
i386/i386/pit.c | 2 +-
i386/i386/spl.S | 16
i386/i386at/model_dep.c | 1 +
kern/cpu_number.h | 3 +-
kern/processor.c| 7 +---
kern/processor.h| 18 +++--
kern/startup.c | 3 +-
x86_64/Makefrag.am | 2 +
x86_64/locore.S | 8 ++--
20 files changed, 206 insertions(+), 60 deletions(-)
create mode 100644 i386/i386/percpu.c
create mode 100644 i386/i386/percpu.h
diff --git a/i386/Makefrag.am b/i386/Makefrag.am
index 274e8695..c1724cea 100644
--- a/i386/Makefrag.am
+++ b/i386/Makefrag.am
@@ -108,6 +108,8 @@ libkernel_a_SOURCES += \
i386/i386/irq.c \
i386/i386/irq.h \
i386/i386/msr.h \
+ i386/i386/percpu.c \
+ i386/i386/percpu.h \
i386/i386/pit.c \
i386/i386/pit.h
diff --git a/i386/i386/cpu_number.c b/i386/i386/cpu_number.c
index ef19e11f..83c86d82 100644
--- a/i386/i386/cpu_number.c
+++ b/i386/i386/cpu_number.c
@@ -20,11 +20,17 @@
#include
#include
#include
+#include
#include
#if NCPUS > 1
-int cpu_number(void)
+inline int cpu_number_slow(void)
{
return cpu_id_lut[apic_get_current_cpu()];
}
+
+inline int cpu_number(void)
+{
+ return percpu_get(int, cpu_id);
+}
#endif
diff --git a/i386/i386/cpu_number.h b/i386/i386/cpu_number.h
index 479a847a..b50ae275 100644
--- a/i386/i386/cpu_number.h
+++ b/i386/i386/cpu_number.h
@@ -30,6 +30,8 @@
#ifndef_I386_CPU_NUMBER_H_
#define_I386_CPU_NUMBER_H_
+#define MY(stm)%gs:PERCPU_##stm
+
#ifNCPUS > 1
#ifdef __i386__
@@ -45,8 +47,8 @@
shrl$24, reg;\
movl%cs:CX(cpu_id_lut, reg), reg;\
-/* Never call CPU_NUMBER(%esi) */
-#define CPU_NUMBER(reg)\
+/* Never call CPU_NUMBER_NO_GS(%esi) */
+#define CPU_NUMBER_NO_GS(reg) \
pushl %esi;\
pushl %eax;\
pushl %ebx;\
@@ -63,14 +65,19 @@
movl%esi, reg ;\
popl%esi;\
+#define CPU_NUMBER(reg)\
+ movlMY(CPU_ID), reg;
+
#ifndef __ASSEMBLER__
#include "kern/cpu_number.h"
-int cpu_number(void);
+inline int cpu_number_slow(void);
+inline int cpu_number(void);
#endif
#else /* NCPUS == 1 */
#defineCPU_NUMBER_NO_STACK(reg)
+#defineCPU_NUMBER_NO_GS(reg)
#defineCPU_NUMBER(reg)
#defineCX(addr,reg)addr
diff --git a/i386/i386/fpu.c b/i386/i386/fpu.c
index fefe5e49..e1818683 100644
--- a/i386/i386/fpu.c
+++ b/i386/i386/fpu.c
@@ -119,7 +119,7 @@ init_fpu(void)
#else /* MACH_RING1 */
unsigned int native = 0;
- if (machine_slot[cpu_number()].cpu_type >= CPU_TYPE_I486)
+ if (machine_slot[cpu_number_slow()].cpu_type >= CPU_TYPE_I486)
native = CR0_NE;
/*
diff --git a/i386/i386/gdt.c b/i386/i386/gdt.c
index ddda603b..524b1e85 100644
--- a/i386/i386/gdt.c
+++ b/i386/i386/gdt.c
@@ -35,6 +35,7 @@
#include
#include
+#include
#include "vm_param.h"
#include "seg.h"
@@ -48,7 +49,7 @@ extern
struct real_descriptor gdt[GDTSZ];
static void
-gdt_fill(struct real_descriptor *mygdt)
+gdt_fill(int cpu, struct real_descriptor *mygdt)
{
/* Initialize the kernel code and data segment descriptors. */
#ifdef __x86_64__
@@ -73,6 +74,16 @@ gdt_fill(struct real_descriptor *mygdt)
0x,
ACC_PL_K|ACC_DATA_W, SZ_32);
#endif /* MACH_PV_DESCRIPTORS */
+ vm_offset_t thiscpu = kvtolin(&percpu_array[cpu]);
+ _fill_gdt_descriptor(mygdt, PERCPU_DS,
+ thiscpu,
+ thiscpu + sizeof(struct percpu) - 1,
+#ifdef __x86_64__
+ ACC_PL_K|ACC_DATA_W, SZ_64
+#else
+ ACC_PL_K|ACC_DATA_W, SZ_32
+#endif
+ );
#endif
#ifdef MACH_PV_DESCRIPTORS
@@ -119,15 +130,16 @@ reload_segs(void)
"movw %w1,%%ds\n"
"movw %w1,%%es\n"
+