Module Name: src Committed By: cherry Date: Sat Aug 13 12:37:30 UTC 2011
Modified Files: src/sys/arch/x86/include: cpuvar.h src/sys/arch/xen/x86: cpu.c src/sys/arch/xen/xen: hypervisor.c Log Message: MP probing and startup code To generate a diff of this commit: cvs rdiff -u -r1.44 -r1.45 src/sys/arch/x86/include/cpuvar.h cvs rdiff -u -r1.61 -r1.62 src/sys/arch/xen/x86/cpu.c cvs rdiff -u -r1.55 -r1.56 src/sys/arch/xen/xen/hypervisor.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/arch/x86/include/cpuvar.h diff -u src/sys/arch/x86/include/cpuvar.h:1.44 src/sys/arch/x86/include/cpuvar.h:1.45 --- src/sys/arch/x86/include/cpuvar.h:1.44 Sun Jun 12 03:35:50 2011 +++ src/sys/arch/x86/include/cpuvar.h Sat Aug 13 12:37:30 2011 @@ -1,4 +1,4 @@ -/* $NetBSD: cpuvar.h,v 1.44 2011/06/12 03:35:50 rmind Exp $ */ +/* $NetBSD: cpuvar.h,v 1.45 2011/08/13 12:37:30 cherry Exp $ */ /*- * Copyright (c) 2000, 2007 The NetBSD Foundation, Inc. @@ -67,7 +67,11 @@ #define _X86_CPUVAR_H_ struct cpu_functions { +#ifndef XEN int (*start)(struct cpu_info *, paddr_t); +#else /* XEN */ + int (*start)(struct cpu_info *, vaddr_t); +#endif /* XEN */ int (*stop)(struct cpu_info *); void (*cleanup)(struct cpu_info *); }; Index: src/sys/arch/xen/x86/cpu.c diff -u src/sys/arch/xen/x86/cpu.c:1.61 src/sys/arch/xen/x86/cpu.c:1.62 --- src/sys/arch/xen/x86/cpu.c:1.61 Thu Aug 11 18:11:17 2011 +++ src/sys/arch/xen/x86/cpu.c Sat Aug 13 12:37:30 2011 @@ -1,4 +1,4 @@ -/* $NetBSD: cpu.c,v 1.61 2011/08/11 18:11:17 cherry Exp $ */ +/* $NetBSD: cpu.c,v 1.62 2011/08/13 12:37:30 cherry Exp $ */ /* NetBSD: cpu.c,v 1.18 2004/02/20 17:35:01 yamt Exp */ /*- @@ -66,7 +66,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.61 2011/08/11 18:11:17 cherry Exp $"); +__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.62 2011/08/13 12:37:30 cherry Exp $"); #include "opt_ddb.h" #include "opt_multiprocessor.h" @@ -85,6 +85,7 @@ #include <sys/cpu.h> #include <sys/atomic.h> #include <sys/reboot.h> +#include <sys/idle.h> #include <uvm/uvm.h> @@ -100,6 +101,14 @@ #include <machine/mtrr.h> #include <machine/pio.h> +#ifdef i386 +#include <machine/npx.h> +#else +#include <machine/fpu.h> +#endif + +#include <xen/xen.h> +#include <xen/xen3-public/vcpu.h> #include <xen/vcpuvar.h> #if NLAPIC > 0 @@ -131,7 +140,7 @@ bool sc_wasonline; }; -int mp_cpu_start(struct cpu_info *, paddr_t); +int mp_cpu_start(struct cpu_info *, vaddr_t); void mp_cpu_start_cleanup(struct cpu_info *); const struct cpu_functions mp_cpu_funcs = { mp_cpu_start, NULL, mp_cpu_start_cleanup }; @@ -171,8 +180,6 @@ struct cpu_info *cpu_info_list = &cpu_info_primary; struct cpu_info *phycpu_info_list = &phycpu_info_primary; -static void cpu_set_tss_gates(struct cpu_info *ci); - uint32_t cpus_attached = 1; uint32_t cpus_running = 1; @@ -194,21 +201,6 @@ void cpu_hatch(void *); static void cpu_boot_secondary(struct cpu_info *ci); static void cpu_start_secondary(struct cpu_info *ci); -static void cpu_copy_trampoline(void); - -/* - * Runs once per boot once multiprocessor goo has been detected and - * the local APIC on the boot processor has been mapped. - * - * Called from lapic_boot_init() (from mpbios_scan()). - */ -void -cpu_init_first(void) -{ - - cpu_info_primary.ci_cpuid = lapic_cpu_number(); - cpu_copy_trampoline(); -} #endif /* MULTIPROCESSOR */ static int @@ -317,9 +309,23 @@ vcpu_match(device_t parent, cfdata_t match, void *aux) { struct vcpu_attach_args *vcaa = aux; + struct vcpu_runstate_info vcr; + int error; + + if (strcmp(vcaa->vcaa_name, match->cf_name) == 0) { + error = HYPERVISOR_vcpu_op(VCPUOP_get_runstate_info, + vcaa->vcaa_caa.cpu_number, + &vcr); + switch (error) { + case 0: + return 1; + case -ENOENT: + return 0; + default: + panic("Unknown hypervisor error %d returned on vcpu runstate probe\n", error); + } + } - if (strcmp(vcaa->vcaa_name, match->cf_name) == 0) - return 1; return 0; } @@ -328,9 +334,18 @@ { struct vcpu_attach_args *vcaa = aux; + KASSERT(vcaa->vcaa_caa.cpu_func == NULL); + vcaa->vcaa_caa.cpu_func = &mp_cpu_funcs; cpu_attach_common(parent, self, &vcaa->vcaa_caa); } +static int +vcpu_is_up(struct cpu_info *ci) +{ + KASSERT(ci != NULL); + return HYPERVISOR_vcpu_op(VCPUOP_is_up, ci->ci_cpuid, NULL); +} + static void cpu_vm_init(struct cpu_info *ci) { @@ -395,22 +410,6 @@ aprint_naive(": %s Processor\n", caa->cpu_role == CPU_ROLE_SP ? "Single" : "Boot"); ci = &cpu_info_primary; -#if NLAPIC > 0 - if (cpunum != lapic_cpu_number()) { - /* XXX should be done earlier */ - uint32_t reg; - aprint_verbose("\n"); - aprint_verbose_dev(self, "running CPU at apic %d" - " instead of at expected %d", lapic_cpu_number(), - cpunum); - reg = i82489_readreg(LAPIC_ID); - i82489_writereg(LAPIC_ID, (reg & ~LAPIC_ID_MASK) | - (cpunum << LAPIC_ID_SHIFT)); - } - if (cpunum != lapic_cpu_number()) { - aprint_error_dev(self, "unable to reset apic id\n"); - } -#endif } ci->ci_self = ci; @@ -421,6 +420,7 @@ KASSERT(HYPERVISOR_shared_info != NULL); ci->ci_vcpu = &HYPERVISOR_shared_info->vcpu_info[cpunum]; + KASSERT(ci->ci_func == 0); ci->ci_func = caa->cpu_func; /* Must be called before mi_cpu_attach(). */ @@ -430,12 +430,15 @@ int error; error = mi_cpu_attach(ci); + + KASSERT(ci->ci_data.cpu_idlelwp != NULL); if (error != 0) { aprint_normal("\n"); aprint_error_dev(self, "mi_cpu_attach failed with %d\n", error); return; } + } else { KASSERT(ci->ci_data.cpu_idlelwp != NULL); } @@ -455,15 +458,12 @@ cpu_intr_init(ci); cpu_get_tsc_freq(ci); cpu_init(ci); - cpu_set_tss_gates(ci); -#if NLAPIC > 0 - if (caa->cpu_role != CPU_ROLE_SP) { - /* Enable lapic. */ - lapic_enable(); - lapic_set_lvt(); - lapic_calibrate_timer(); - } -#endif + pmap_cpu_init_late(ci); /* XXX: cosmetic */ + + /* Every processor needs to init it's own ipi h/w (similar to lapic) */ + xen_ipi_init(); + /* XXX: clock_init() */ + /* Make sure DELAY() is initialized. */ DELAY(1); again = true; @@ -479,6 +479,7 @@ x86_errata(); #endif x86_cpu_idle_init(); + break; case CPU_ROLE_BP: @@ -489,22 +490,30 @@ x86_errata(); #endif x86_cpu_idle_init(); + break; case CPU_ROLE_AP: + atomic_or_32(&ci->ci_flags, CPUF_AP); + /* * report on an AP */ #if defined(MULTIPROCESSOR) + /* interrupt handler stack */ cpu_intr_init(ci); + + /* Setup per-cpu memory for gdt */ gdt_alloc_cpu(ci); - cpu_set_tss_gates(ci); + + pmap_cpu_init_late(ci); cpu_start_secondary(ci); + if (ci->ci_flags & CPUF_PRESENT) { struct cpu_info *tmp; - identifycpu(ci); + cpu_identify(ci); tmp = cpu_info_list; while (tmp->ci_next) tmp = tmp->ci_next; @@ -512,7 +521,7 @@ tmp->ci_next = ci; } #else - aprint_error_dev(self, "not started\n"); + aprint_error(": not started\n"); #endif break; @@ -529,7 +538,7 @@ aprint_error_dev(self, "couldn't establish power handler\n"); #endif -#if defined(MULTIPROCESSOR) +#ifdef MPVERBOSE if (mp_verbose) { struct lwp *l = ci->ci_data.cpu_idlelwp; struct pcb *pcb = lwp_getpcb(l); @@ -539,13 +548,13 @@ l, #ifdef i386 (void *)pcb->pcb_esp -#else +#else /* i386 */ (void *)pcb->pcb_rsp -#endif +#endif /* i386 */ ); } -#endif +#endif /* MPVERBOSE */ } /* @@ -593,16 +602,18 @@ atomic_or_32(&cpus_running, ci->ci_cpumask); atomic_or_32(&ci->ci_flags, CPUF_RUNNING); + + /* XXX: register vcpu_register_runstate_memory_area, and figure out how to make sure this VCPU is running ? */ } #ifdef MULTIPROCESSOR + void cpu_boot_secondary_processors(void) { struct cpu_info *ci; u_long i; - for (i = 0; i < maxcpus; i++) { ci = cpu_lookup(i); if (ci == NULL) @@ -646,36 +657,23 @@ } } -void +static void cpu_start_secondary(struct cpu_info *ci) { int i; - struct pmap *kpm = pmap_kernel(); - extern uint32_t mp_pdirpa; - - mp_pdirpa = kpm->pm_pdirpa; /* XXX move elsewhere, not per CPU. */ - - atomic_or_32(&ci->ci_flags, CPUF_AP); aprint_debug_dev(ci->ci_dev, "starting\n"); ci->ci_curlwp = ci->ci_data.cpu_idlelwp; - if (CPU_STARTUP(ci, mp_trampoline_paddr) != 0) + + if (CPU_STARTUP(ci, (vaddr_t) cpu_hatch) != 0) { return; + } /* * wait for it to become ready */ for (i = 100000; (!(ci->ci_flags & CPUF_PRESENT)) && i > 0; i--) { -#ifdef MPDEBUG - extern int cpu_trace[3]; - static int otrace[3]; - if (memcmp(otrace, cpu_trace, sizeof(otrace)) != 0) { - aprint_debug_dev(ci->ci_dev, "trace %02x %02x %02x\n", - cpu_trace[0], cpu_trace[1], cpu_trace[2]); - memcpy(otrace, cpu_trace, sizeof(otrace)); - } -#endif delay(10); } if ((ci->ci_flags & CPUF_PRESENT) == 0) { @@ -693,7 +691,6 @@ cpu_boot_secondary(struct cpu_info *ci) { int i; - atomic_or_32(&ci->ci_flags, CPUF_GO); for (i = 100000; (!(ci->ci_flags & CPUF_RUNNING)) && i > 0; i--) { delay(10); @@ -708,13 +705,13 @@ } /* - * The CPU ends up here when its ready to run - * This is called from code in mptramp.s; at this point, we are running - * in the idle pcb/idle stack of the new CPU. When this function returns, - * this processor will enter the idle loop and start looking for work. - * - * XXX should share some of this with init386 in machdep.c + * APs end up here immediately after initialisation and VCPUOP_up in + * mp_cpu_start(). + * At this point, we are running in the idle pcb/idle stack of the new + * CPU. This function jumps to the idle loop and starts looking for + * work. */ +extern void x86_64_tls_switch(struct lwp *); void cpu_hatch(void *v) { @@ -722,15 +719,15 @@ struct pcb *pcb; int s, i; - cpu_probe(ci); - - cpu_feature[0] &= ~CPUID_FEAT_BLACKLIST; - cpu_feature[2] &= ~CPUID_FEAT_EXT_BLACKLIST; + /* Setup TLS and kernel GS/FS */ + cpu_init_msrs(ci, true); + cpu_init_idt(); + gdt_init_cpu(ci); - cpu_init_msrs(ci, true); + cpu_probe(ci); - KDASSERT((ci->ci_flags & CPUF_PRESENT) == 0); atomic_or_32(&ci->ci_flags, CPUF_PRESENT); + while ((ci->ci_flags & CPUF_GO) == 0) { /* Don't use delay, boot CPU may be patching the text. */ for (i = 10000; i != 0; i--) @@ -738,50 +735,43 @@ } /* Because the text may have been patched in x86_patch(). */ - wbinvd(); x86_flush(); tlbflushg(); KASSERT((ci->ci_flags & CPUF_RUNNING) == 0); pcb = lwp_getpcb(curlwp); - lcr3(pmap_kernel()->pm_pdirpa); - pcb->pcb_cr3 = pmap_kernel()->pm_pdirpa; + pcb->pcb_cr3 = pmap_pdirpa(pmap_kernel(), 0); /* XXX: consider using pmap_load() ? */ pcb = lwp_getpcb(ci->ci_data.cpu_idlelwp); - lcr0(pcb->pcb_cr0); - cpu_init_idt(); - gdt_init_cpu(ci); - lapic_enable(); - lapic_set_lvt(); - lapic_initclocks(); + xen_ipi_init(); -#ifdef i386 - npxinit(ci); -#else + xen_initclocks(); + + /* XXX: lapic_initclocks(); */ + +#ifdef __x86_64__ fpuinit(ci); #endif lldt(GSEL(GLDT_SEL, SEL_KPL)); - ltr(ci->ci_tss_sel); cpu_init(ci); cpu_get_tsc_freq(ci); s = splhigh(); -#ifdef i386 - lapic_tpr = 0; -#else - lcr8(0); -#endif x86_enable_intr(); splx(s); #if 0 x86_errata(); #endif - aprint_debug_dev(ci->ci_dev, "CPU %ld running\n", - (long)ci->ci_cpuid); + aprint_debug_dev(ci->ci_dev, "running\n"); + + cpu_switchto(NULL, ci->ci_data.cpu_idlelwp, true); + + panic("switch to idle_loop context returned!\n"); + /* NOTREACHED */ } #if defined(DDB) @@ -811,189 +801,282 @@ } #endif /* DDB */ -static void -cpu_copy_trampoline(void) -{ - /* - * Copy boot code. - */ - extern u_char cpu_spinup_trampoline[]; - extern u_char cpu_spinup_trampoline_end[]; - - vaddr_t mp_trampoline_vaddr; - - mp_trampoline_vaddr = uvm_km_alloc(kernel_map, PAGE_SIZE, 0, - UVM_KMF_VAONLY); +#endif /* MULTIPROCESSOR */ - pmap_kenter_pa(mp_trampoline_vaddr, mp_trampoline_paddr, - VM_PROT_READ | VM_PROT_WRITE, 0); - pmap_update(pmap_kernel()); - memcpy((void *)mp_trampoline_vaddr, - cpu_spinup_trampoline, - cpu_spinup_trampoline_end - cpu_spinup_trampoline); - - pmap_kremove(mp_trampoline_vaddr, PAGE_SIZE); - pmap_update(pmap_kernel()); - uvm_km_free(kernel_map, mp_trampoline_vaddr, PAGE_SIZE, UVM_KMF_VAONLY); -} +extern void hypervisor_callback(void); +extern void failsafe_callback(void); +#ifdef __x86_64__ +typedef void (vector)(void); +extern vector Xsyscall, Xsyscall32; +#endif -#endif /* MULTIPROCESSOR */ +/* + * Setup the "trampoline". On Xen, we setup nearly all cpu context + * outside a trampoline, so we prototype and call targetip like so: + * void targetip(struct cpu_info *); + */ -#ifdef i386 -#if 0 static void -tss_init(struct i386tss *tss, void *stack, void *func) +gdt_prepframes(paddr_t *frames, vaddr_t base, uint32_t entries) { - memset(tss, 0, sizeof *tss); - tss->tss_esp0 = tss->tss_esp = (int)((char *)stack + USPACE - 16); - tss->tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); - tss->__tss_cs = GSEL(GCODE_SEL, SEL_KPL); - tss->tss_fs = GSEL(GCPU_SEL, SEL_KPL); - tss->tss_gs = tss->__tss_es = tss->__tss_ds = - tss->__tss_ss = GSEL(GDATA_SEL, SEL_KPL); - tss->tss_cr3 = pmap_kernel()->pm_pdirpa; - tss->tss_esp = (int)((char *)stack + USPACE - 16); - tss->tss_ldt = GSEL(GLDT_SEL, SEL_KPL); - tss->__tss_eflags = PSL_MBO | PSL_NT; /* XXX not needed? */ - tss->__tss_eip = (int)func; + int i; + for (i = 0; i < roundup(entries, PAGE_SIZE) >> PAGE_SHIFT; i++) { + + frames[i] = ((paddr_t) xpmap_ptetomach( + (pt_entry_t *) (base + (i << PAGE_SHIFT)))) + >> PAGE_SHIFT; + + /* Mark Read-only */ + pmap_pte_clearbits(kvtopte(base + (i << PAGE_SHIFT)), + PG_RW); + } } -#endif -/* XXX */ -#define IDTVEC(name) __CONCAT(X, name) -typedef void (vector)(void); -extern vector IDTVEC(tss_trap08); -#ifdef DDB -extern vector Xintrddbipi; -extern int ddb_vec; -#endif +#ifdef __x86_64__ +extern char *ldtstore; /* XXX: Xen MP todo */ static void -cpu_set_tss_gates(struct cpu_info *ci) -{ -#if 0 - struct segment_descriptor sd; +xen_init_amd64_vcpuctxt(struct cpu_info *ci, + struct vcpu_guest_context *initctx, + void targetrip(struct cpu_info *)) +{ + /* page frames to point at GDT */ + extern int gdt_size; + paddr_t frames[16]; + psize_t gdt_ents; - ci->ci_doubleflt_stack = (char *)uvm_km_alloc(kernel_map, USPACE, 0, - UVM_KMF_WIRED); - tss_init(&ci->ci_doubleflt_tss, ci->ci_doubleflt_stack, - IDTVEC(tss_trap08)); - setsegment(&sd, &ci->ci_doubleflt_tss, sizeof(struct i386tss) - 1, - SDT_SYS386TSS, SEL_KPL, 0, 0); - ci->ci_gdt[GTRAPTSS_SEL].sd = sd; - setgate(&idt[8], NULL, 0, SDT_SYSTASKGT, SEL_KPL, - GSEL(GTRAPTSS_SEL, SEL_KPL)); -#endif + struct lwp *l; + struct pcb *pcb; -#if defined(DDB) && defined(MULTIPROCESSOR) - /* - * Set up separate handler for the DDB IPI, so that it doesn't - * stomp on a possibly corrupted stack. - * - * XXX overwriting the gate set in db_machine_init. - * Should rearrange the code so that it's set only once. + volatile struct vcpu_info *vci; + + KASSERT(ci != NULL); + KASSERT(ci != &cpu_info_primary); + KASSERT(initctx != NULL); + KASSERT(targetrip != NULL); + + memset(initctx, 0, sizeof *initctx); + + gdt_ents = roundup(gdt_size, PAGE_SIZE) >> PAGE_SHIFT; /* XXX: re-investigate roundup(gdt_size... ) for gdt_ents. */ + KASSERT(gdt_ents <= 16); + + gdt_prepframes(frames, (vaddr_t) ci->ci_gdt, gdt_ents); + + /* XXX: The stuff in here is amd64 specific. move to mptramp.[Sc] ? */ + + /* Initialise the vcpu context: We use idle_loop()'s pcb context. */ + + l = ci->ci_data.cpu_idlelwp; + + KASSERT(l != NULL); + pcb = lwp_getpcb(l); + KASSERT(pcb != NULL); + + /* resume with interrupts off */ + vci = ci->ci_vcpu; + vci->evtchn_upcall_mask = 1; + xen_mb(); + + /* resume in kernel-mode */ + initctx->flags = VGCF_in_kernel | VGCF_online; + + /* Stack and entry points: + * We arrange for the stack frame for cpu_hatch() to + * appear as a callee frame of lwp_trampoline(). Being a + * leaf frame prevents trampling on any of the MD stack setup + * that x86/vm_machdep.c:cpu_lwp_fork() does for idle_loop() */ - ci->ci_ddbipi_stack = (char *)uvm_km_alloc(kernel_map, USPACE, 0, - UVM_KMF_WIRED); - tss_init(&ci->ci_ddbipi_tss, ci->ci_ddbipi_stack, - Xintrddbipi); - - setsegment(&sd, &ci->ci_ddbipi_tss, sizeof(struct i386tss) - 1, - SDT_SYS386TSS, SEL_KPL, 0, 0); - ci->ci_gdt[GIPITSS_SEL].sd = sd; - setgate(&idt[ddb_vec], NULL, 0, SDT_SYSTASKGT, SEL_KPL, - GSEL(GIPITSS_SEL, SEL_KPL)); -#endif + initctx->user_regs.rdi = (uint64_t) ci; /* targetrip(ci); */ + initctx->user_regs.rip = (vaddr_t) targetrip; + + initctx->user_regs.cs = GSEL(GCODE_SEL, SEL_KPL); + + initctx->user_regs.rflags = pcb->pcb_flags; + initctx->user_regs.rsp = pcb->pcb_rsp; + + /* Data segments */ + initctx->user_regs.ss = GSEL(GDATA_SEL, SEL_KPL); + initctx->user_regs.es = GSEL(GDATA_SEL, SEL_KPL); + initctx->user_regs.ds = GSEL(GDATA_SEL, SEL_KPL); + + /* GDT */ + memcpy(initctx->gdt_frames, frames, sizeof frames); + initctx->gdt_ents = gdt_ents; + + /* LDT */ + initctx->ldt_base = (unsigned long) ldtstore; + initctx->ldt_ents = LDT_SIZE >> 3; + + /* Kernel context state */ + initctx->kernel_ss = GSEL(GDATA_SEL, SEL_KPL); + initctx->kernel_sp = pcb->pcb_rsp0; + initctx->ctrlreg[0] = pcb->pcb_cr0; + initctx->ctrlreg[1] = 0; /* "resuming" from kernel - no User cr3. */ + initctx->ctrlreg[2] = pcb->pcb_cr2; /* XXX: */ + /* + * Use pmap_kernel() L4 PD directly, until we setup the + * per-cpu L4 PD in pmap_cpu_init_late() + */ + initctx->ctrlreg[3] = xpmap_ptom(pcb->pcb_cr3); + initctx->ctrlreg[4] = CR4_PAE | CR4_OSFXSR | CR4_OSXMMEXCPT; + + + /* Xen callbacks */ + initctx->event_callback_eip = (unsigned long) hypervisor_callback; + initctx->failsafe_callback_eip = (unsigned long) failsafe_callback; + initctx->syscall_callback_eip = (unsigned long) Xsyscall; + + return; } -#else +#else /* i386 */ +extern union descriptor *ldt; +extern void Xsyscall(void); + static void -cpu_set_tss_gates(struct cpu_info *ci) -{ +xen_init_i386_vcpuctxt(struct cpu_info *ci, + struct vcpu_guest_context *initctx, + void targeteip(struct cpu_info *)) +{ + /* page frames to point at GDT */ + extern int gdt_size; + paddr_t frames[16]; + psize_t gdt_ents; -} -#endif /* i386 */ + struct lwp *l; + struct pcb *pcb; -int -mp_cpu_start(struct cpu_info *ci, paddr_t target) -{ -#if 0 -#if NLAPIC > 0 - int error; -#endif - unsigned short dwordptr[2]; + volatile struct vcpu_info *vci; - /* - * Bootstrap code must be addressable in real mode - * and it must be page aligned. + KASSERT(ci != NULL); + KASSERT(ci != &cpu_info_primary); + KASSERT(initctx != NULL); + KASSERT(targeteip != NULL); + + memset(initctx, 0, sizeof *initctx); + + gdt_ents = roundup(gdt_size, PAGE_SIZE) >> PAGE_SHIFT; /* XXX: re-investigate roundup(gdt_size... ) for gdt_ents. */ + KASSERT(gdt_ents <= 16); + + gdt_prepframes(frames, (vaddr_t) ci->ci_gdt, gdt_ents); + + /* + * Initialise the vcpu context: + * We use this cpu's idle_loop() pcb context. */ - KASSERT(target < 0x10000 && target % PAGE_SIZE == 0); - /* - * "The BSP must initialize CMOS shutdown code to 0Ah ..." + l = ci->ci_data.cpu_idlelwp; + + KASSERT(l != NULL); + pcb = lwp_getpcb(l); + KASSERT(pcb != NULL); + + /* resume with interrupts off */ + vci = ci->ci_vcpu; + vci->evtchn_upcall_mask = 1; + xen_mb(); + + /* resume in kernel-mode */ + initctx->flags = VGCF_in_kernel | VGCF_online; + + /* Stack frame setup for cpu_hatch(): + * We arrange for the stack frame for cpu_hatch() to + * appear as a callee frame of lwp_trampoline(). Being a + * leaf frame prevents trampling on any of the MD stack setup + * that x86/vm_machdep.c:cpu_lwp_fork() does for idle_loop() */ - outb(IO_RTC, NVRAM_RESET); - outb(IO_RTC+1, NVRAM_RESET_JUMP); + initctx->user_regs.esp = pcb->pcb_esp - 4; /* Leave word for + arg1 */ + { /* targeteip(ci); */ + uint32_t *arg = (uint32_t *) initctx->user_regs.esp; + arg[1] = (uint32_t) ci; /* arg1 */ - /* - * "and the warm reset vector (DWORD based at 40:67) to point - * to the AP startup code ..." + } + + initctx->user_regs.eip = (vaddr_t) targeteip; + initctx->user_regs.cs = GSEL(GCODE_SEL, SEL_KPL); + initctx->user_regs.eflags |= pcb->pcb_iopl; + + /* Data segments */ + initctx->user_regs.ss = GSEL(GDATA_SEL, SEL_KPL); + initctx->user_regs.es = GSEL(GDATA_SEL, SEL_KPL); + initctx->user_regs.ds = GSEL(GDATA_SEL, SEL_KPL); + initctx->user_regs.fs = GSEL(GDATA_SEL, SEL_KPL); + + /* GDT */ + memcpy(initctx->gdt_frames, frames, sizeof frames); + initctx->gdt_ents = gdt_ents; + + /* LDT */ + initctx->ldt_base = (unsigned long) ldt; + initctx->ldt_ents = NLDT; + + /* Kernel context state */ + initctx->kernel_ss = GSEL(GDATA_SEL, SEL_KPL); + initctx->kernel_sp = pcb->pcb_esp0; + initctx->ctrlreg[0] = pcb->pcb_cr0; + initctx->ctrlreg[1] = 0; /* "resuming" from kernel - no User cr3. */ + initctx->ctrlreg[2] = pcb->pcb_cr2; /* XXX: */ + /* + * Use pmap_kernel() L4 PD directly, until we setup the + * per-cpu L4 PD in pmap_cpu_init_late() */ + initctx->ctrlreg[3] = xpmap_ptom(pcb->pcb_cr3); + initctx->ctrlreg[4] = /* CR4_PAE | */CR4_OSFXSR | CR4_OSXMMEXCPT; - dwordptr[0] = 0; - dwordptr[1] = target >> 4; - pmap_kenter_pa (0, 0, VM_PROT_READ|VM_PROT_WRITE, 0); - pmap_update(pmap_kernel()); + /* Xen callbacks */ + initctx->event_callback_eip = (unsigned long) hypervisor_callback; + initctx->event_callback_cs = GSEL(GCODE_SEL, SEL_KPL); + initctx->failsafe_callback_eip = (unsigned long) failsafe_callback; + initctx->failsafe_callback_cs = GSEL(GCODE_SEL, SEL_KPL); - memcpy ((uint8_t *) 0x467, dwordptr, 4); + return; +} +#endif /* __x86_64__ */ - pmap_kremove (0, PAGE_SIZE); - pmap_update(pmap_kernel()); +int +mp_cpu_start(struct cpu_info *ci, vaddr_t target) +{ -#if NLAPIC > 0 - /* - * ... prior to executing the following sequence:" - */ + int hyperror; + struct vcpu_guest_context vcpuctx; - if (ci->ci_flags & CPUF_AP) { - if ((error = x86_ipi_init(ci->ci_cpuid)) != 0) - return error; - - delay(10000); - - if (cpu_feature & CPUID_APIC) { - error = x86_ipi_init(ci->ci_cpuid); - if (error != 0) { - aprint_error_dev(ci->ci_dev, "%s: IPI not taken (1)\n", - __func__); - return error; - } - - delay(10000); - - error = x86_ipi(target / PAGE_SIZE, ci->ci_cpuid, - LAPIC_DLMODE_STARTUP); - if (error != 0) { - aprint_error_dev(ci->ci_dev, "%s: IPI not taken (2)\n", - __func__); - return error; - } - delay(200); - - error = x86_ipi(target / PAGE_SIZE, ci->ci_cpuid, - LAPIC_DLMODE_STARTUP); - if (error != 0) { - aprint_error_dev(ci->ci_dev, "%s: IPI not taken ((3)\n", - __func__); - return error; - } - delay(200); - } + KASSERT(ci != NULL); + KASSERT(ci != &cpu_info_primary); + KASSERT(ci->ci_flags & CPUF_AP); + +#ifdef __x86_64__ + xen_init_amd64_vcpuctxt(ci, &vcpuctx, (void (*)(struct cpu_info *))target); +#else /* i386 */ + xen_init_i386_vcpuctxt(ci, &vcpuctx, (void (*)(struct cpu_info *))target); +#endif /* __x86_64__ */ + + /* Initialise the given vcpu to execute cpu_hatch(ci); */ + if ((hyperror = HYPERVISOR_vcpu_op(VCPUOP_initialise, ci->ci_cpuid, &vcpuctx))) { + aprint_error(": context initialisation failed. errno = %d\n", hyperror); + return hyperror; } -#endif -#endif /* 0 */ + + /* Start it up */ + + /* First bring it down - the Xen documentation conveniently omits this slight detail. */ + if ((hyperror = HYPERVISOR_vcpu_op(VCPUOP_down, ci->ci_cpuid, NULL))) { + aprint_error(": VCPUOP_down hypervisor command failed. errno = %d\n", hyperror); + return hyperror; + } + + if ((hyperror = HYPERVISOR_vcpu_op(VCPUOP_up, ci->ci_cpuid, NULL))) { + aprint_error(": VCPUOP_up hypervisor command failed. errno = %d\n", hyperror); + return hyperror; + } + + if (!vcpu_is_up(ci)) { + aprint_error(": did not come up\n"); + return -1; + } + return 0; } @@ -1008,6 +1091,13 @@ outb(IO_RTC, NVRAM_RESET); outb(IO_RTC+1, NVRAM_RESET_RST); #endif + if (vcpu_is_up(ci)) { + aprint_debug_dev(ci->ci_dev, "is started.\n"); + } + else { + aprint_error_dev(ci->ci_dev, "did not start up.\n"); + } + } void @@ -1023,6 +1113,7 @@ if (cpu_feature[2] & CPUID_NOX) wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NXE); + } void @@ -1096,8 +1187,12 @@ void cpu_get_tsc_freq(struct cpu_info *ci) { + uint32_t vcpu_tversion; const volatile vcpu_time_info_t *tinfo = &ci->ci_vcpu->time; - delay(1000000); + + vcpu_tversion = tinfo->version; + while (tinfo->version == vcpu_tversion); /* Wait for a time update. XXX: timeout ? */ + uint64_t freq = 1000000000ULL << 32; freq = freq / (uint64_t)tinfo->tsc_to_system_mul; if ( tinfo->tsc_shift < 0 ) @@ -1111,7 +1206,7 @@ x86_cpu_idle_xen(void) { struct cpu_info *ci = curcpu(); - + KASSERT(ci->ci_ilevel == IPL_NONE); x86_disable_intr(); @@ -1134,6 +1229,7 @@ struct cpu_info *ci; s = splvm(); /* just to be safe */ + xpq_queue_lock(); ci = curcpu(); paddr_t l3_pd = xpmap_ptom_masked(ci->ci_pae_l3_pdirpa); /* don't update the kernel L3 slot */ @@ -1141,6 +1237,7 @@ xpq_queue_pte_update(l3_pd + i * sizeof(pd_entry_t), xpmap_ptom(pmap->pm_pdirpa[i]) | PG_V); } + xpq_queue_unlock(); splx(s); tlbflush(); #else /* PAE */ Index: src/sys/arch/xen/xen/hypervisor.c diff -u src/sys/arch/xen/xen/hypervisor.c:1.55 src/sys/arch/xen/xen/hypervisor.c:1.56 --- src/sys/arch/xen/xen/hypervisor.c:1.55 Tue May 17 17:34:53 2011 +++ src/sys/arch/xen/xen/hypervisor.c Sat Aug 13 12:37:30 2011 @@ -1,4 +1,4 @@ -/* $NetBSD: hypervisor.c,v 1.55 2011/05/17 17:34:53 dyoung Exp $ */ +/* $NetBSD: hypervisor.c,v 1.56 2011/08/13 12:37:30 cherry Exp $ */ /* * Copyright (c) 2005 Manuel Bouyer. @@ -53,7 +53,7 @@ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: hypervisor.c,v 1.55 2011/05/17 17:34:53 dyoung Exp $"); +__KERNEL_RCSID(0, "$NetBSD: hypervisor.c,v 1.56 2011/08/13 12:37:30 cherry Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -79,6 +79,7 @@ #include <xen/evtchn.h> #include <xen/xen3-public/version.h> +#include <sys/cpu.h> #include <sys/dirent.h> #include <sys/stat.h> #include <sys/tree.h> @@ -176,11 +177,20 @@ { struct hypervisor_attach_args *haa = aux; - if (strcmp(haa->haa_busname, "hypervisor") == 0) + if (strncmp(haa->haa_busname, "hypervisor", sizeof("hypervisor")) == 0) return 1; return 0; } +#ifdef MULTIPROCESSOR +static int +hypervisor_vcpu_print(void *aux, const char *parent) +{ + /* Unconfigured cpus are ignored quietly. */ + return (QUIET); +} +#endif /* MULTIPROCESSOR */ + /* * Attach the hypervisor. */ @@ -188,6 +198,7 @@ hypervisor_attach(device_t parent, device_t self, void *aux) { int xen_version; + #if NPCI >0 #ifdef PCI_BUS_FIXUP int pci_maxbus = 0; @@ -209,10 +220,33 @@ memset(&hac, 0, sizeof(hac)); hac.hac_vcaa.vcaa_name = "vcpu"; hac.hac_vcaa.vcaa_caa.cpu_number = 0; - hac.hac_vcaa.vcaa_caa.cpu_role = CPU_ROLE_SP; - hac.hac_vcaa.vcaa_caa.cpu_func = 0; + hac.hac_vcaa.vcaa_caa.cpu_role = CPU_ROLE_BP; + hac.hac_vcaa.vcaa_caa.cpu_func = NULL; /* See xen/x86/cpu.c:vcpu_attach() */ config_found_ia(self, "xendevbus", &hac.hac_vcaa, hypervisor_print); +#ifdef MULTIPROCESSOR + + /* + * The xenstore contains the configured number of vcpus. + * The xenstore however, is not accessible until much later in + * the boot sequence. We therefore bruteforce check for + * allocated vcpus (See: cpu.c:vcpu_match()) by iterating + * through the maximum supported by NetBSD MP. + */ + cpuid_t vcpuid; + + for (vcpuid = 1; vcpuid < maxcpus; vcpuid++) { + memset(&hac, 0, sizeof(hac)); + hac.hac_vcaa.vcaa_name = "vcpu"; + hac.hac_vcaa.vcaa_caa.cpu_number = vcpuid; + hac.hac_vcaa.vcaa_caa.cpu_role = CPU_ROLE_AP; + hac.hac_vcaa.vcaa_caa.cpu_func = NULL; /* See xen/x86/cpu.c:vcpu_attach() */ + if (NULL == config_found_ia(self, "xendevbus", &hac.hac_vcaa, hypervisor_vcpu_print)) { + break; + } + } + +#endif /* MULTIPROCESSOR */ events_init(); #if NXENBUS > 0