Module Name:    src
Committed By:   ad
Date:           Sun Apr 19 14:11:38 UTC 2009
Modified Files:
        src/sys/arch/amd64/include: types.h
        src/sys/arch/i386/include: types.h
        src/sys/arch/x86/include: cpu.h intr.h pic.h
        src/sys/arch/x86/isa: isa_machdep.c
        src/sys/arch/x86/x86: idt.c intr.c ioapic.c
        src/sys/kern: kern_cpu.c
        src/sys/sys: cpu.h cpuio.h
        src/usr.sbin/cpuctl: cpuctl.c
Removed Files:
        src/sys/compat/sys: cpuio.h

Log Message:
cpuctl:

- Add interrupt shielding (direct hardware interrupts away from the
  specified CPUs). Not documented just yet, but it will be soon.

- Redo /dev/cpu time_t compat so that no kernel changes are needed.

x86:

- Make intr_establish, intr_disestablish safe to use when !cold.

- Distribute hardware interrupts among the CPUs, instead of directing
  everything to the boot CPU.

- Add MD code for interrupt shielding. This works in most cases, but
  there is a bug where delivery is not accepted by an LAPIC after
  redistribution. It also needs re-balancing to make things fair after
  interrupts are turned back on for a CPU.


To generate a diff of this commit:
cvs rdiff -u -r1.31 -r1.32 src/sys/arch/amd64/include/types.h
cvs rdiff -u -r1.64 -r1.65 src/sys/arch/i386/include/types.h
cvs rdiff -u -r1.15 -r1.16 src/sys/arch/x86/include/cpu.h
cvs rdiff -u -r1.38 -r1.39 src/sys/arch/x86/include/intr.h
cvs rdiff -u -r1.6 -r1.7 src/sys/arch/x86/include/pic.h
cvs rdiff -u -r1.25 -r1.26 src/sys/arch/x86/isa/isa_machdep.c
cvs rdiff -u -r1.2 -r1.3 src/sys/arch/x86/x86/idt.c
cvs rdiff -u -r1.60 -r1.61 src/sys/arch/x86/x86/intr.c
cvs rdiff -u -r1.39 -r1.40 src/sys/arch/x86/x86/ioapic.c
cvs rdiff -u -r1.2 -r0 src/sys/compat/sys/cpuio.h
cvs rdiff -u -r1.41 -r1.42 src/sys/kern/kern_cpu.c
cvs rdiff -u -r1.29 -r1.30 src/sys/sys/cpu.h
cvs rdiff -u -r1.3 -r1.4 src/sys/sys/cpuio.h
cvs rdiff -u -r1.13 -r1.14 src/usr.sbin/cpuctl/cpuctl.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
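For illustration only (this sketch is not part of the commit, and the
/dev/cpuctl device path is an assumption; cpuctl(8) below is the
supported consumer): with these changes, a userland program shields a
CPU with the same read-modify-write of cpustate_t that the new
"nointr" command in cpuctl.c performs.

/*
 * Hedged sketch: shield CPU 1 from device interrupts.  Assumes a
 * kernel built from this commit and the /dev/cpuctl device node.
 */
#include <sys/ioctl.h>
#include <sys/cpuio.h>

#include <err.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stdlib.h>
#include <unistd.h>

int
main(void)
{
        cpustate_t cs;
        int fd;

        if ((fd = open("/dev/cpuctl", O_RDWR)) == -1)
                err(EXIT_FAILURE, "open");
        cs.cs_id = 1;                   /* CPU to shield */
        if (ioctl(fd, IOC_CPU_GETSTATE, &cs) == -1)
                err(EXIT_FAILURE, "IOC_CPU_GETSTATE");
        cs.cs_intr = false;             /* stop fielding device interrupts */
        if (ioctl(fd, IOC_CPU_SETSTATE, &cs) == -1) {
                /*
                 * Expect EOPNOTSUPP on ports without __HAVE_INTR_CONTROL,
                 * and EBUSY when this is the last CPU handling interrupts.
                 */
                err(EXIT_FAILURE, "IOC_CPU_SETSTATE");
        }
        close(fd);
        return EXIT_SUCCESS;
}

On the kernel side this lands in the new cpu_setintr(), which
cross-calls the target CPU to set SPCF_NOINTR and then calls
cpu_intr_redistribute() to migrate established handlers away.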
Modified files: Index: src/sys/arch/amd64/include/types.h diff -u src/sys/arch/amd64/include/types.h:1.31 src/sys/arch/amd64/include/types.h:1.32 --- src/sys/arch/amd64/include/types.h:1.31 Sun Apr 5 00:57:56 2009 +++ src/sys/arch/amd64/include/types.h Sun Apr 19 14:11:36 2009 @@ -1,4 +1,4 @@ -/* $NetBSD: types.h,v 1.31 2009/04/05 00:57:56 tsutsui Exp $ */ +/* $NetBSD: types.h,v 1.32 2009/04/19 14:11:36 ad Exp $ */ /*- * Copyright (c) 1990 The Regents of the University of California. @@ -76,6 +76,7 @@ #define __HAVE_ATOMIC64_OPS #define __HAVE_ATOMIC_AS_MEMBAR #define __HAVE_CPU_LWP_SETPRIVATE +#define __HAVE_INTR_CONTROL #ifdef _KERNEL_OPT #include "opt_xen.h" Index: src/sys/arch/i386/include/types.h diff -u src/sys/arch/i386/include/types.h:1.64 src/sys/arch/i386/include/types.h:1.65 --- src/sys/arch/i386/include/types.h:1.64 Sun Apr 5 00:57:56 2009 +++ src/sys/arch/i386/include/types.h Sun Apr 19 14:11:37 2009 @@ -1,4 +1,4 @@ -/* $NetBSD: types.h,v 1.64 2009/04/05 00:57:56 tsutsui Exp $ */ +/* $NetBSD: types.h,v 1.65 2009/04/19 14:11:37 ad Exp $ */ /*- * Copyright (c) 1990 The Regents of the University of California. @@ -84,6 +84,7 @@ #define __HAVE_ATOMIC64_OPS #define __HAVE_ATOMIC_AS_MEMBAR #define __HAVE_CPU_LWP_SETPRIVATE +#define __HAVE_INTR_CONTROL #if defined(_KERNEL) #define __HAVE_RAS Index: src/sys/arch/x86/include/cpu.h diff -u src/sys/arch/x86/include/cpu.h:1.15 src/sys/arch/x86/include/cpu.h:1.16 --- src/sys/arch/x86/include/cpu.h:1.15 Thu Apr 16 15:34:23 2009 +++ src/sys/arch/x86/include/cpu.h Sun Apr 19 14:11:37 2009 @@ -1,4 +1,4 @@ -/* $NetBSD: cpu.h,v 1.15 2009/04/16 15:34:23 rmind Exp $ */ +/* $NetBSD: cpu.h,v 1.16 2009/04/19 14:11:37 ad Exp $ */ /*- * Copyright (c) 1990 The Regents of the University of California. @@ -111,6 +111,7 @@ #define TLBSTATE_LAZY 1 /* tlbs are valid but won't be kept uptodate */ #define TLBSTATE_STALE 2 /* we might have stale user tlbs */ int ci_curldt; /* current LDT descriptor */ + int ci_nintrhand; /* number of H/W interrupt handlers */ uint64_t ci_scratch; #ifdef XEN Index: src/sys/arch/x86/include/intr.h diff -u src/sys/arch/x86/include/intr.h:1.38 src/sys/arch/x86/include/intr.h:1.39 --- src/sys/arch/x86/include/intr.h:1.38 Fri Mar 27 16:09:24 2009 +++ src/sys/arch/x86/include/intr.h Sun Apr 19 14:11:37 2009 @@ -1,4 +1,4 @@ -/* $NetBSD: intr.h,v 1.38 2009/03/27 16:09:24 dyoung Exp $ */ +/* $NetBSD: intr.h,v 1.39 2009/04/19 14:11:37 ad Exp $ */ /*- * Copyright (c) 1998, 2001, 2006, 2007, 2008 The NetBSD Foundation, Inc. 
@@ -103,6 +103,7 @@ int (*ih_realfun)(void *); void *ih_realarg; struct intrhand *ih_next; + struct intrhand **ih_prevp; int ih_pin; int ih_slot; struct cpu_info *ih_cpu; @@ -176,9 +177,7 @@ void cpu_intr_init(struct cpu_info *); int intr_find_mpmapping(int, int, int *); struct pic *intr_findpic(int); -#ifdef INTRDEBUG void intr_printconfig(void); -#endif int x86_send_ipi(struct cpu_info *, int); void x86_broadcast_ipi(int); Index: src/sys/arch/x86/include/pic.h diff -u src/sys/arch/x86/include/pic.h:1.6 src/sys/arch/x86/include/pic.h:1.7 --- src/sys/arch/x86/include/pic.h:1.6 Thu Apr 2 00:09:32 2009 +++ src/sys/arch/x86/include/pic.h Sun Apr 19 14:11:37 2009 @@ -1,4 +1,4 @@ -/* $NetBSD: pic.h,v 1.6 2009/04/02 00:09:32 dyoung Exp $ */ +/* $NetBSD: pic.h,v 1.7 2009/04/19 14:11:37 ad Exp $ */ #ifndef _X86_PIC_H #define _X86_PIC_H @@ -18,6 +18,7 @@ void (*pic_hwunmask)(struct pic *, int); void (*pic_addroute)(struct pic *, struct cpu_info *, int, int, int); void (*pic_delroute)(struct pic *, struct cpu_info *, int, int, int); + bool (*pic_trymask)(struct pic *, int); struct intrstub *pic_level_stubs; struct intrstub *pic_edge_stubs; struct ioapic_softc *pic_ioapic; /* if pic_type == PIC_IOAPIC */ Index: src/sys/arch/x86/isa/isa_machdep.c diff -u src/sys/arch/x86/isa/isa_machdep.c:1.25 src/sys/arch/x86/isa/isa_machdep.c:1.26 --- src/sys/arch/x86/isa/isa_machdep.c:1.25 Sat Mar 14 14:46:08 2009 +++ src/sys/arch/x86/isa/isa_machdep.c Sun Apr 19 14:11:37 2009 @@ -1,4 +1,4 @@ -/* $NetBSD: isa_machdep.c,v 1.25 2009/03/14 14:46:08 dsl Exp $ */ +/* $NetBSD: isa_machdep.c,v 1.26 2009/04/19 14:11:37 ad Exp $ */ /*- * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. @@ -65,7 +65,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: isa_machdep.c,v 1.25 2009/03/14 14:46:08 dsl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: isa_machdep.c,v 1.26 2009/04/19 14:11:37 ad Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -74,10 +74,10 @@ #include <sys/device.h> #include <sys/proc.h> #include <sys/mbuf.h> +#include <sys/bus.h> +#include <sys/cpu.h> -#include <machine/bus.h> #include <machine/bus_private.h> - #include <machine/pio.h> #include <machine/cpufunc.h> @@ -127,7 +127,6 @@ int isa_intr_alloc(isa_chipset_tag_t ic, int mask, int type, int *irq) { - extern kmutex_t x86_intr_lock; int i, tmp, bestirq, count; struct intrhand **p, *q; struct intrsource *isp; @@ -150,7 +149,7 @@ */ mask &= 0xefbf; - mutex_enter(&x86_intr_lock); + mutex_enter(&cpu_lock); for (i = 0; i < NUM_LEGACY_IRQS; i++) { if (LEGAL_IRQ(i) == 0 || (mask & (1<<i)) == 0) @@ -161,7 +160,7 @@ * if nothing's using the irq, just return it */ *irq = i; - mutex_exit(&x86_intr_lock); + mutex_exit(&cpu_lock); return (0); } @@ -194,7 +193,7 @@ } } - mutex_exit(&x86_intr_lock); + mutex_exit(&cpu_lock); if (bestirq == -1) return (1); Index: src/sys/arch/x86/x86/idt.c diff -u src/sys/arch/x86/x86/idt.c:1.2 src/sys/arch/x86/x86/idt.c:1.3 --- src/sys/arch/x86/x86/idt.c:1.2 Mon Apr 28 20:23:40 2008 +++ src/sys/arch/x86/x86/idt.c Sun Apr 19 14:11:37 2009 @@ -1,12 +1,12 @@ -/* $NetBSD: idt.c,v 1.2 2008/04/28 20:23:40 martin Exp $ */ +/* $NetBSD: idt.c,v 1.3 2009/04/19 14:11:37 ad Exp $ */ /*- - * Copyright (c) 1996, 1997, 1998, 2000 The NetBSD Foundation, Inc. + * Copyright (c) 1996, 1997, 1998, 2000, 2009 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation - * by Charles M. Hannum and by Jason R. 
Thorpe of the Numerical Aerospace - * Simulation Facility, NASA Ames Research Center. + * by Charles M. Hannum, by Jason R. Thorpe of the Numerical Aerospace + * Simulation Facility NASA Ames Research Center, and by Andrew Doran. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -65,46 +65,51 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: idt.c,v 1.2 2008/04/28 20:23:40 martin Exp $"); +__KERNEL_RCSID(0, "$NetBSD: idt.c,v 1.3 2009/04/19 14:11:37 ad Exp $"); #include <sys/param.h> #include <sys/systm.h> #include <sys/mutex.h> +#include <sys/cpu.h> +#include <sys/atomic.h> #include <machine/segments.h> #if !defined(XEN) -static kmutex_t idt_lock; struct gate_descriptor *idt; static char idt_allocmap[NIDT]; /* * Allocate an IDT vector slot within the given range. + * cpu_lock will be held unless single threaded during early boot. */ - int idt_vec_alloc(int low, int high) { int vec; - mutex_enter(&idt_lock); + KASSERT(mutex_owned(&cpu_lock) || !mp_online); + for (vec = low; vec <= high; vec++) { if (idt_allocmap[vec] == 0) { + /* idt_vec_free() can be unlocked, so membar. */ + membar_sync(); idt_allocmap[vec] = 1; - mutex_exit(&idt_lock); return vec; } } - mutex_exit(&idt_lock); return 0; } void idt_vec_reserve(int vec) { - int result = idt_vec_alloc(vec, vec); + int result; + + KASSERT(mutex_owned(&cpu_lock) || !mp_online); + result = idt_vec_alloc(vec, vec); if (result != vec) { panic("%s: failed to reserve vec %d", __func__, vec); } @@ -113,30 +118,28 @@ void idt_vec_set(int vec, void (*function)(void)) { - /* - * Vector should be allocated, so no locking needed. - */ + KASSERT(mutex_owned(&cpu_lock) || !mp_online); KASSERT(idt_allocmap[vec] == 1); setgate(&idt[vec], function, 0, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); } +/* + * Free IDT vector. No locking required as release is atomic. + */ void idt_vec_free(int vec) { - mutex_enter(&idt_lock); unsetgate(&idt[vec]); idt_allocmap[vec] = 0; - mutex_exit(&idt_lock); } void idt_init(void) { - mutex_init(&idt_lock, MUTEX_DEFAULT, IPL_NONE); } #endif /* !defined(XEN) */ Index: src/sys/arch/x86/x86/intr.c diff -u src/sys/arch/x86/x86/intr.c:1.60 src/sys/arch/x86/x86/intr.c:1.61 --- src/sys/arch/x86/x86/intr.c:1.60 Tue Apr 7 18:24:23 2009 +++ src/sys/arch/x86/x86/intr.c Sun Apr 19 14:11:37 2009 @@ -1,7 +1,7 @@ -/* $NetBSD: intr.c,v 1.60 2009/04/07 18:24:23 dyoung Exp $ */ +/* $NetBSD: intr.c,v 1.61 2009/04/19 14:11:37 ad Exp $ */ /*- - * Copyright (c) 2007, 2008 The NetBSD Foundation, Inc. + * Copyright (c) 2007, 2008, 2009 The NetBSD Foundation, Inc. * All rights reserved. 
* * This code is derived from software contributed to The NetBSD Foundation @@ -133,7 +133,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: intr.c,v 1.60 2009/04/07 18:24:23 dyoung Exp $"); +__KERNEL_RCSID(0, "$NetBSD: intr.c,v 1.61 2009/04/19 14:11:37 ad Exp $"); #include "opt_intrdebug.h" #include "opt_multiprocessor.h" @@ -144,7 +144,6 @@ #include <sys/kernel.h> #include <sys/syslog.h> #include <sys/device.h> -#include <sys/malloc.h> #include <sys/kmem.h> #include <sys/proc.h> #include <sys/errno.h> @@ -192,8 +191,6 @@ #endif #endif -kmutex_t x86_intr_lock; - /* * Fill in default interrupt table (in case of spurious interrupt * during configuration of kernel), setup interrupt control unit @@ -203,8 +200,6 @@ { int i; - mutex_init(&x86_intr_lock, MUTEX_DEFAULT, IPL_NONE); - /* icu vectors */ for (i = 0; i < NUM_LEGACY_IRQS; i++) { idt_vec_reserve(ICU_OFFSET + i); @@ -226,12 +221,22 @@ int x86_nmi(void) { + log(LOG_CRIT, "NMI port 61 %x, port 70 %x\n", inb(0x61), inb(0x70)); return(0); } /* * Recalculate the interrupt masks from scratch. + * During early boot, anything goes and we are always called on the BP. + * When the system is up and running: + * + * => called with ci == curcpu() + * => cpu_lock held by the initiator + * => interrupts disabled on-chip (PSL_I) + * + * Do not call printf(), kmem_free() or other "heavyweight" routines + * from here. This routine must be quick and must not block. */ static void intr_calculatemasks(struct cpu_info *ci) @@ -311,7 +316,7 @@ { struct intr_extra_bus *iebp; - iebp = malloc(sizeof(struct intr_extra_bus), M_TEMP, M_WAITOK); + iebp = kmem_alloc(sizeof(*iebp), KM_SLEEP); iebp->bus = pba->pba_bus; iebp->pci_chipset_tag = pba->pba_pc; iebp->pci_bridge_tag = pba->pba_bridgetag; @@ -350,10 +355,6 @@ } #endif - -/* - * XXX if defined(MULTIPROCESSOR) && .. ? - */ #if NIOAPIC > 0 || NACPI > 0 int intr_find_mpmapping(int bus, int pin, int *handle) @@ -416,15 +417,14 @@ int slot, i; struct intrsource *isp; + KASSERT(mutex_owned(&cpu_lock)); + if (pic == &i8259_pic) { - if (!CPU_IS_PRIMARY(ci)) - return EBUSY; + KASSERT(CPU_IS_PRIMARY(ci)); slot = pin; - mutex_enter(&x86_intr_lock); } else { slot = -1; - mutex_enter(&x86_intr_lock); /* * intr_allocate_slot has checked for an existing mapping. * Now look for a free slot. @@ -436,17 +436,14 @@ } } if (slot == -1) { - mutex_exit(&x86_intr_lock); return EBUSY; } } isp = ci->ci_isources[slot]; if (isp == NULL) { - isp = malloc(sizeof (struct intrsource), - M_DEVBUF, M_NOWAIT|M_ZERO); + isp = kmem_zalloc(sizeof(*isp), KM_SLEEP); if (isp == NULL) { - mutex_exit(&x86_intr_lock); return ENOMEM; } snprintf(isp->is_evname, sizeof (isp->is_evname), @@ -455,7 +452,6 @@ pic->pic_name, isp->is_evname); ci->ci_isources[slot] = isp; } - mutex_exit(&x86_intr_lock); *index = slot; return 0; @@ -464,20 +460,23 @@ /* * A simple round-robin allocator to assign interrupts to CPUs. */ -static int +static int __noinline intr_allocate_slot(struct pic *pic, int pin, int level, struct cpu_info **cip, int *index, int *idt_slot) { CPU_INFO_ITERATOR cii; - struct cpu_info *ci; + struct cpu_info *ci, *lci; struct intrsource *isp; int slot, idtvec, error; + KASSERT(mutex_owned(&cpu_lock)); + /* First check if this pin is already used by an interrupt vector. 
*/ for (CPU_INFO_FOREACH(cii, ci)) { for (slot = 0 ; slot < MAX_INTR_SOURCES ; slot++) { - if ((isp = ci->ci_isources[slot]) == NULL) + if ((isp = ci->ci_isources[slot]) == NULL) { continue; + } if (isp->is_pic == pic && isp->is_pin == pin) { *idt_slot = isp->is_idtvec; *index = slot; @@ -489,56 +488,73 @@ /* * The pic/pin combination doesn't have an existing mapping. - * Find a slot for a new interrupt source and allocate an IDT - * vector. - * - * For the i8259 case, this always uses the reserved slots - * of the primary CPU and fixed IDT vectors. This is required - * by other parts of the code, see x86/intr.h for more details. - * - * For the IOAPIC case, interrupts are assigned to the - * primary CPU by default, until it runs out of slots. + * Find a slot for a new interrupt source. For the i8259 case, + * we always use reserved slots of the primary CPU. Otherwise, + * we make an attempt to balance the interrupt load. * * PIC and APIC usage are essentially exclusive, so the reservation * of the ISA slots is ignored when assigning IOAPIC slots. - * - * XXX Fix interrupt allocation to Application Processors. - * XXX Check how many interrupts each CPU got and assign it to - * XXX the least loaded CPU. Consider adding options to bind - * XXX interrupts to specific CPUs. - * XXX Drop apic level support, just assign IDT vectors sequentially. */ - ci = &cpu_info_primary; - error = intr_allocate_slot_cpu(ci, pic, pin, &slot); - if (error != 0) { + if (pic == &i8259_pic) { + /* + * Must be directed to BP. + */ + ci = &cpu_info_primary; + error = intr_allocate_slot_cpu(ci, pic, pin, &slot); + } else { /* - * ..now try the others. + * Find least loaded AP/BP and try to allocate there. */ - for (CPU_INFO_FOREACH(cii, ci)) { - if (CPU_IS_PRIMARY(ci)) + ci = NULL; + for (CPU_INFO_FOREACH(cii, lci)) { + if ((lci->ci_schedstate.spc_flags & SPCF_NOINTR) != 0) { continue; - error = intr_allocate_slot_cpu(ci, pic, pin, &slot); - if (error == 0) - break; + } + if (ci == NULL || + ci->ci_nintrhand > lci->ci_nintrhand) { + ci = lci; + } + } + KASSERT(ci != NULL); + error = intr_allocate_slot_cpu(ci, pic, pin, &slot); + + /* + * If that did not work, allocate anywhere. + */ + if (error != 0) { + for (CPU_INFO_FOREACH(cii, ci)) { + if ((ci->ci_schedstate.spc_flags & + SPCF_NOINTR) != 0) { + continue; + } + error = intr_allocate_slot_cpu(ci, pic, + pin, &slot); + if (error == 0) { + break; + } + } } - if (error != 0) - return EBUSY; } + if (error != 0) { + return error; + } + KASSERT(ci != NULL); - if (pic == &i8259_pic) + /* + * Now allocate an IDT vector. + * For the 8259 these are reserved up front. 
+ */ + if (pic == &i8259_pic) { idtvec = ICU_OFFSET + pin; - else + } else { idtvec = idt_vec_alloc(APIC_LEVEL(level), IDT_INTR_HIGH); - + } if (idtvec == 0) { - mutex_enter(&x86_intr_lock); evcnt_detach(&ci->ci_isources[slot]->is_evcnt); - free(ci->ci_isources[slot], M_DEVBUF); + kmem_free(ci->ci_isources[slot], sizeof(*(ci->ci_isources[slot]))); ci->ci_isources[slot] = NULL; - mutex_exit(&x86_intr_lock); return EBUSY; } - ci->ci_isources[slot]->is_idtvec = idtvec; *idt_slot = idtvec; *index = slot; @@ -546,6 +562,23 @@ return 0; } +static void +intr_source_free(struct cpu_info *ci, int slot, struct pic *pic, int idtvec) +{ + struct intrsource *isp; + + isp = ci->ci_isources[slot]; + + if (isp->is_handlers != NULL) + return; + ci->ci_isources[slot] = NULL; + evcnt_detach(&isp->is_evcnt); + kmem_free(isp, sizeof(*isp)); + ci->ci_isources[slot] = NULL; + if (pic != &i8259_pic) + idt_vec_free(idtvec); +} + #ifdef MULTIPROCESSOR static int intr_biglock_wrapper(void *); @@ -585,6 +618,59 @@ return NULL; } +/* + * Handle per-CPU component of interrupt establish. + * + * => caller (on initiating CPU) holds cpu_lock on our behalf + * => arg1: struct intrhand *ih + * => arg2: int idt_vec + */ +static void +intr_establish_xcall(void *arg1, void *arg2) +{ + struct intrsource *source; + struct intrstub *stubp; + struct intrhand *ih; + struct cpu_info *ci; + int idt_vec; + u_long psl; + + ih = arg1; + + KASSERT(ih->ih_cpu == curcpu() || !mp_online); + + ci = ih->ih_cpu; + source = ci->ci_isources[ih->ih_slot]; + idt_vec = (int)(intptr_t)arg2; + + /* Disable interrupts locally. */ + psl = x86_read_psl(); + x86_disable_intr(); + + /* Link in the handler and re-calculate masks. */ + *(ih->ih_prevp) = ih; + intr_calculatemasks(ci); + + /* Hook in new IDT vector and SPL state. */ + if (source->is_resume == NULL || source->is_idtvec != idt_vec) { + if (source->is_idtvec != 0 && source->is_idtvec != idt_vec) + idt_vec_free(source->is_idtvec); + source->is_idtvec = idt_vec; + if (source->is_type == IST_LEVEL) { + stubp = &source->is_pic->pic_level_stubs[ih->ih_slot]; + } else { + stubp = &source->is_pic->pic_edge_stubs[ih->ih_slot]; + } + source->is_resume = stubp->ist_resume; + source->is_recurse = stubp->ist_recurse; + setgate(&idt[idt_vec], stubp->ist_entry, 0, SDT_SYS386IGT, + SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + } + + /* Re-enable interrupts locally. */ + x86_write_psl(psl); +} + void * intr_establish(int legacy_irq, struct pic *pic, int pin, int type, int level, int (*handler)(void *), void *arg, bool known_mpsafe) @@ -593,10 +679,10 @@ struct cpu_info *ci; int slot, error, idt_vec; struct intrsource *source; - struct intrstub *stubp; #ifdef MULTIPROCESSOR bool mpsafe = (known_mpsafe || level != IPL_VM); #endif /* MULTIPROCESSOR */ + uint64_t where; #ifdef DIAGNOSTIC if (legacy_irq != -1 && (legacy_irq < 0 || legacy_irq > 15)) @@ -606,18 +692,19 @@ panic("intr_establish: non-legacy IRQ on i8259"); #endif - error = intr_allocate_slot(pic, pin, level, &ci, &slot, - &idt_vec); - if (error != 0) { - printf("failed to allocate interrupt slot for PIC %s pin %d\n", - pic->pic_name, pin); + ih = kmem_alloc(sizeof(*ih), KM_SLEEP); + if (ih == NULL) { + printf("intr_establish: can't allocate handler info\n"); return NULL; } - /* no point in sleeping unless someone can free memory. */ - ih = malloc(sizeof *ih, M_DEVBUF, cold ? 
M_NOWAIT : M_WAITOK); - if (ih == NULL) { - printf("intr_establish: can't allocate malloc handler info\n"); + mutex_enter(&cpu_lock); + error = intr_allocate_slot(pic, pin, level, &ci, &slot, &idt_vec); + if (error != 0) { + mutex_exit(&cpu_lock); + kmem_free(ih, sizeof(*ih)); + printf("failed to allocate interrupt slot for PIC %s pin %d\n", + pic->pic_name, pin); return NULL; } @@ -625,15 +712,14 @@ if (source->is_handlers != NULL && source->is_pic->pic_type != pic->pic_type) { - free(ih, M_DEVBUF); + mutex_exit(&cpu_lock); + kmem_free(ih, sizeof(*ih)); printf("intr_establish: can't share intr source between " "different PIC types (legacy_irq %d pin %d slot %d)\n", legacy_irq, pin, slot); return NULL; } - mutex_enter(&x86_intr_lock); - source->is_pin = pin; source->is_pic = pic; @@ -645,23 +731,30 @@ case IST_LEVEL: if (source->is_type == type) break; + /* FALLTHROUGH */ case IST_PULSE: if (type != IST_NONE) { - mutex_exit(&x86_intr_lock); + mutex_exit(&cpu_lock); + kmem_free(ih, sizeof(*ih)); + intr_source_free(ci, slot, pic, idt_vec); printf("intr_establish: pic %s pin %d: can't share " "type %d with %d\n", pic->pic_name, pin, source->is_type, type); - free(ih, M_DEVBUF); return NULL; } break; default: - mutex_exit(&x86_intr_lock); panic("intr_establish: bad intr type %d for pic %s pin %d\n", source->is_type, pic->pic_name, pin); + /* NOTREACHED */ } - pic->pic_hwmask(pic, pin); + /* + * We're now committed. Mask the interrupt in hardware and + * count it for load distribution. + */ + (*pic->pic_hwmask)(pic, pin); + (ci->ci_nintrhand)++; /* * Figure out where to put the handler. @@ -670,11 +763,13 @@ */ for (p = &ci->ci_isources[slot]->is_handlers; (q = *p) != NULL && q->ih_level > level; - p = &q->ih_next) - ; + p = &q->ih_next) { + /* nothing */; + } ih->ih_fun = ih->ih_realfun = handler; ih->ih_arg = ih->ih_realarg = arg; + ih->ih_prevp = p; ih->ih_next = *p; ih->ih_level = level; ih->ih_pin = pin; @@ -686,27 +781,23 @@ ih->ih_arg = ih; } #endif /* MULTIPROCESSOR */ - *p = ih; - - intr_calculatemasks(ci); - mutex_exit(&x86_intr_lock); - - if (source->is_resume == NULL || source->is_idtvec != idt_vec) { - if (source->is_idtvec != 0 && source->is_idtvec != idt_vec) - idt_vec_free(source->is_idtvec); - source->is_idtvec = idt_vec; - stubp = type == IST_LEVEL ? - &pic->pic_level_stubs[slot] : &pic->pic_edge_stubs[slot]; - source->is_resume = stubp->ist_resume; - source->is_recurse = stubp->ist_recurse; - setgate(&idt[idt_vec], stubp->ist_entry, 0, SDT_SYS386IGT, - SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + /* + * Call out to the remote CPU to update its interrupt state. + * Only make RPCs if the APs are up and running. + */ + if (ci == curcpu() || !mp_online) { + intr_establish_xcall(ih, (void *)(intptr_t)idt_vec); + } else { + where = xc_unicast(0, intr_establish_xcall, ih, + (void *)(intptr_t)idt_vec, ci); + xc_wait(where); } - pic->pic_addroute(pic, ci, pin, idt_vec, type); - - pic->pic_hwunmask(pic, pin); + /* All set up, so add a route for the interrupt and unmask it. */ + (*pic->pic_addroute)(pic, ci, pin, idt_vec, type); + (*pic->pic_hwunmask)(pic, pin); + mutex_exit(&cpu_lock); #ifdef INTRDEBUG printf("allocated pic %s type %s pin %d level %d to %s slot %d " @@ -719,24 +810,37 @@ } /* - * Deregister an interrupt handler. + * Called on bound CPU to handle intr_disestablish(). 
+ * + * => caller (on initiating CPU) holds cpu_lock on our behalf + * => arg1: struct intrhand *ih + * => arg2: unused */ -void -intr_disestablish(struct intrhand *ih) +static void +intr_disestablish_xcall(void *arg1, void *arg2) { struct intrhand **p, *q; struct cpu_info *ci; struct pic *pic; struct intrsource *source; + struct intrhand *ih; + u_long psl; int idtvec; + ih = arg1; ci = ih->ih_cpu; + + KASSERT(ci == curcpu() || !mp_online); + + /* Disable interrupts locally. */ + psl = x86_read_psl(); + x86_disable_intr(); + pic = ci->ci_isources[ih->ih_slot]->is_pic; source = ci->ci_isources[ih->ih_slot]; idtvec = source->is_idtvec; - mutex_enter(&x86_intr_lock); - pic->pic_hwmask(pic, ih->ih_pin); + (*pic->pic_hwmask)(pic, ih->ih_pin); atomic_and_32(&ci->ci_ipending, ~(1 << ih->ih_slot)); /* @@ -746,33 +850,56 @@ p = &q->ih_next) ; if (q == NULL) { - mutex_exit(&x86_intr_lock); + x86_write_psl(psl); panic("intr_disestablish: handler not registered"); + /* NOTREACHED */ } *p = q->ih_next; intr_calculatemasks(ci); - pic->pic_delroute(pic, ci, ih->ih_pin, idtvec, source->is_type); - pic->pic_hwunmask(pic, ih->ih_pin); + (*pic->pic_delroute)(pic, ci, ih->ih_pin, idtvec, source->is_type); + (*pic->pic_hwunmask)(pic, ih->ih_pin); + + /* Re-enable interrupts. */ + x86_write_psl(psl); + + /* If the source is free we can drop it now. */ + intr_source_free(ci, ih->ih_slot, pic, idtvec); #ifdef INTRDEBUG printf("%s: remove slot %d (pic %s pin %d vec %d)\n", device_xname(ci->ci_dev), ih->ih_slot, pic->pic_name, ih->ih_pin, idtvec); #endif +} - if (source->is_handlers == NULL) { - evcnt_detach(&source->is_evcnt); - free(source, M_DEVBUF); - ci->ci_isources[ih->ih_slot] = NULL; - if (pic != &i8259_pic) - idt_vec_free(idtvec); - } - - free(ih, M_DEVBUF); +/* + * Deregister an interrupt handler. + */ +void +intr_disestablish(struct intrhand *ih) +{ + struct cpu_info *ci; + uint64_t where; - mutex_exit(&x86_intr_lock); + /* + * Count the removal for load balancing. + * Call out to the remote CPU to update its interrupt state. + * Only make RPCs if the APs are up and running. 
+ */ + mutex_enter(&cpu_lock); + ci = ih->ih_cpu; + (ci->ci_nintrhand)--; + KASSERT(ci->ci_nintrhand >= 0); + if (ci == curcpu() || !mp_online) { + intr_disestablish_xcall(ih, NULL); + } else { + where = xc_unicast(0, intr_disestablish_xcall, ih, NULL, ci); + xc_wait(where); + } + mutex_exit(&cpu_lock); + kmem_free(ih, sizeof(*ih)); } const char * @@ -809,8 +936,6 @@ } -#define CONCAT(x,y) __CONCAT(x,y) - /* * Fake interrupt handler structures for the benefit of symmetry with * other interrupt sources, and the benefit of intr_calculatemasks() @@ -859,9 +984,8 @@ #endif #if NLAPIC > 0 - isp = malloc(sizeof (struct intrsource), M_DEVBUF, M_WAITOK|M_ZERO); - if (isp == NULL) - panic("can't allocate fixed interrupt source"); + isp = kmem_zalloc(sizeof(*isp), KM_SLEEP); + KASSERT(isp != NULL); isp->is_recurse = Xrecurse_lapic_ltimer; isp->is_resume = Xresume_lapic_ltimer; fake_timer_intrhand.ih_level = IPL_CLOCK; @@ -872,9 +996,8 @@ device_xname(ci->ci_dev), "timer"); #ifdef MULTIPROCESSOR - isp = malloc(sizeof (struct intrsource), M_DEVBUF, M_WAITOK|M_ZERO); - if (isp == NULL) - panic("can't allocate fixed interrupt source"); + isp = kmem_zalloc(sizeof(*isp), KM_SLEEP); + KASSERT(isp != NULL); isp->is_recurse = Xrecurse_lapic_ipi; isp->is_resume = Xresume_lapic_ipi; fake_ipi_intrhand.ih_level = IPL_IPI; @@ -888,9 +1011,8 @@ #endif #endif - isp = malloc(sizeof (struct intrsource), M_DEVBUF, M_WAITOK|M_ZERO); - if (isp == NULL) - panic("can't allocate fixed interrupt source"); + isp = kmem_zalloc(sizeof(*isp), KM_SLEEP); + KASSERT(isp != NULL); isp->is_recurse = Xpreemptrecurse; isp->is_resume = Xpreemptresume; fake_preempt_intrhand.ih_level = IPL_PREEMPT; @@ -926,7 +1048,7 @@ ci->ci_idepth = -1; } -#ifdef INTRDEBUG +#if defined(INTRDEBUG) || defined(DDB) void intr_printconfig(void) { @@ -967,9 +1089,8 @@ ci = l->l_cpu; - isp = malloc(sizeof (struct intrsource), M_DEVBUF, M_WAITOK|M_ZERO); - if (isp == NULL) - panic("can't allocate fixed interrupt source"); + isp = kmem_zalloc(sizeof(*isp), KM_SLEEP); + KASSERT(isp != NULL); isp->is_recurse = Xsoftintr; isp->is_resume = Xsoftintr; isp->is_pic = &softintr_pic; @@ -1007,3 +1128,211 @@ intr_calculatemasks(ci); } + +static void +intr_redistribute_xc_t(void *arg1, void *arg2) +{ + struct cpu_info *ci; + struct intrsource *isp; + int slot; + u_long psl; + + ci = curcpu(); + isp = arg1; + slot = (int)(intptr_t)arg2; + + /* Disable interrupts locally. */ + psl = x86_read_psl(); + x86_disable_intr(); + + /* Hook it in and re-calculate masks. */ + ci->ci_isources[slot] = isp; + intr_calculatemasks(curcpu()); + + /* Re-enable interrupts locally. */ + x86_write_psl(psl); +} + +static void +intr_redistribute_xc_s1(void *arg1, void *arg2) +{ + struct pic *pic; + struct intrsource *isp; + struct cpu_info *nci; + u_long psl; + + isp = arg1; + nci = arg2; + + /* + * Disable interrupts on-chip and mask the pin. Back out + * and let the interrupt be processed if one is pending. + */ + pic = isp->is_pic; + for (;;) { + psl = x86_read_psl(); + x86_disable_intr(); + if ((*pic->pic_trymask)(pic, isp->is_pin)) { + break; + } + x86_write_psl(psl); + DELAY(1000); + } + + /* pic_addroute will unmask the interrupt. */ + (*pic->pic_addroute)(pic, nci, isp->is_pin, isp->is_idtvec, + isp->is_type); + x86_write_psl(psl); +} + +static void +intr_redistribute_xc_s2(void *arg1, void *arg2) +{ + struct cpu_info *ci; + u_long psl; + int slot; + + ci = curcpu(); + slot = (int)(uintptr_t)arg1; + + /* Disable interrupts locally. 
*/ + psl = x86_read_psl(); + x86_disable_intr(); + + /* Patch out the source and re-calculate masks. */ + ci->ci_isources[slot] = NULL; + intr_calculatemasks(ci); + + /* Re-enable interrupts locally. */ + x86_write_psl(psl); +} + +static bool +intr_redistribute(struct cpu_info *oci) +{ + struct intrsource *isp; + struct intrhand *ih; + CPU_INFO_ITERATOR cii; + struct cpu_info *nci, *ici; + int oslot, nslot; + uint64_t where; + + KASSERT(mutex_owned(&cpu_lock)); + + /* Look for an interrupt source that we can migrate. */ + for (oslot = 0; oslot < MAX_INTR_SOURCES; oslot++) { + if ((isp = oci->ci_isources[oslot]) == NULL) { + continue; + } + if (isp->is_pic->pic_type == PIC_IOAPIC) { + break; + } + } + if (oslot == MAX_INTR_SOURCES) { + return false; + } + + /* Find least loaded CPU and try to move there. */ + nci = NULL; + for (CPU_INFO_FOREACH(cii, ici)) { + if ((ici->ci_schedstate.spc_flags & SPCF_NOINTR) != 0) { + continue; + } + KASSERT(ici != oci); + if (nci == NULL || nci->ci_nintrhand > ici->ci_nintrhand) { + nci = ici; + } + } + if (nci == NULL) { + return false; + } + for (nslot = 0; nslot < MAX_INTR_SOURCES; nslot++) { + if (nci->ci_isources[nslot] == NULL) { + break; + } + } + + /* If that did not work, allocate anywhere. */ + if (nslot == MAX_INTR_SOURCES) { + for (CPU_INFO_FOREACH(cii, nci)) { + if ((nci->ci_schedstate.spc_flags & SPCF_NOINTR) != 0) { + continue; + } + KASSERT(nci != oci); + for (nslot = 0; nslot < MAX_INTR_SOURCES; nslot++) { + if (nci->ci_isources[nslot] == NULL) { + break; + } + } + if (nslot != MAX_INTR_SOURCES) { + break; + } + } + } + if (nslot == MAX_INTR_SOURCES) { + return false; + } + + /* + * Now we have new CPU and new slot. Run a cross-call to set up + * the new vector on the target CPU. + */ + where = xc_unicast(0, intr_redistribute_xc_t, isp, + (void *)(intptr_t)nslot, nci); + xc_wait(where); + + /* + * We're ready to go on the target CPU. Run a cross call to + * reroute the interrupt away from the source CPU. + */ + where = xc_unicast(0, intr_redistribute_xc_s1, isp, nci, oci); + xc_wait(where); + + /* Sleep for (at least) 10ms to allow the change to take hold. */ + (void)kpause("intrdist", false, mstohz(10), NULL); + + /* Complete removal from the source CPU. */ + where = xc_unicast(0, intr_redistribute_xc_s2, + (void *)(uintptr_t)oslot, NULL, oci); + xc_wait(where); + + /* Finally, take care of book-keeping. */ + for (ih = isp->is_handlers; ih != NULL; ih = ih->ih_next) { + oci->ci_nintrhand--; + nci->ci_nintrhand++; + ih->ih_cpu = nci; + } + + return true; +} + +void +cpu_intr_redistribute(void) +{ + CPU_INFO_ITERATOR cii; + struct cpu_info *ci; + + KASSERT(mutex_owned(&cpu_lock)); + KASSERT(mp_online); + + /* Direct interrupts away from shielded CPUs. */ + for (CPU_INFO_FOREACH(cii, ci)) { + if ((ci->ci_schedstate.spc_flags & SPCF_NOINTR) == 0) { + continue; + } + while (intr_redistribute(ci)) { + /* nothing */ + } + } + + /* XXX should now re-balance */ +} + +u_int +cpu_intr_count(struct cpu_info *ci) +{ + + KASSERT(ci->ci_nintrhand >= 0); + + return ci->ci_nintrhand; +} Index: src/sys/arch/x86/x86/ioapic.c diff -u src/sys/arch/x86/x86/ioapic.c:1.39 src/sys/arch/x86/x86/ioapic.c:1.40 --- src/sys/arch/x86/x86/ioapic.c:1.39 Fri Feb 13 20:51:19 2009 +++ src/sys/arch/x86/x86/ioapic.c Sun Apr 19 14:11:37 2009 @@ -1,11 +1,11 @@ -/* $NetBSD: ioapic.c,v 1.39 2009/02/13 20:51:19 bouyer Exp $ */ +/* $NetBSD: ioapic.c,v 1.40 2009/04/19 14:11:37 ad Exp $ */ /*- - * Copyright (c) 2000 The NetBSD Foundation, Inc. 
+ * Copyright (c) 2000, 2009 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation - * by RedBack Networks Inc. + * by RedBack Networks Inc, and by Andrew Doran. * * Author: Bill Sommerfeld * @@ -31,7 +31,6 @@ * POSSIBILITY OF SUCH DAMAGE. */ - /* * Copyright (c) 1999 Stefan Grefen * @@ -65,7 +64,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: ioapic.c,v 1.39 2009/02/13 20:51:19 bouyer Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ioapic.c,v 1.40 2009/04/19 14:11:37 ad Exp $"); #include "opt_ddb.h" @@ -108,6 +107,7 @@ void ioapic_hwmask(struct pic *, int); void ioapic_hwunmask(struct pic *, int); +bool ioapic_trymask(struct pic *, int); static void ioapic_addroute(struct pic *, struct cpu_info *, int, int, int); static void ioapic_delroute(struct pic *, struct cpu_info *, int, int, int); @@ -302,6 +302,7 @@ sc->sc_pic.pic_hwunmask = ioapic_hwunmask; sc->sc_pic.pic_addroute = ioapic_addroute; sc->sc_pic.pic_delroute = ioapic_delroute; + sc->sc_pic.pic_trymask = ioapic_trymask; sc->sc_pic.pic_edge_stubs = ioapic_edge_stubs; sc->sc_pic.pic_level_stubs = ioapic_level_stubs; @@ -402,7 +403,6 @@ uint32_t redlo; uint32_t redhi; int delmode; - struct ioapic_pin *pp; struct mp_intr_map *map; @@ -410,39 +410,28 @@ map = pp->ip_map; redlo = map == NULL ? IOAPIC_REDLO_MASK : map->redir; delmode = (redlo & IOAPIC_REDLO_DEL_MASK) >> IOAPIC_REDLO_DEL_SHIFT; + redhi = (ci->ci_cpuid << IOAPIC_REDHI_DEST_SHIFT); - /* XXX magic numbers */ - if ((delmode != 0) && (delmode != 1)) - redhi = 0; - else if (pp->ip_type == IST_NONE) { - redlo |= IOAPIC_REDLO_MASK; - redhi = 0; - } else { - redlo |= (idt_vec & 0xff); - redlo |= (IOAPIC_REDLO_DEL_FIXED<<IOAPIC_REDLO_DEL_SHIFT); - redlo &= ~IOAPIC_REDLO_DSTMOD; - - /* - * Destination: BSP CPU - * - * XXX will want to distribute interrupts across CPUs - * eventually. most likely, we'll want to vector each - * interrupt to a specific CPU and load-balance across - * CPUs. but there's no point in doing that until after - * most interrupts run without the kernel lock. - */ - redhi = (ci->ci_cpuid << IOAPIC_REDHI_DEST_SHIFT); + if (delmode == IOAPIC_REDLO_DEL_FIXED || + delmode == IOAPIC_REDLO_DEL_LOPRI) { + if (pp->ip_type == IST_NONE) { + redlo |= IOAPIC_REDLO_MASK; + } else { + redlo |= (idt_vec & 0xff); + redlo |= (IOAPIC_REDLO_DEL_FIXED<<IOAPIC_REDLO_DEL_SHIFT); + redlo &= ~IOAPIC_REDLO_DSTMOD; - /* XXX derive this bit from BIOS info */ - if (pp->ip_type == IST_LEVEL) - redlo |= IOAPIC_REDLO_LEVEL; - else - redlo &= ~IOAPIC_REDLO_LEVEL; - if (map != NULL && ((map->flags & 3) == MPS_INTPO_DEF)) { + /* XXX derive this bit from BIOS info */ if (pp->ip_type == IST_LEVEL) - redlo |= IOAPIC_REDLO_ACTLO; + redlo |= IOAPIC_REDLO_LEVEL; else - redlo &= ~IOAPIC_REDLO_ACTLO; + redlo &= ~IOAPIC_REDLO_LEVEL; + if (map != NULL && ((map->flags & 3) == MPS_INTPO_DEF)) { + if (pp->ip_type == IST_LEVEL) + redlo |= IOAPIC_REDLO_ACTLO; + else + redlo &= ~IOAPIC_REDLO_ACTLO; + } } } ioapic_write(sc, IOAPIC_REDHI(pin), redhi); @@ -511,6 +500,33 @@ ioapic_unlock(sc, flags); } +bool +ioapic_trymask(struct pic *pic, int pin) +{ + uint32_t redlo; + struct ioapic_softc *sc = pic->pic_ioapic; + u_long flags; + bool rv; + + /* Mask it. */ + flags = ioapic_lock(sc); + redlo = ioapic_read_ul(sc, IOAPIC_REDLO(pin)); + redlo |= IOAPIC_REDLO_MASK; + ioapic_write_ul(sc, IOAPIC_REDLO(pin), redlo); + + /* If pending, unmask and abort. 
*/ + redlo = ioapic_read_ul(sc, IOAPIC_REDLO(pin)); + if ((redlo & (IOAPIC_REDLO_RIRR|IOAPIC_REDLO_DELSTS)) != 0) { + redlo &= ~IOAPIC_REDLO_MASK; + ioapic_write_ul(sc, IOAPIC_REDLO(pin), redlo); + rv = false; + } else { + rv = true; + } + ioapic_unlock(sc, flags); + return rv; +} + void ioapic_hwunmask(struct pic *pic, int pin) { Index: src/sys/kern/kern_cpu.c diff -u src/sys/kern/kern_cpu.c:1.41 src/sys/kern/kern_cpu.c:1.42 --- src/sys/kern/kern_cpu.c:1.41 Mon Jan 19 23:04:26 2009 +++ src/sys/kern/kern_cpu.c Sun Apr 19 14:11:37 2009 @@ -1,7 +1,7 @@ -/* $NetBSD: kern_cpu.c,v 1.41 2009/01/19 23:04:26 njoly Exp $ */ +/* $NetBSD: kern_cpu.c,v 1.42 2009/04/19 14:11:37 ad Exp $ */ /*- - * Copyright (c) 2007, 2008 The NetBSD Foundation, Inc. + * Copyright (c) 2007, 2008, 2009 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -56,9 +56,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: kern_cpu.c,v 1.41 2009/01/19 23:04:26 njoly Exp $"); - -#include "opt_compat_netbsd.h" +__KERNEL_RCSID(0, "$NetBSD: kern_cpu.c,v 1.42 2009/04/19 14:11:37 ad Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -81,10 +79,6 @@ #include <uvm/uvm_extern.h> -#ifdef COMPAT_50 -#include <compat/sys/cpuio.h> -#endif - void cpuctlattach(int); static void cpu_xc_online(struct cpu_info *); @@ -163,17 +157,6 @@ mutex_enter(&cpu_lock); switch (cmd) { -#ifdef IOC_CPU_OSETSTATE - cpustate_t csb; - - case IOC_CPU_OSETSTATE: { - cpustate50_t *ocs = data; - cpustate50_to_cpustate(ocs, &csb); - cs = &csb; - error = 1; - /*FALLTHROUGH*/ - } -#endif case IOC_CPU_SETSTATE: if (error == 0) cs = data; @@ -187,22 +170,10 @@ error = ESRCH; break; } - if (!cs->cs_intr) { - error = EOPNOTSUPP; - break; - } + error = cpu_setintr(ci, cs->cs_intr); error = cpu_setstate(ci, cs->cs_online); break; -#ifdef IOC_CPU_OGETSTATE - case IOC_CPU_OGETSTATE: { - cpustate50_t *ocs = data; - cpustate50_to_cpustate(ocs, &csb); - cs = &csb; - error = 1; - /*FALLTHROUGH*/ - } -#endif case IOC_CPU_GETSTATE: if (error == 0) cs = data; @@ -218,15 +189,14 @@ cs->cs_online = false; else cs->cs_online = true; - cs->cs_intr = true; - cs->cs_lastmod = ci->ci_schedstate.spc_lastmod; -#ifdef IOC_CPU_OGETSTATE - if (cmd == IOC_CPU_OGETSTATE) { - cpustate50_t *ocs = data; - cpustate_to_cpustate50(cs, ocs); - error = 0; - } -#endif + if ((ci->ci_schedstate.spc_flags & SPCF_NOINTR) != 0) + cs->cs_intr = false; + else + cs->cs_intr = true; + cs->cs_lastmod = (int32_t)ci->ci_schedstate.spc_lastmod; + cs->cs_lastmodhi = (int32_t) + (ci->ci_schedstate.spc_lastmod >> 32); + cs->cs_intrcnt = cpu_intr_count(ci) + 1; break; case IOC_CPU_MAPID: @@ -275,15 +245,15 @@ int s; /* - * Thread which sent unicast (separate context) is holding - * the cpu_lock for us. + * Thread that made the cross call (separate context) holds + * cpu_lock on our behalf. */ spc = &ci->ci_schedstate; s = splsched(); spc->spc_flags |= SPCF_OFFLINE; splx(s); - /* Take the first available CPU for the migration */ + /* Take the first available CPU for the migration. 
*/ for (CPU_INFO_FOREACH(cii, target_ci)) { mspc = &target_ci->ci_schedstate; if ((mspc->spc_flags & SPCF_OFFLINE) == 0) @@ -402,6 +372,99 @@ return 0; } +#ifdef __HAVE_INTR_CONTROL +static void +cpu_xc_intr(struct cpu_info *ci) +{ + struct schedstate_percpu *spc; + int s; + + spc = &ci->ci_schedstate; + s = splsched(); + spc->spc_flags &= ~SPCF_NOINTR; + splx(s); +} + +static void +cpu_xc_nointr(struct cpu_info *ci) +{ + struct schedstate_percpu *spc; + int s; + + spc = &ci->ci_schedstate; + s = splsched(); + spc->spc_flags |= SPCF_NOINTR; + splx(s); +} + +int +cpu_setintr(struct cpu_info *ci, bool intr) +{ + struct schedstate_percpu *spc; + CPU_INFO_ITERATOR cii; + struct cpu_info *ci2; + uint64_t where; + xcfunc_t func; + int nintr; + + spc = &ci->ci_schedstate; + + KASSERT(mutex_owned(&cpu_lock)); + + if (intr) { + if ((spc->spc_flags & SPCF_NOINTR) == 0) + return 0; + func = (xcfunc_t)cpu_xc_intr; + } else { + if ((spc->spc_flags & SPCF_NOINTR) != 0) + return 0; + /* + * Ensure that at least one CPU within the system + * is handling device interrupts. + */ + nintr = 0; + for (CPU_INFO_FOREACH(cii, ci2)) { + if ((ci2->ci_schedstate.spc_flags & SPCF_NOINTR) != 0) + continue; + if (ci2 == ci) + continue; + nintr++; + } + if (nintr == 0) + return EBUSY; + func = (xcfunc_t)cpu_xc_nointr; + } + + where = xc_unicast(0, func, ci, NULL, ci); + xc_wait(where); + if (intr) { + KASSERT((spc->spc_flags & SPCF_NOINTR) == 0); + } else if ((spc->spc_flags & SPCF_NOINTR) == 0) { + /* If the flag was not set, then it is busy */ + return EBUSY; + } + + /* Direct interrupts away from the CPU and record the change. */ + cpu_intr_redistribute(); + spc->spc_lastmod = time_second; + return 0; +} +#else /* __HAVE_INTR_CONTROL */ +int +cpu_setintr(struct cpu_info *ci, bool intr) +{ + + return EOPNOTSUPP; +} + +u_int +cpu_intr_count(struct cpu_info *ci) +{ + + return 0; /* 0 == "don't know" */ +} +#endif /* __HAVE_INTR_CONTROL */ + bool cpu_softintr_p(void) { Index: src/sys/sys/cpu.h diff -u src/sys/sys/cpu.h:1.29 src/sys/sys/cpu.h:1.30 --- src/sys/sys/cpu.h:1.29 Thu Apr 9 00:34:44 2009 +++ src/sys/sys/cpu.h Sun Apr 19 14:11:37 2009 @@ -1,4 +1,4 @@ -/* $NetBSD: cpu.h,v 1.29 2009/04/09 00:34:44 yamt Exp $ */ +/* $NetBSD: cpu.h,v 1.30 2009/04/19 14:11:37 ad Exp $ */ /*- * Copyright (c) 2007 YAMAMOTO Takashi, @@ -71,12 +71,15 @@ lwp_t *cpu_switchto(lwp_t *, lwp_t *, bool); struct cpu_info *cpu_lookup(u_int); int cpu_setstate(struct cpu_info *, bool); +int cpu_setintr(struct cpu_info *, bool); bool cpu_intr_p(void); bool cpu_softintr_p(void); bool cpu_kpreempt_enter(uintptr_t, int); void cpu_kpreempt_exit(uintptr_t); bool cpu_kpreempt_disabled(void); int cpu_lwp_setprivate(lwp_t *, void *); +void cpu_intr_redistribute(void); +u_int cpu_intr_count(struct cpu_info *); CIRCLEQ_HEAD(cpuqueue, cpu_info); Index: src/sys/sys/cpuio.h diff -u src/sys/sys/cpuio.h:1.3 src/sys/sys/cpuio.h:1.4 --- src/sys/sys/cpuio.h:1.3 Mon Jan 19 17:39:02 2009 +++ src/sys/sys/cpuio.h Sun Apr 19 14:11:37 2009 @@ -1,7 +1,7 @@ -/* $NetBSD: cpuio.h,v 1.3 2009/01/19 17:39:02 christos Exp $ */ +/* $NetBSD: cpuio.h,v 1.4 2009/04/19 14:11:37 ad Exp $ */ /*- - * Copyright (c) 2007 The NetBSD Foundation, Inc. + * Copyright (c) 2007, 2009 The NetBSD Foundation, Inc. * All rights reserved.
* * This code is derived from software contributed to The NetBSD Foundation @@ -49,16 +49,16 @@ bool cs_online; /* running unbound LWPs */ bool cs_intr; /* fielding interrupts */ bool cs_unused[2]; /* reserved */ - time_t cs_lastmod; /* time of last state change */ + int32_t cs_lastmod; /* time of last state change */ char cs_name[16]; /* reserved */ - uint32_t cs_reserved[4]; /* reserved */ + int32_t cs_lastmodhi; /* time of last state change */ + uint32_t cs_intrcnt; /* count of interrupt handlers + 1 */ + uint32_t cs_reserved[2]; /* reserved */ } cpustate_t; -/* 0 IOC_CPU_OSETSTATE */ -/* 1 IOC_CPU_OGETSTATE */ +#define IOC_CPU_SETSTATE _IOW('c', 0, cpustate_t) +#define IOC_CPU_GETSTATE _IOWR('c', 1, cpustate_t) #define IOC_CPU_GETCOUNT _IOR('c', 2, int) #define IOC_CPU_MAPID _IOWR('c', 3, int) -#define IOC_CPU_SETSTATE _IOW('c', 4, cpustate_t) -#define IOC_CPU_GETSTATE _IOWR('c', 5, cpustate_t) #endif /* !_SYS_CPUIO_H_ */ Index: src/usr.sbin/cpuctl/cpuctl.c diff -u src/usr.sbin/cpuctl/cpuctl.c:1.13 src/usr.sbin/cpuctl/cpuctl.c:1.14 --- src/usr.sbin/cpuctl/cpuctl.c:1.13 Wed Jan 28 22:37:09 2009 +++ src/usr.sbin/cpuctl/cpuctl.c Sun Apr 19 14:11:38 2009 @@ -1,7 +1,7 @@ -/* $NetBSD: cpuctl.c,v 1.13 2009/01/28 22:37:09 ad Exp $ */ +/* $NetBSD: cpuctl.c,v 1.14 2009/04/19 14:11:38 ad Exp $ */ /*- - * Copyright (c) 2007, 2008 The NetBSD Foundation, Inc. + * Copyright (c) 2007, 2008, 2009 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -31,7 +31,7 @@ #ifndef lint #include <sys/cdefs.h> -__RCSID("$NetBSD: cpuctl.c,v 1.13 2009/01/28 22:37:09 ad Exp $"); +__RCSID("$NetBSD: cpuctl.c,v 1.14 2009/04/19 14:11:38 ad Exp $"); #endif /* not lint */ #include <sys/param.h> @@ -61,6 +61,8 @@ void cpu_list(char **); void cpu_offline(char **); void cpu_online(char **); +void cpu_intr(char **); +void cpu_nointr(char **); struct cmdtab { const char *label; @@ -71,6 +73,8 @@ { "list", 0, cpu_list }, { "offline", 1, cpu_offline }, { "online", 1, cpu_online }, + { "intr", 1, cpu_intr }, + { "nointr", 1, cpu_nointr }, { NULL, 0, NULL }, }; @@ -113,6 +117,8 @@ fprintf(stderr, " %s list\n", progname); fprintf(stderr, " %s offline cpuno\n", progname); fprintf(stderr, " %s online cpuno\n", progname); + fprintf(stderr, " %s intr cpuno\n", progname); + fprintf(stderr, " %s nointr cpuno\n", progname); exit(EXIT_FAILURE); /* NOTREACHED */ } @@ -144,6 +150,37 @@ } void +cpu_intr(char **argv) +{ + cpustate_t cs; + + cs.cs_id = getcpuid(argv); + if (ioctl(fd, IOC_CPU_GETSTATE, &cs) < 0) + err(EXIT_FAILURE, "IOC_CPU_GETSTATE"); + cs.cs_intr = true; + if (ioctl(fd, IOC_CPU_SETSTATE, &cs) < 0) + err(EXIT_FAILURE, "IOC_CPU_SETSTATE"); +} + +void +cpu_nointr(char **argv) +{ + cpustate_t cs; + + cs.cs_id = getcpuid(argv); + if (ioctl(fd, IOC_CPU_GETSTATE, &cs) < 0) + err(EXIT_FAILURE, "IOC_CPU_GETSTATE"); + cs.cs_intr = false; + if (ioctl(fd, IOC_CPU_SETSTATE, &cs) < 0) { + if (errno == EOPNOTSUPP) { + warnx("interrupt control not supported on " + "this platform"); + } else + err(EXIT_FAILURE, "IOC_CPU_SETSTATE"); + } +} + +void cpu_identify(char **argv) { char name[32]; @@ -198,12 +235,15 @@ const char *state, *intr; cpustate_t cs; u_int cnt, i; + time_t lastmod; + char ibuf[16], *ts; if (ioctl(fd, IOC_CPU_GETCOUNT, &cnt) < 0) err(EXIT_FAILURE, "IOC_CPU_GETCOUNT"); - printf("Num HwId Unbound LWPs Interrupts Last change\n"); - printf("---- ---- ------------ -------------- ----------------------------\n"); + printf( +"Num HwId Unbound LWPs Interrupts 
Last change #Intr\n" +"---- ---- ------------ ---------- ------------------------ -----\n"); for (i = 0; i < cnt; i++) { cs.cs_id = i; @@ -219,8 +259,16 @@ intr = "intr"; else intr = "nointr"; - printf("%-4d %-4x %-12s %-12s %s", i, cs.cs_id, state, - intr, asctime(localtime(&cs.cs_lastmod))); + if (cs.cs_intrcnt == 0) + strcpy(ibuf, "?"); + else + snprintf(ibuf, sizeof(ibuf), "%d", cs.cs_intrcnt - 1); + lastmod = (time_t)cs.cs_lastmod | + ((time_t)cs.cs_lastmodhi << 32); + ts = asctime(localtime(&lastmod)); + ts[strlen(ts) - 1] = '\0'; + printf("%-4d %-4x %-12s %-10s %s %s\n", i, cs.cs_id, state, + intr, ts, ibuf); } }
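
Two standalone sketches (not from the tree) of details in the diffs
above. First, cpustate_t now carries the modification time as two
int32_t halves (cs_lastmod/cs_lastmodhi) so that the structure layout
no longer depends on the size of time_t; kern_cpu.c writes the halves
and cpu_list() above reassembles them:

/*
 * Sketch only: round trip of the split timestamp.  Names mirror the
 * cpuio.h fields; the value is made up.
 */
#include <assert.h>
#include <stdint.h>

int
main(void)
{
        int64_t spc_lastmod = 0x180000000LL;    /* a post-2038 time */
        int32_t cs_lastmod, cs_lastmodhi;
        int64_t lastmod;

        /* Kernel side (cpuctl_ioctl): split into 32-bit halves. */
        cs_lastmod = (int32_t)spc_lastmod;
        cs_lastmodhi = (int32_t)(spc_lastmod >> 32);

        /* Userland side (cpu_list): reassemble. */
        lastmod = (int64_t)(uint32_t)cs_lastmod |
            ((int64_t)cs_lastmodhi << 32);
        assert(lastmod == spc_lastmod);
        return 0;
}

(The cast through uint32_t avoids sign extension of the low half; the
committed cpu_list() casts straight to time_t, which is equivalent for
timestamps before 2038.)

Second, interrupt distribution: intr_allocate_slot() now prefers the
CPU with the fewest established handlers (ci_nintrhand), skipping
shielded CPUs. The selection amounts to:

/*
 * Sketch only: least-loaded CPU choice, modelling SPCF_NOINTR and
 * ci_nintrhand with plain ints.
 */
#include <stdio.h>

#define NCPU 4

struct fake_cpu {
        int nointr;             /* models SPCF_NOINTR */
        int nintrhand;          /* models ci_nintrhand */
};

static int
pick_cpu(const struct fake_cpu *cpus, int ncpu)
{
        int i, best = -1;

        for (i = 0; i < ncpu; i++) {
                if (cpus[i].nointr)
                        continue;
                if (best == -1 || cpus[best].nintrhand > cpus[i].nintrhand)
                        best = i;
        }
        return best;            /* -1 only if every CPU is shielded */
}

int
main(void)
{
        struct fake_cpu cpus[NCPU] = {
                { 0, 5 }, { 1, 0 }, { 0, 2 }, { 0, 7 },
        };

        /* CPU 1 is shielded, so CPU 2 (two handlers) wins. */
        printf("cpu%d\n", pick_cpu(cpus, NCPU));
        return 0;
}

In the kernel the search cannot come up empty: cpu_setintr() returns
EBUSY rather than shield the last CPU still handling interrupts.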