Seems to work fine on my big box. On 29/11/2010, at 5:43 PM, Philip Guenther wrote:
> On Sun, 28 Nov 2010, Philip Guenther wrote: >> On Sun, 28 Nov 2010, Philip Guenther wrote: >>> On Sunday, November 28, 2010, David Gwynne <d...@cvs.openbsd.org> wrote: >> ... >>>> Log message: >>>> bump the number of supported cpus from 32 up to 64. lets me attach and use >>>> all 48 cores in one of my boxes. >>>> >>>> requested by deraadt@ >>>> made possible by the recent pmap diff by kettenis@ >>> >>> Doesn't pm_cpus in the pmap need to change to a u_int64_t and locore.S >>> and pmap.c (at least) change to match? >> >> Here's a diff to do that. >> >> It also corrects the x86_atomic_*_{l,ul}() macros to actually expand to >> the functions that operate on longs instead of ints (64- and 32-bits, >> respectively) and removes the unused x86_multicast_ipi() function. >> Finally, tlb_shoot_wait has been operated on with 32bit atomic ops, so >> make it an (unsigned) int instead of a long. (This would have never >> worked on a big-endian platform.) >> >> Compile tested only so far (about to get on plane). > > Revised diff that doesn't include my bogus flailing on x86_atomic_cas_ul() > (which does operate on unsigned longs) or tlb_shoot_wait. > > I'm running this now on my lowly little 4 core amd64. > > Philip Guenther > > > diff -ru t/amd64/intr.c ./amd64/intr.c > --- t/amd64/intr.c Sun Nov 28 20:27:17 2010 > +++ ./amd64/intr.c Sun Nov 28 18:48:08 2010 > @@ -498,7 +498,7 @@ > > simple_lock(&ci->ci_slock); > pic->pic_hwmask(pic, ih->ih_pin); > - x86_atomic_clearbits_l(&ci->ci_ipending, (1 << ih->ih_slot)); > + x86_atomic_clearbits_u32(&ci->ci_ipending, (1 << ih->ih_slot)); > > /* > * Remove the handler from the chain. > diff -ru t/amd64/ipi.c ./amd64/ipi.c > --- t/amd64/ipi.c Sun Nov 28 20:27:17 2010 > +++ ./amd64/ipi.c Sun Nov 28 18:48:46 2010 > @@ -50,7 +50,7 @@ > { > int ret; > > - x86_atomic_setbits_l(&ci->ci_ipis, ipimask); > + x86_atomic_setbits_u32(&ci->ci_ipis, ipimask); > > /* Don't send IPI to cpu which isn't (yet) running. 
*/ > if (!(ci->ci_flags & CPUF_RUNNING)) > @@ -88,7 +88,7 @@ > continue; > if ((ci->ci_flags & CPUF_RUNNING) == 0) > continue; > - x86_atomic_setbits_l(&ci->ci_ipis, ipimask); > + x86_atomic_setbits_u32(&ci->ci_ipis, ipimask); > count++; > } > if (!count) > @@ -98,23 +98,6 @@ > } > > void > -x86_multicast_ipi(int cpumask, int ipimask) > -{ > - struct cpu_info *ci; > - CPU_INFO_ITERATOR cii; > - > - cpumask &= ~(1U << cpu_number()); > - if (cpumask == 0) > - return; > - > - CPU_INFO_FOREACH(cii, ci) { > - if ((cpumask & (1U << ci->ci_cpuid)) == 0) > - continue; > - x86_send_ipi(ci, ipimask); > - } > -} > - > -void > x86_ipi_handler(void) > { > extern struct evcount ipi_count; > @@ -122,7 +105,7 @@ > u_int32_t pending; > int bit; > > - pending = x86_atomic_testset_ul(&ci->ci_ipis, 0); > + pending = x86_atomic_testset_u32(&ci->ci_ipis, 0); > > for (bit = 0; bit < X86_NIPI && pending; bit++) { > if (pending & (1<<bit)) { > diff -ru t/amd64/locore.S ./amd64/locore.S > --- t/amd64/locore.S Sun Nov 28 20:27:17 2010 > +++ ./amd64/locore.S Sun Nov 28 19:00:57 2010 > @@ -762,7 +762,7 @@ > /* clear the old pmap's bit for the cpu */ > movq PCB_PMAP(%r13),%rcx > lock > - btrl %edi,PM_CPUS(%rcx) > + btrq %rdi,PM_CPUS(%rcx) > > /* Save stack pointers. 
*/ > movq %rsp,PCB_RSP(%r13) > @@ -800,9 +800,11 @@ > /* set the new pmap's bit for the cpu */ > movl CPUVAR(CPUID),%edi > movq PCB_PMAP(%r13),%rcx > - movl PM_CPUS(%rcx),%eax > +#ifdef DIAGNOSTIC > + movq PM_CPUS(%rcx),%rax > +#endif > lock > - btsl %edi,PM_CPUS(%rcx) > + btsq %rdi,PM_CPUS(%rcx) > #ifdef DIAGNOSTIC > jc _C_LABEL(switch_pmcpu_set) > #endif > diff -ru t/amd64/pmap.c ./amd64/pmap.c > --- t/amd64/pmap.c Sun Nov 28 20:36:05 2010 > +++ ./amd64/pmap.c Sun Nov 28 20:32:48 2010 > @@ -351,7 +351,7 @@ > pmap_is_active(struct pmap *pmap, int cpu_id) > { > return (pmap == pmap_kernel() || > - (pmap->pm_cpus & (1U << cpu_id)) != 0); > + (pmap->pm_cpus & (1ULL << cpu_id)) != 0); > } > > static __inline u_int > @@ -1064,7 +1064,7 @@ > > #ifdef DIAGNOSTIC > if (pmap->pm_cpus != 0) > - printf("pmap_destroy: pmap %p cpus=0x%lx\n", > + printf("pmap_destroy: pmap %p cpus=0x%llx\n", > (void *)pmap, pmap->pm_cpus); > #endif > > @@ -1127,7 +1127,7 @@ > /* > * mark the pmap in use by this processor. > */ > - x86_atomic_setbits_ul(&pmap->pm_cpus, (1U << cpu_number())); > + x86_atomic_setbits_u64(&pmap->pm_cpus, (1ULL << cpu_number())); > } > } > > @@ -1143,7 +1143,7 @@ > /* > * mark the pmap no longer in use by this processor. > */ > - x86_atomic_clearbits_ul(&pmap->pm_cpus, (1U << cpu_number())); > + x86_atomic_clearbits_u64(&pmap->pm_cpus, (1ULL << cpu_number())); > > } > > @@ -2437,7 +2437,7 @@ > * cpus we need to send the IPI to, then we grab the counter, then > * we send the IPIs, then we finally do our own shootdown. > * > - * Our shootdown is last to make it parallell with the other cpus > + * Our shootdown is last to make it parallel with the other cpus > * to shorten the spin time. 
> * > * Notice that we depend on failures to send IPIs only being able to > @@ -2457,13 +2457,13 @@ > struct cpu_info *ci, *self = curcpu(); > CPU_INFO_ITERATOR cii; > long wait = 0; > - int mask = 0; > + u_int64_t mask = 0; > > CPU_INFO_FOREACH(cii, ci) { > if (ci == self || !pmap_is_active(pm, ci->ci_cpuid) || > !(ci->ci_flags & CPUF_RUNNING)) > continue; > - mask |= 1 << ci->ci_cpuid; > + mask |= 1ULL << ci->ci_cpuid; > wait++; > } > > @@ -2476,7 +2476,7 @@ > } > tlb_shoot_addr1 = va; > CPU_INFO_FOREACH(cii, ci) { > - if ((mask & 1 << ci->ci_cpuid) == 0) > + if ((mask & 1ULL << ci->ci_cpuid) == 0) > continue; > if (x86_fast_ipi(ci, LAPIC_IPI_INVLPG) != 0) > panic("pmap_tlb_shootpage: ipi failed"); > @@ -2494,14 +2494,14 @@ > struct cpu_info *ci, *self = curcpu(); > CPU_INFO_ITERATOR cii; > long wait = 0; > - int mask = 0; > + u_int64_t mask = 0; > vaddr_t va; > > CPU_INFO_FOREACH(cii, ci) { > if (ci == self || !pmap_is_active(pm, ci->ci_cpuid) || > !(ci->ci_flags & CPUF_RUNNING)) > continue; > - mask |= 1 << ci->ci_cpuid; > + mask |= 1ULL << ci->ci_cpuid; > wait++; > } > > @@ -2515,7 +2515,7 @@ > tlb_shoot_addr1 = sva; > tlb_shoot_addr2 = eva; > CPU_INFO_FOREACH(cii, ci) { > - if ((mask & 1 << ci->ci_cpuid) == 0) > + if ((mask & 1ULL << ci->ci_cpuid) == 0) > continue; > if (x86_fast_ipi(ci, LAPIC_IPI_INVLRANGE) != 0) > panic("pmap_tlb_shootrange: ipi failed"); > @@ -2534,12 +2534,12 @@ > struct cpu_info *ci, *self = curcpu(); > CPU_INFO_ITERATOR cii; > long wait = 0; > - int mask = 0; > + u_int64_t mask = 0; > > CPU_INFO_FOREACH(cii, ci) { > if (ci == self || !(ci->ci_flags & CPUF_RUNNING)) > continue; > - mask |= 1 << ci->ci_cpuid; > + mask |= 1ULL << ci->ci_cpuid; > wait++; > } > > @@ -2552,7 +2552,7 @@ > } > > CPU_INFO_FOREACH(cii, ci) { > - if ((mask & 1 << ci->ci_cpuid) == 0) > + if ((mask & 1ULL << ci->ci_cpuid) == 0) > continue; > if (x86_fast_ipi(ci, LAPIC_IPI_INVLTLB) != 0) > panic("pmap_tlb_shoottlb: ipi failed"); > diff -ru t/include/atomic.h 
./include/atomic.h > --- t/include/atomic.h Sun Nov 28 20:27:17 2010 > +++ ./include/atomic.h Sun Nov 28 20:33:58 2010 > @@ -120,12 +120,10 @@ > __asm __volatile(LOCK " andq %1,%0" : "=m" (*ptr) : "ir" (~bits)); > } > > -#define x86_atomic_testset_ul x86_atomic_testset_u32 > +#define x86_atomic_testset_ul x86_atomic_testset_u64 > #define x86_atomic_testset_i x86_atomic_testset_i32 > -#define x86_atomic_setbits_l x86_atomic_setbits_u32 > -#define x86_atomic_setbits_ul x86_atomic_setbits_u32 > -#define x86_atomic_clearbits_l x86_atomic_clearbits_u32 > -#define x86_atomic_clearbits_ul x86_atomic_clearbits_u32 > +#define x86_atomic_setbits_ul x86_atomic_setbits_u64 > +#define x86_atomic_clearbits_ul x86_atomic_clearbits_u64 > > #define atomic_setbits_int x86_atomic_setbits_u32 > #define atomic_clearbits_int x86_atomic_clearbits_u32 > diff -ru t/include/intr.h ./include/intr.h > --- t/include/intr.h Sun Nov 28 20:27:17 2010 > +++ ./include/intr.h Sun Nov 28 18:26:27 2010 > @@ -215,7 +215,6 @@ > int x86_send_ipi(struct cpu_info *, int); > int x86_fast_ipi(struct cpu_info *, int); > void x86_broadcast_ipi(int); > -void x86_multicast_ipi(int, int); > void x86_ipi_handler(void); > void x86_intlock(struct intrframe); > void x86_intunlock(struct intrframe); > diff -ru t/include/pmap.h ./include/pmap.h > --- t/include/pmap.h Sun Nov 28 20:27:17 2010 > +++ ./include/pmap.h Sun Nov 28 17:43:17 2010 > @@ -318,7 +318,7 @@ > /* pointer to a PTP in our pmap */ > struct pmap_statistics pm_stats; /* pmap stats (lck by object lock) */ > > - u_int32_t pm_cpus; /* mask of CPUs using pmap */ > + u_int64_t pm_cpus; /* mask of CPUs using pmap */ > }; > > /*