Applied, thanks!!
Luca Dariz, on Sun, 21 May 2023 10:57:56 +0200, wrote:
> * i386/intel/pmap.c: switch to dynamic allocation of all the page tree
>   map levels for the user-space address range, using a separate kmem
>   cache for each level. This allows extending the usable memory space
>   on x86_64 to more than one L3 page for user space. The kernel
>   address map is left untouched for now, as it needs a different
>   initialization.
> * i386/intel/pmap.h: remove the hardcoded user pages and add a macro
>   to reconstruct the page-to-virtual mapping.
> ---
>  i386/intel/pmap.c | 544 ++++++++++++++++++++++------------------------
>  i386/intel/pmap.h |  21 +-
>  2 files changed, 277 insertions(+), 288 deletions(-)
>
> diff --git a/i386/intel/pmap.c b/i386/intel/pmap.c
> index e867ed59..3a30271e 100644
> --- a/i386/intel/pmap.c
> +++ b/i386/intel/pmap.c
> @@ -398,6 +398,7 @@ struct pmap kernel_pmap_store;
>  pmap_t          kernel_pmap;
>
>  struct kmem_cache pmap_cache;   /* cache of pmap structures */
> +struct kmem_cache pt_cache;     /* cache of page tables */
>  struct kmem_cache pd_cache;     /* cache of page directories */
>  #if PAE
>  struct kmem_cache pdpt_cache;   /* cache of page directory pointer tables */
> @@ -429,6 +430,14 @@ pt_entry_t *kernel_page_dir;
>   */
>  static pmap_mapwindow_t mapwindows[PMAP_NMAPWINDOWS * NCPUS];
>
> +#ifdef __x86_64__
> +static inline pt_entry_t *
> +pmap_l4base(const pmap_t pmap, vm_offset_t lin_addr)
> +{
> +        return &pmap->l4base[lin2l4num(lin_addr)];
> +}
> +#endif
> +
>  #ifdef PAE
>  static inline pt_entry_t *
>  pmap_ptp(const pmap_t pmap, vm_offset_t lin_addr)
> @@ -443,7 +452,7 @@ pmap_ptp(const pmap_t pmap, vm_offset_t lin_addr)
>  #else /* __x86_64__ */
>          pdp_table = pmap->pdpbase;
>  #endif /* __x86_64__ */
> -        return pdp_table;
> +        return &pdp_table[lin2pdpnum(lin_addr)];
>  }
>  #endif
>
> @@ -456,7 +465,9 @@
>  pmap_pde(const pmap_t pmap, vm_offset_t addr)
>  {
>  #if PAE
>          pt_entry_t *pdp_table;
>          pdp_table = pmap_ptp(pmap, addr);
> -        pt_entry_t pde = pdp_table[lin2pdpnum(addr)];
> +        if (pdp_table == 0)
> +                return(PT_ENTRY_NULL);
> +        pt_entry_t pde = *pdp_table;
>          if ((pde & INTEL_PTE_VALID) == 0)
>                  return PT_ENTRY_NULL;
>          page_dir = (pt_entry_t *) ptetokv(pde);
> @@ -1092,15 +1103,18 @@ void pmap_init(void)
>           */
>          s = (vm_size_t) sizeof(struct pmap);
>          kmem_cache_init(&pmap_cache, "pmap", s, 0, NULL, 0);
> -        kmem_cache_init(&pd_cache, "pd",
> +        kmem_cache_init(&pt_cache, "pmap_L1",
> +                        INTEL_PGBYTES, INTEL_PGBYTES, NULL,
> +                        KMEM_CACHE_PHYSMEM);
> +        kmem_cache_init(&pd_cache, "pmap_L2",
>                          INTEL_PGBYTES, INTEL_PGBYTES, NULL,
>                          KMEM_CACHE_PHYSMEM);
>  #if PAE
> -        kmem_cache_init(&pdpt_cache, "pdpt",
> +        kmem_cache_init(&pdpt_cache, "pmap_L3",
>                          INTEL_PGBYTES, INTEL_PGBYTES, NULL,
>                          KMEM_CACHE_PHYSMEM);
>  #ifdef __x86_64__
> -        kmem_cache_init(&l4_cache, "L4",
> +        kmem_cache_init(&l4_cache, "pmap_L4",
>                          INTEL_PGBYTES, INTEL_PGBYTES, NULL,
>                          KMEM_CACHE_PHYSMEM);
>  #endif /* __x86_64__ */
> @@ -1244,6 +1258,11 @@ pmap_page_table_page_dealloc(vm_offset_t pa)
>          vm_object_lock(pmap_object);
>          m = vm_page_lookup(pmap_object, pa);
>          vm_page_lock_queues();
> +#ifdef MACH_PV_PAGETABLES
> +        if (!hyp_mmuext_op_mfn (MMUEXT_UNPIN_TABLE, pa_to_mfn(pa)))
> +                panic("couldn't unpin page %llx(%lx)\n", pa, (vm_offset_t) kv_to_ma(pa));
> +        pmap_set_page_readwrite((void*) phystokv(pa));
> +#endif /* MACH_PV_PAGETABLES */
>          vm_page_free(m);
>          inuse_ptepages_count--;
>          vm_page_unlock_queues();
> @@ -1265,7 +1284,7 @@ pmap_page_table_page_dealloc(vm_offset_t pa)
>  pmap_t pmap_create(vm_size_t size)
>  {
>  #ifdef __x86_64__
> -        // needs to be reworked if we want to dynamically allocate PDPs
> +        // needs to be reworked if we want to dynamically allocate PDPs for kernel
>          const int PDPNUM = PDPNUM_KERNEL;
>  #endif
>          pt_entry_t *page_dir[PDPNUM];
> @@ -1360,30 +1379,6 @@ pmap_t pmap_create(vm_size_t size)
>          memset(p->l4base, 0, INTEL_PGBYTES);
>          WRITE_PTE(&p->l4base[lin2l4num(VM_MIN_KERNEL_ADDRESS)],
>                    pa_to_pte(kvtophys((vm_offset_t) pdp_kernel)) | INTEL_PTE_VALID | INTEL_PTE_WRITE);
> -#if lin2l4num(VM_MIN_KERNEL_ADDRESS) != lin2l4num(VM_MAX_USER_ADDRESS)
> -        // kernel vm and user vm are not in the same l4 entry, so add the user one
> -        // TODO alloc only PDPTE for the user range VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS
> -        // and keep the same for kernel range, in l4 table we have different entries
> -        pt_entry_t *pdp_user = (pt_entry_t *) kmem_cache_alloc(&pdpt_cache);
> -        if (pdp_user == NULL) {
> -                panic("pmap create");
> -        }
> -        memset(pdp_user, 0, INTEL_PGBYTES);
> -        WRITE_PTE(&p->l4base[lin2l4num(VM_MIN_USER_ADDRESS)],
> -                  pa_to_pte(kvtophys((vm_offset_t) pdp_user)) | INTEL_PTE_VALID | INTEL_PTE_WRITE | INTEL_PTE_USER);
> -#endif /* lin2l4num(VM_MIN_KERNEL_ADDRESS) != lin2l4num(VM_MAX_USER_ADDRESS) */
> -        for (int i = 0; i < PDPNUM_USER; i++) {
> -                pt_entry_t *user_page_dir = (pt_entry_t *) kmem_cache_alloc(&pd_cache);
> -                memset(user_page_dir, 0, INTEL_PGBYTES);
> -                WRITE_PTE(&pdp_user[i + lin2pdpnum(VM_MIN_USER_ADDRESS)], // pdp_user
> -                          pa_to_pte(kvtophys((vm_offset_t)user_page_dir))
> -                          | INTEL_PTE_VALID
> -#if (defined(__x86_64__) && !defined(MACH_HYP)) || defined(MACH_PV_PAGETABLES)
> -                          | INTEL_PTE_WRITE | INTEL_PTE_USER
> -#endif
> -                          );
> -        }
> -
>  #ifdef MACH_PV_PAGETABLES
>          // FIXME: use kmem_cache_alloc instead
>          if (kmem_alloc_wired(kernel_map,
> @@ -1443,15 +1438,7 @@ pmap_t pmap_create(vm_size_t size)
>
>  void pmap_destroy(pmap_t p)
>  {
> -#if PAE
> -        int i;
> -#endif
> -        boolean_t free_all;
> -        pt_entry_t *page_dir;
> -        pt_entry_t *pdep;
> -        phys_addr_t pa;
>          int c, s;
> -        vm_page_t m;
>
>          if (p == PMAP_NULL)
>                  return;
> @@ -1466,87 +1453,54 @@ void pmap_destroy(pmap_t p)
>                  return; /* still in use */
>          }
>
> +        /*
> +         * Free the page table tree.
> +         */
>  #if PAE
> -        for (i = 0; i < lin2pdpnum(VM_MAX_USER_ADDRESS); i++) {
>  #ifdef __x86_64__
> -#ifdef USER32
> -        /* In this case we know we have one PDP for user space */
> -        pt_entry_t *pdp = (pt_entry_t *) ptetokv(p->l4base[lin2l4num(VM_MIN_USER_ADDRESS)]);
> -#else
> -#warning "TODO do 64-bit userspace need more that 512G?"
> -        pt_entry_t *pdp = (pt_entry_t *) ptetokv(p->l4base[lin2l4num(VM_MIN_USER_ADDRESS)]);
> -#endif /* USER32 */
> -        page_dir = (pt_entry_t *) ptetokv(pdp[i]);
> +        for (int l4i = 0; l4i < lin2l4num(VM_MAX_USER_ADDRESS); l4i++) {
> +                pt_entry_t pdp = (pt_entry_t) p->l4base[l4i];
> +                if (!(pdp & INTEL_PTE_VALID))
> +                        continue;
> +                pt_entry_t *pdpbase = (pt_entry_t*) ptetokv(pdp);
> +                for (int l3i = 0; l3i < 512; l3i++) {
>  #else /* __x86_64__ */
> -        page_dir = (pt_entry_t *) ptetokv(p->pdpbase[i]);
> +        pt_entry_t *pdpbase = p->pdpbase;
> +        for (int l3i = 0; l3i < lin2pdpnum(VM_MAX_USER_ADDRESS); l3i++) {
>  #endif /* __x86_64__ */
> -        free_all = i < lin2pdpnum(LINEAR_MIN_KERNEL_ADDRESS);
> +                pt_entry_t pde = (pt_entry_t) pdpbase[l3i];
> +                if (!(pde & INTEL_PTE_VALID))
> +                        continue;
> +                pt_entry_t *pdebase = (pt_entry_t*) ptetokv(pde);
> +                for (int l2i = 0; l2i < 512; l2i++) {
>  #else /* PAE */
> -        free_all = FALSE;
> -        page_dir = p->dirbase;
> +        pt_entry_t *pdebase = p->dirbase;
> +        for (int l2i = 0; l2i < lin2pdenum(VM_MAX_USER_ADDRESS); l2i++) {
>  #endif /* PAE */
> -
> -#ifdef __x86_64__
> -#warning FIXME 64bit need to free l3
> -#endif
> -        /*
> -         * Free the memory maps, then the
> -         * pmap structure.
> -         */
> -        for (pdep = page_dir;
> -             (free_all
> -              || pdep < &page_dir[lin2pdenum(LINEAR_MIN_KERNEL_ADDRESS)])
> -             && pdep < &page_dir[NPTES];
> -             pdep += ptes_per_vm_page) {
> -                if (*pdep & INTEL_PTE_VALID) {
> -                        pa = pte_to_pa(*pdep);
> -                        assert(pa == (vm_offset_t) pa);
> -                        vm_object_lock(pmap_object);
> -                        m = vm_page_lookup(pmap_object, pa);
> -                        if (m == VM_PAGE_NULL)
> -                                panic("pmap_destroy: pte page not in object");
> -                        vm_page_lock_queues();
> -#ifdef MACH_PV_PAGETABLES
> -                        if (!hyp_mmuext_op_mfn (MMUEXT_UNPIN_TABLE, pa_to_mfn(pa)))
> -                                panic("pmap_destroy: couldn't unpin page %llx(%lx)\n", pa, (vm_offset_t) kv_to_ma(pa));
> -                        pmap_set_page_readwrite((void*) phystokv(pa));
> -#endif /* MACH_PV_PAGETABLES */
> -                        vm_page_free(m);
> -                        inuse_ptepages_count--;
> -                        vm_page_unlock_queues();
> -                        vm_object_unlock(pmap_object);
> -                }
> -        }
> -#ifdef MACH_PV_PAGETABLES
> -        pmap_set_page_readwrite((void*) page_dir);
> -#endif /* MACH_PV_PAGETABLES */
> -        kmem_cache_free(&pd_cache, (vm_offset_t) page_dir);
> +                        pt_entry_t pte = (pt_entry_t) pdebase[l2i];
> +                        if (!(pte & INTEL_PTE_VALID))
> +                                continue;
> +                        kmem_cache_free(&pt_cache, (vm_offset_t)ptetokv(pte));
> +                }
>  #if PAE
> -        }
> -
> -#ifdef MACH_PV_PAGETABLES
> +                kmem_cache_free(&pd_cache, (vm_offset_t)pdebase);
> +        }
>  #ifdef __x86_64__
> -        pmap_set_page_readwrite(p->l4base);
> -        pmap_set_page_readwrite(p->user_l4base);
> -        pmap_set_page_readwrite(p->user_pdpbase);
> +                kmem_cache_free(&pdpt_cache, (vm_offset_t)pdpbase);
> +        }
>  #endif /* __x86_64__ */
> -        pmap_set_page_readwrite(p->pdpbase);
> -#endif /* MACH_PV_PAGETABLES */
> +#endif /* PAE */
>
> +        /* Finally, free the page table tree root and the pmap itself */
> +#if PAE
>  #ifdef __x86_64__
> -        kmem_cache_free(&pdpt_cache, (vm_offset_t) pmap_ptp(p, VM_MIN_USER_ADDRESS));
> -#if lin2l4num(VM_MIN_KERNEL_ADDRESS) != lin2l4num(VM_MAX_USER_ADDRESS)
> -        // TODO kernel vm and user vm are not in the same l4 entry
> -#endif
>          kmem_cache_free(&l4_cache, (vm_offset_t) p->l4base);
> -#ifdef MACH_PV_PAGETABLES
> -        kmem_free(kernel_map, (vm_offset_t)p->user_l4base, INTEL_PGBYTES);
> -        kmem_free(kernel_map, (vm_offset_t)p->user_pdpbase, INTEL_PGBYTES);
> -#endif /* MACH_PV_PAGETABLES */
>  #else /* __x86_64__ */
> -        kmem_cache_free(&pdpt_cache, (vm_offset_t) p->pdpbase);
> +        kmem_cache_free(&pdpt_cache, (vm_offset_t) p->pdpbase);
>  #endif /* __x86_64__ */
> -#endif /* PAE */
> +#else /* PAE */
> +        kmem_cache_free(&pd_cache, (vm_offset_t) p->dirbase);
> +#endif /* PAE */
>          kmem_cache_free(&pmap_cache, (vm_offset_t) p);
>  }
>
> @@ -1756,7 +1710,7 @@ void pmap_remove(
>                  l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
>                  if (l > e)
>                          l = e;
> -                if (*pde & INTEL_PTE_VALID) {
> +                if (pde && (*pde & INTEL_PTE_VALID)) {
>                          spte = (pt_entry_t *)ptetokv(*pde);
>                          spte = &spte[ptenum(s)];
>                          epte = &spte[intel_btop(l-s)];
> @@ -2036,86 +1990,24 @@ void pmap_protect(
>          SPLX(spl);
>  }
>
> +typedef pt_entry_t* (*pmap_level_getter_t)(const pmap_t pmap, vm_offset_t addr);
>  /*
> - *      Insert the given physical page (p) at
> - *      the specified virtual address (v) in the
> - *      target physical map with the protection requested.
> - *
> - *      If specified, the page will be wired down, meaning
> - *      that the related pte can not be reclaimed.
> - *
> - *      NB:  This is the only routine which MAY NOT lazy-evaluate
> - *      or lose information.  That is, this routine must actually
> - *      insert this page into the given map NOW.
> - */
> -void pmap_enter(
> -        pmap_t                  pmap,
> -        vm_offset_t             v,
> -        phys_addr_t             pa,
> -        vm_prot_t               prot,
> -        boolean_t               wired)
> +* Expand one single level of the page table tree
> +*/
> +static inline pt_entry_t* pmap_expand_level(pmap_t pmap, vm_offset_t v, int spl,
> +                                            pmap_level_getter_t pmap_level,
> +                                            pmap_level_getter_t pmap_level_upper,
> +                                            int n_per_vm_page,
> +                                            struct kmem_cache *cache)
>  {
> -        boolean_t               is_physmem;
>          pt_entry_t              *pte;
> -        pv_entry_t              pv_h;
> -        unsigned long           i, pai;
> -        pv_entry_t              pv_e;
> -        pt_entry_t              template;
> -        int                     spl;
> -        phys_addr_t             old_pa;
> -
> -        assert(pa != vm_page_fictitious_addr);
> -        if (pmap_debug) printf("pmap(%zx, %llx)\n", v, (unsigned long long) pa);
> -        if (pmap == PMAP_NULL)
> -                return;
> -
> -#if !MACH_KDB
> -        if (pmap == kernel_pmap && (v < kernel_virtual_start || v >= kernel_virtual_end))
> -                panic("pmap_enter(%zx, %llx) falls in physical memory area!\n", v, (unsigned long long) pa);
> -#endif
> -#if !(__i486__ || __i586__ || __i686__)
> -        if (pmap == kernel_pmap && (prot & VM_PROT_WRITE) == 0
> -            && !wired /* hack for io_wire */ ) {
> -                /*
> -                 *      Because the 386 ignores write protection in kernel mode,
> -                 *      we cannot enter a read-only kernel mapping, and must
> -                 *      remove an existing mapping if changing it.
> -                 */
> -                PMAP_READ_LOCK(pmap, spl);
> -
> -                pte = pmap_pte(pmap, v);
> -                if (pte != PT_ENTRY_NULL && *pte != 0) {
> -                        /*
> -                         *      Invalidate the translation buffer,
> -                         *      then remove the mapping.
> -                         */
> -                        pmap_remove_range(pmap, v, pte,
> -                                          pte + ptes_per_vm_page);
> -                        PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
> -                }
> -                PMAP_READ_UNLOCK(pmap, spl);
> -                return;
> -        }
> -#endif
> -
> -        /*
> -         *      Must allocate a new pvlist entry while we're unlocked;
> -         *      Allocating may cause pageout (which will lock the pmap system).
> -         *      If we determine we need a pvlist entry, we will unlock
> -         *      and allocate one.  Then we will retry, throughing away
> -         *      the allocated entry later (if we no longer need it).
> -         */
> -        pv_e = PV_ENTRY_NULL;
> -Retry:
> -        PMAP_READ_LOCK(pmap, spl);
>
>          /*
>           *      Expand pmap to include this pte.  Assume that
>           *      pmap is always expanded to include enough hardware
>           *      pages to map one VM page.
>           */
> -
> -        while ((pte = pmap_pte(pmap, v)) == PT_ENTRY_NULL) {
> +        while ((pte = pmap_level(pmap, v)) == PT_ENTRY_NULL) {
>                  /*
>                   *      Need to allocate a new page-table page.
>                   */
> @@ -2136,7 +2028,9 @@ Retry:
>                   */
>                  PMAP_READ_UNLOCK(pmap, spl);
>
> -                ptp = phystokv(pmap_page_table_page_alloc());
> +                while (!(ptp = kmem_cache_alloc(cache)))
> +                        VM_PAGE_WAIT((void (*)()) 0);
> +                memset((void *)ptp, 0, PAGE_SIZE);
>
>                  /*
>                   *      Re-lock the pmap and check that another thread has
> @@ -2146,12 +2040,12 @@ Retry:
>                   */
>                  PMAP_READ_LOCK(pmap, spl);
>
> -                if (pmap_pte(pmap, v) != PT_ENTRY_NULL) {
> +                if (pmap_level(pmap, v) != PT_ENTRY_NULL) {
>                          /*
>                           *      Oops...
>                           */
>                          PMAP_READ_UNLOCK(pmap, spl);
> -                        pmap_page_table_page_dealloc(kvtophys(ptp));
> +                        kmem_cache_free(cache, ptp);
>                          PMAP_READ_LOCK(pmap, spl);
>                          continue;
>                  }
> @@ -2159,8 +2053,8 @@ Retry:
>                  /*
>                   *      Enter the new page table page in the page directory.
>                   */
> -                i = ptes_per_vm_page;
> -                pdp = pmap_pde(pmap, v);
> +                i = n_per_vm_page;
> +                pdp = pmap_level_upper(pmap, v);
>                  do {
>  #ifdef MACH_PV_PAGETABLES
>                          pmap_set_page_readonly((void *) ptp);
> @@ -2185,6 +2079,100 @@ Retry:
>                   */
>                  continue;
>          }
> +        return pte;
> +}
> +
> +/*
> + * Expand, if required, the PMAP to include the virtual address V.
> + * PMAP needs to be locked, and it will be still locked on return. It
> + * can temporarily unlock the PMAP, during allocation or deallocation
> + * of physical pages.
> + */
> +static inline pt_entry_t* pmap_expand(pmap_t pmap, vm_offset_t v, int spl)
> +{
> +#ifdef PAE
> +#ifdef __x86_64__
> +        pmap_expand_level(pmap, v, spl, pmap_ptp, pmap_l4base, ptes_per_vm_page, &pdpt_cache);
> +#endif /* __x86_64__ */
> +        pmap_expand_level(pmap, v, spl, pmap_pde, pmap_ptp, ptes_per_vm_page, &pd_cache);
> +#endif /* PAE */
> +        return pmap_expand_level(pmap, v, spl, pmap_pte, pmap_pde, ptes_per_vm_page, &pt_cache);
> +}
> +
> +/*
> + *      Insert the given physical page (p) at
> + *      the specified virtual address (v) in the
> + *      target physical map with the protection requested.
> + *
> + *      If specified, the page will be wired down, meaning
> + *      that the related pte can not be reclaimed.
> + *
> + *      NB:  This is the only routine which MAY NOT lazy-evaluate
> + *      or lose information.  That is, this routine must actually
> + *      insert this page into the given map NOW.
> + */
> +void pmap_enter(
> +        pmap_t                  pmap,
> +        vm_offset_t             v,
> +        phys_addr_t             pa,
> +        vm_prot_t               prot,
> +        boolean_t               wired)
> +{
> +        boolean_t               is_physmem;
> +        pt_entry_t              *pte;
> +        pv_entry_t              pv_h;
> +        unsigned long           i, pai;
> +        pv_entry_t              pv_e;
> +        pt_entry_t              template;
> +        int                     spl;
> +        phys_addr_t             old_pa;
> +
> +        assert(pa != vm_page_fictitious_addr);
> +        if (pmap_debug) printf("pmap(%zx, %llx)\n", v, (unsigned long long) pa);
> +        if (pmap == PMAP_NULL)
> +                return;
> +
> +#if !MACH_KDB
> +        if (pmap == kernel_pmap && (v < kernel_virtual_start || v >= kernel_virtual_end))
> +                panic("pmap_enter(%llx, %llx) falls in physical memory area!\n", v, (unsigned long long) pa);
> +#endif
> +#if !(__i486__ || __i586__ || __i686__)
> +        if (pmap == kernel_pmap && (prot & VM_PROT_WRITE) == 0
> +            && !wired /* hack for io_wire */ ) {
> +                /*
> +                 *      Because the 386 ignores write protection in kernel mode,
> +                 *      we cannot enter a read-only kernel mapping, and must
> +                 *      remove an existing mapping if changing it.
> +                 */
> +                PMAP_READ_LOCK(pmap, spl);
> +
> +                pte = pmap_pte(pmap, v);
> +                if (pte != PT_ENTRY_NULL && *pte != 0) {
> +                        /*
> +                         *      Invalidate the translation buffer,
> +                         *      then remove the mapping.
> +                         */
> +                        pmap_remove_range(pmap, v, pte,
> +                                          pte + ptes_per_vm_page);
> +                        PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
> +                }
> +                PMAP_READ_UNLOCK(pmap, spl);
> +                return;
> +        }
> +#endif
> +
> +        /*
> +         *      Must allocate a new pvlist entry while we're unlocked;
> +         *      Allocating may cause pageout (which will lock the pmap system).
> +         *      If we determine we need a pvlist entry, we will unlock
> +         *      and allocate one.  Then we will retry, throughing away
> +         *      the allocated entry later (if we no longer need it).
> +         */
> +        pv_e = PV_ENTRY_NULL;
> +Retry:
> +        PMAP_READ_LOCK(pmap, spl);
> +
> +        pte = pmap_expand(pmap, v, spl);
>
>          if (vm_page_ready())
>                  is_physmem = (vm_page_lookup_pa(pa) != NULL);
> @@ -2462,10 +2450,7 @@ void pmap_copy(
>   */
>  void pmap_collect(pmap_t p)
>  {
> -        int                     i;
> -        boolean_t               free_all;
> -        pt_entry_t              *page_dir;
> -        pt_entry_t              *pdp, *ptp;
> +        pt_entry_t              *ptp;
>          pt_entry_t              *eptp;
>          phys_addr_t             pa;
>          int                     spl, wired;
> @@ -2476,119 +2461,104 @@ void pmap_collect(pmap_t p)
>          if (p == kernel_pmap)
>                  return;
>
> +        /*
> +         * Free the page table tree.
> +         */
>  #if PAE
> -        for (i = 0; i < lin2pdpnum(VM_MAX_USER_ADDRESS); i++) {
>  #ifdef __x86_64__
> -#ifdef USER32
> -        /* In this case we know we have one PDP for user space */
> -        pdp = (pt_entry_t *) ptetokv(p->l4base[lin2l4num(VM_MIN_USER_ADDRESS)]);
> -#else
> -#warning "TODO do 64-bit userspace need more that 512G?"
> -        pdp = (pt_entry_t *) ptetokv(p->l4base[lin2l4num(VM_MIN_USER_ADDRESS)]);
> -#endif /* USER32 */
> -        page_dir = (pt_entry_t *) ptetokv(pdp[i]);
> +        for (int l4i = 0; l4i < lin2l4num(VM_MAX_USER_ADDRESS); l4i++) {
> +                pt_entry_t pdp = (pt_entry_t) p->l4base[l4i];
> +                if (!(pdp & INTEL_PTE_VALID))
> +                        continue;
> +                pt_entry_t *pdpbase = (pt_entry_t*) ptetokv(pdp);
> +                for (int l3i = 0; l3i < 512; l3i++) {
>  #else /* __x86_64__ */
> -        page_dir = (pt_entry_t *) ptetokv(p->pdpbase[i]);
> +        pt_entry_t *pdpbase = p->pdpbase;
> +        for (int l3i = 0; l3i < lin2pdpnum(VM_MAX_USER_ADDRESS); l3i++) {
>  #endif /* __x86_64__ */
> -        free_all = i < lin2pdpnum(LINEAR_MIN_KERNEL_ADDRESS);
> -#else
> -        i = 0;
> -        free_all = FALSE;
> -        page_dir = p->dirbase;
> -#endif
> -
> -        /*
> -         * Garbage collect map.
> -         */
> -        PMAP_READ_LOCK(p, spl);
> -        for (pdp = page_dir;
> -             (free_all
> -              || pdp < &page_dir[lin2pdenum(LINEAR_MIN_KERNEL_ADDRESS)])
> -             && pdp < &page_dir[NPTES];
> -             pdp += ptes_per_vm_page) {
> -                if (*pdp & INTEL_PTE_VALID) {
> -
> -                        pa = pte_to_pa(*pdp);
> -                        ptp = (pt_entry_t *)phystokv(pa);
> -                        eptp = ptp + NPTES*ptes_per_vm_page;
> -
> -                        /*
> -                         * If the pte page has any wired mappings, we cannot
> -                         * free it.
> -                         */
> -                        wired = 0;
> -                        {
> -                                pt_entry_t *ptep;
> -                                for (ptep = ptp; ptep < eptp; ptep++) {
> -                                        if (*ptep & INTEL_PTE_WIRED) {
> -                                                wired = 1;
> -                                                break;
> -                                        }
> -                                }
> -                        }
> -                        if (!wired) {
> -                                /*
> -                                 * Remove the virtual addresses mapped by this pte page.
> -                                 */
> -                                { /*XXX big hack*/
> -                                        vm_offset_t va = pdenum2lin(pdp - page_dir
> -                                                                    + i * NPTES);
> -                                        if (p == kernel_pmap)
> -                                                va = lintokv(va);
> -                                        pmap_remove_range(p,
> -                                                          va,
> -                                                          ptp,
> -                                                          eptp);
> -                                }
> -
> -                                /*
> -                                 * Invalidate the page directory pointer.
> -                                 */
> -                                {
> -                                        int i = ptes_per_vm_page;
> -                                        pt_entry_t *pdep = pdp;
> -                                        do {
> +                pt_entry_t pde = (pt_entry_t) pdpbase[l3i];
> +                if (!(pde & INTEL_PTE_VALID))
> +                        continue;
> +                pt_entry_t *pdebase = (pt_entry_t*) ptetokv(pde);
> +                for (int l2i = 0; l2i < 512; l2i++) {
> +#else /* PAE */
> +        pt_entry_t *pdebase = p->dirbase;
> +        for (int l2i = 0; l2i < lin2pdenum(VM_MAX_USER_ADDRESS); l2i++) {
> +#endif /* PAE */
> +                        pt_entry_t pte = (pt_entry_t) pdebase[l2i];
> +                        if (!(pte & INTEL_PTE_VALID))
> +                                continue;
> +
> +                        pa = pte_to_pa(pte);
> +                        ptp = (pt_entry_t *)phystokv(pa);
> +                        eptp = ptp + NPTES*ptes_per_vm_page;
> +
> +                        /*
> +                         * If the pte page has any wired mappings, we cannot
> +                         * free it.
> +                         */
> +                        wired = 0;
> +                        {
> +                                pt_entry_t *ptep;
> +                                for (ptep = ptp; ptep < eptp; ptep++) {
> +                                        if (*ptep & INTEL_PTE_WIRED) {
> +                                                wired = 1;
> +                                                break;
> +                                        }
> +                                }
> +                        }
> +                        if (!wired) {
> +                                /*
> +                                 * Remove the virtual addresses mapped by this pte page.
> +                                 */
> +                                { /*XXX big hack*/
> +                                        vm_offset_t va = pagenum2lin(l4i, l3i, l2i, 0);
> +                                        if (p == kernel_pmap)
> +                                                va = lintokv(va);
> +                                        pmap_remove_range(p, va, ptp, eptp);
> +                                }
> +
> +                                /*
> +                                 * Invalidate the page directory pointer.
> +                                 */
> +                                {
> +                                        int i = ptes_per_vm_page;
> +                                        pt_entry_t *pdep = &pdebase[l2i];
> +                                        do {
>  #ifdef MACH_PV_PAGETABLES
> -                                                unsigned long pte = *pdep;
> -                                                void *ptable = (void*) ptetokv(pte);
> -                                                if (!(hyp_mmu_update_pte(pa_to_ma(kvtophys((vm_offset_t)pdep++)), 0)))
> -                                                        panic("%s:%d could not clear pde %p\n",__FILE__,__LINE__,pdep-1);
> -                                                if (!hyp_mmuext_op_mfn (MMUEXT_UNPIN_TABLE, kv_to_mfn(ptable)))
> -                                                        panic("couldn't unpin page %p(%lx)\n", ptable, (vm_offset_t) pa_to_ma(kvtophys((vm_offset_t)ptable)));
> -                                                pmap_set_page_readwrite(ptable);
> +                                                unsigned long pte = *pdep;
> +                                                void *ptable = (void*) ptetokv(pte);
> +                                                if (!(hyp_mmu_update_pte(pa_to_ma(kvtophys((vm_offset_t)pdep++)), 0)))
> +                                                        panic("%s:%d could not clear pde %p\n",__FILE__,__LINE__,pdep-1);
> +                                                if (!hyp_mmuext_op_mfn (MMUEXT_UNPIN_TABLE, kv_to_mfn(ptable)))
> +                                                        panic("couldn't unpin page %p(%lx)\n", ptable, (vm_offset_t) pa_to_ma(kvtophys((vm_offset_t)ptable)));
> +                                                pmap_set_page_readwrite(ptable);
>  #else /* MACH_PV_PAGETABLES */
> -                                                *pdep++ = 0;
> +                                                *pdep++ = 0;
>  #endif /* MACH_PV_PAGETABLES */
> -                                        } while (--i > 0);
> -                                }
> +                                        } while (--i > 0);
> +                                }
>
> -                                PMAP_READ_UNLOCK(p, spl);
> +                                PMAP_READ_UNLOCK(p, spl);
>
> -                                /*
> -                                 * And free the pte page itself.
> -                                 */
> -                                {
> -                                        vm_page_t m;
> -
> -                                        vm_object_lock(pmap_object);
> -                                        assert(pa == (vm_offset_t) pa);
> -                                        m = vm_page_lookup(pmap_object, pa);
> -                                        if (m == VM_PAGE_NULL)
> -                                                panic("pmap_collect: pte page not in object");
> -                                        vm_page_lock_queues();
> -                                        vm_page_free(m);
> -                                        inuse_ptepages_count--;
> -                                        vm_page_unlock_queues();
> -                                        vm_object_unlock(pmap_object);
> -                                }
> +                                /*
> +                                 * And free the pte page itself.
> +                                 */
> +                                kmem_cache_free(&pt_cache, (vm_offset_t)ptetokv(pte));
>
> -                                PMAP_READ_LOCK(p, spl);
> -                        }
> -                }
> -        }
> +                                PMAP_READ_LOCK(p, spl);
> +
> +                        }
> +                }
>  #if PAE
> +                        // TODO check l2?
> +                }
> +#ifdef __x86_64__
> +                // TODO check l3?
>          }
> -#endif
> +#endif /* __x86_64__ */
> +#endif /* PAE */
> +
>          PMAP_UPDATE_TLBS(p, VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
>
>          PMAP_READ_UNLOCK(p, spl);
> diff --git a/i386/intel/pmap.h b/i386/intel/pmap.h
> index 4c1b9bd5..5fc7fb25 100644
> --- a/i386/intel/pmap.h
> +++ b/i386/intel/pmap.h
> @@ -75,7 +75,6 @@ typedef phys_addr_t pt_entry_t;
>  #define L4SHIFT         39              /* L4 shift */
>  #define L4MASK          0x1ff           /* mask for L4 index */
>  #define PDPNUM_KERNEL   (((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) >> PDPSHIFT) + 1)
> -#define PDPNUM_USER     (((VM_MAX_USER_ADDRESS - VM_MIN_USER_ADDRESS) >> PDPSHIFT) + 1)
>  #define PDPMASK         0x1ff           /* mask for page directory pointer index */
>  #else /* __x86_64__ */
>  #define PDPNUM          4               /* number of page directory pointers */
> @@ -130,6 +129,26 @@ typedef phys_addr_t pt_entry_t;
>   */
>  #define pdenum2lin(a)   ((vm_offset_t)(a) << PDESHIFT)
>
> +#if PAE
> +#ifdef __x86_64__
> +#define pagenum2lin(l4num, l3num, l2num, l1num) \
> +        (((vm_offset_t)(l4num) << L4SHIFT) + \
> +         ((vm_offset_t)(l3num) << PDPSHIFT) + \
> +         ((vm_offset_t)(l2num) << PDESHIFT) + \
> +         ((vm_offset_t)(l1num) << PTESHIFT))
> +#else /* __x86_64__ */
> +#define pagenum2lin(l4num, l3num, l2num, l1num) \
> +        (((vm_offset_t)(l3num) << PDPSHIFT) + \
> +         ((vm_offset_t)(l2num) << PDESHIFT) + \
> +         ((vm_offset_t)(l1num) << PTESHIFT))
> +#endif
> +#else /* PAE */
> +#define pagenum2lin(l4num, l3num, l2num, l1num) \
> +        (((vm_offset_t)(l2num) << PDESHIFT) + \
> +         ((vm_offset_t)(l1num) << PTESHIFT))
> +#endif
> +
> +
>  /*
>   * Convert linear offset to page table index
>   */
> -- 
> 2.30.2
>

-- 
Samuel
---
For an independent, transparent, and rigorous evaluation!
I support Inria's Evaluation Committee.
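As a side note for readers following the patch: pagenum2lin() composes a linear
address back out of the per-level table indices, i.e. it is the inverse of the
lin2l4num()/lin2pdpnum()/lin2pdenum() decomposition that the per-level getters
(pmap_l4base, pmap_ptp, pmap_pde, pmap_pte) and the new 0..511 walk loops rely
on. The following is a minimal standalone userland sketch of that round trip,
not kernel code: the shift constants mirror the PAE/x86_64 values quoted above
from i386/intel/pmap.h, and the example address is arbitrary.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Mirrors of the PAE/x86_64 constants in i386/intel/pmap.h. */
#define PTESHIFT 12
#define PDESHIFT 21
#define PDPSHIFT 30
#define L4SHIFT  39
#define IDXMASK  0x1ffULL       /* 9 bits -> 512 entries per level */

/* Same composition as the patch's pagenum2lin(l4, l3, l2, l1). */
static uint64_t pagenum2lin(uint64_t l4, uint64_t l3, uint64_t l2, uint64_t l1)
{
        return (l4 << L4SHIFT) + (l3 << PDPSHIFT)
             + (l2 << PDESHIFT) + (l1 << PTESHIFT);
}

int main(void)
{
        uint64_t lin = 0x00007f1234567000ULL;   /* arbitrary page-aligned user address */

        /* Decompose into per-level indices, as the level getters do. */
        uint64_t l4 = (lin >> L4SHIFT) & IDXMASK;
        uint64_t l3 = (lin >> PDPSHIFT) & IDXMASK;
        uint64_t l2 = (lin >> PDESHIFT) & IDXMASK;
        uint64_t l1 = (lin >> PTESHIFT) & IDXMASK;

        printf("l4=%llu l3=%llu l2=%llu l1=%llu\n",
               (unsigned long long)l4, (unsigned long long)l3,
               (unsigned long long)l2, (unsigned long long)l1);

        /* The composition must reproduce the original address. */
        assert(pagenum2lin(l4, l3, l2, l1) == lin);
        return 0;
}

Since each index is 9 bits wide, every table at every level holds exactly 512
entries, which is why pmap_destroy() and pmap_collect() can walk the tree with
plain "for (...; l3i < 512; ...)" loops once an entry is known to be valid.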