On Wed, Jul 05, 2017 at 11:27:06AM +0200, Mark Kettenis wrote: > > Date: Wed, 5 Jul 2017 09:34:59 +0300 > > From: Artturi Alm <artturi....@gmail.com> > > > > On Wed, Jul 05, 2017 at 02:27:46AM +0300, Artturi Alm wrote: > > > Hi, > > > > > > instead of messing w/bs_tags, use the fact pmap_kernel()->pm_refs is going > > > to be 0 until pmap_bootstrap() has run. tmp_bs_tag was unused, and > > > bootstrap_bs_map doesn't need/use the void *t-arg when being run > > > indirectly > > > via armv7_bs_map(). > > > > > > the whole existence of bootstrap_bs_map is another story, and the comment > > > in > > > /* Now, map the FDT area. */ is somewhat a stupid excuse, it's already > > > mapped > > > before initarm() w/VA=PA, and could well be _init()&_get_size()'d & > > > memcpy'ed > > > somewhere in reach within bootstrap KVA, guess diff might follow for that, > > > too, if anyone has time for these simplifications. > > > > > > > Ok, i was wrong ^there, and the bootstrap code before initarm() didn't fill > > the L1 w/VA=PA anymore, for reasons i don't understand, so i 'fixed' it, > > with diff below. tested to boot and eeprom -p normally on cubie2 and wandb. > > > > i kept the diff minimal, to the point it does fdt_get_size() twice just like > > before, which i don't like, nor the name of size-variable and what not, but > > minimal it is. Would be the first step towards earlier physmem load :) > > > > -Artturi > > What are you trying to achieve here? > > The current code quite deliberately does not create a cacheable 1:1 > mapping for the entire address space. Such a mapping is dangerous as > the CPU might speculatively load from any valid mapping and that is a > terrible idea for device mappings. >
Point taken, and adapted the diff to map only 4mb at the expected fdt pa. So something like below, guess you read the one mail in this thread w/o diff in it, ofc. the aim is really higher, make arm/armv7 more consistent/ readable/structured/cleaned/ all around, hoping it will make maintenance and future innovations easier or something, now stop worrying, i'm not NIH-patient about to design a new wheel or anything xD. Diff below is still rather raw, tested to boot and build a new kernel while running the diff correctly on sxi&imx, unfortunately the diff has a few unnecessary things in it, but the purpose of this is just to show the kind of things rather small reorganizing could bring. been up +24hrs, and might have had a few too long streches hacking w/o turning on the windows vm for a game or anything, so any stupid mistakes are because of that, i usually take a break atleast every 90mins or so:) And forgive the stupid ugly printf()s in _bs_valloc(), i forgot, and am already late from where i was supposed to be now, 'til later o/ -Artturi diff --git a/sys/arch/armv7/armv7/armv7_machdep.c b/sys/arch/armv7/armv7/armv7_machdep.c index aa1c549b29b..12eac8acc00 100644 --- a/sys/arch/armv7/armv7/armv7_machdep.c +++ b/sys/arch/armv7/armv7/armv7_machdep.c @@ -151,11 +151,6 @@ char *boot_args = NULL; char *boot_file = ""; u_int cpu_reset_address = 0; -vaddr_t physical_start; -vaddr_t physical_freestart; -vaddr_t physical_freeend; -vaddr_t physical_end; -u_int free_pages; int physmem = 0; /*int debug_flags;*/ @@ -356,6 +351,31 @@ copy_io_area_map(pd_entry_t *new_pd) } } +static inline paddr_t +_bs_alloc(size_t sz) +{ + paddr_t addr, pa = 0; + + for (sz = round_page(sz); sz > 0; sz -= PAGE_SIZE) { + if (uvm_page_physget(&addr) == FALSE) + panic("uvm_page_physget() failed"); + memset((char *)addr, 0, PAGE_SIZE); + if (pa == 0) + pa = addr; + } + return pa; +} + +#define _BS_RPA2VA(x, y) (KERNEL_BASE + (x) - (y)) +static inline void +_bs_valloc(pv_addr_t *pv, vsize_t sz, 
paddr_t off) +{ + printf("_bs_valloc: pv %p sz %#8lx off %#8lx\n", pv, sz, off); + pv->pv_pa = _bs_alloc(sz); + pv->pv_va = _BS_RPA2VA(pv->pv_pa, off); + printf("\tpv_pa %#8lx pv_va %#8lx\n", pv->pv_pa, pv->pv_va); +} + /* * u_int initarm(...) * @@ -371,15 +391,14 @@ copy_io_area_map(pd_entry_t *new_pd) u_int initarm(void *arg0, void *arg1, void *arg2, paddr_t loadaddr) { - int loop, loop1, i, physsegs = VM_PHYSSEG_MAX; - u_int l1pagetable; + int loop, i, physsegs = VM_PHYSSEG_MAX; pv_addr_t kernel_l1pt; pv_addr_t fdt; struct fdt_reg reg; - paddr_t memstart; - psize_t memsize; - paddr_t memend; - void *config; + vaddr_t physical_start, physical_freestart, physical_end; + paddr_t kl1pt, klxpt_areap; + u_int free_pages; + void *config = arg2; size_t size; void *node; extern uint32_t esym; /* &_end if no symbols are loaded */ @@ -420,18 +439,8 @@ initarm(void *arg0, void *arg1, void *arg2, paddr_t loadaddr) tmp_bs_tag.bs_map = bootstrap_bs_map; /* - * Now, map the FDT area. - * - * As we don't know the size of a possible FDT, map the size of a - * typical bootstrap bs map. The FDT might not be aligned, so this - * might take up to two L1_S_SIZEd mappings. - * - * XXX: There's (currently) no way to unmap a bootstrap mapping, so - * we might lose a bit of the bootstrap address space. + * Now, init the FDT @ PA, reloc and reinit to KVA later. 
*/ - bootstrap_bs_map(NULL, (bus_addr_t)arg2, L1_S_SIZE, 0, - (bus_space_handle_t *)&config); - if (!fdt_init(config) || fdt_get_size(config) == 0) panic("initarm: no FDT"); @@ -453,8 +462,6 @@ initarm(void *arg0, void *arg1, void *arg2, paddr_t loadaddr) if (node == NULL || fdt_get_reg(node, 0, ®)) panic("initarm: no memory specificed"); - memstart = reg.addr; - memsize = reg.size; physical_start = reg.addr; physical_end = MIN(reg.addr + reg.size, (paddr_t)-PAGE_SIZE); @@ -472,11 +479,39 @@ initarm(void *arg0, void *arg1, void *arg2, paddr_t loadaddr) boothowto |= RB_DFLTROOT; #endif /* RAMDISK_HOOKS */ - physical_freestart = (((unsigned long)esym - KERNEL_TEXT_BASE + 0xfff) & ~0xfff) + loadaddr; - physical_freeend = MIN((uint64_t)physical_end, (paddr_t)-PAGE_SIZE); + physical_freestart = round_page(esym - KERNEL_TEXT_BASE) + loadaddr; physmem = (physical_end - physical_start) / PAGE_SIZE; + /* Load memory into UVM. */ +#ifdef VERBOSE_INIT_ARM + printf("page "); +#endif + uvm_setpagesize(); /* initialize PAGE_SIZE-dependent variables */ + uvm_page_physload(atop(physical_freestart), atop(physical_end), + atop(physical_freestart), atop(physical_end), 0); + + if (physical_start < loadaddr) { + uvm_page_physload(atop(physical_start), atop(loadaddr), + atop(physical_start), atop(loadaddr), 0); + physsegs--; + } + + for (i = 1; i < physsegs; i++) { + paddr_t memstart, memend; + + if (fdt_get_reg(node, i, ®)) + break; + if (reg.size == 0) + continue; + + memstart = reg.addr; + memend = MIN(reg.addr + reg.size, (paddr_t)-PAGE_SIZE); + physmem += atop(memend - memstart); + uvm_page_physload(atop(memstart), atop(memend), + atop(memstart), atop(memend), 0); + } + #ifdef DEBUG /* Tell the user about the memory */ printf("physmemory: %d pages at 0x%08lx -> 0x%08lx\n", physmem, @@ -505,7 +540,7 @@ initarm(void *arg0, void *arg1, void *arg2, paddr_t loadaddr) printf("Allocating page tables\n"); #endif - free_pages = (physical_freeend - physical_freestart) / PAGE_SIZE; + 
free_pages = atop(physical_end - physical_freestart); #ifdef VERBOSE_INIT_ARM printf("freestart = 0x%08lx, free_pages = %d (0x%08x)\n", @@ -513,34 +548,28 @@ initarm(void *arg0, void *arg1, void *arg2, paddr_t loadaddr) #endif /* Define a macro to simplify memory allocation */ -#define valloc_pages(var, np) \ - alloc_pages((var).pv_pa, (np)); \ - (var).pv_va = KERNEL_BASE + (var).pv_pa - loadaddr; - -#define alloc_pages(var, np) \ - (var) = physical_freestart; \ - physical_freestart += ((np) * PAGE_SIZE); \ - if (physical_freeend < physical_freestart) \ - panic("initarm: out of memory"); \ - free_pages -= (np); \ - memset((char *)(var), 0, ((np) * PAGE_SIZE)); - - loop1 = 0; - kernel_l1pt.pv_pa = 0; - for (loop = 0; loop <= NUM_KERNEL_PTS; ++loop) { +#define valloc_pages(var, np) _bs_valloc(&(var), ptoa((np)), loadaddr) +#define alloc_pages(var, np) (var) = _bs_alloc(ptoa((np))) + + kernel_l1pt.pv_pa = kl1pt = 0; + size = ptoa(NUM_KERNEL_PTS) + L1_TABLE_SIZE; + klxpt_areap = _bs_alloc(round_page(size)); + for (i = 0; i < NUM_KERNEL_PTS; i++) { /* Are we 16KB aligned for an L1 ? */ - if (((physical_freestart) & (L1_TABLE_SIZE - 1)) == 0 - && kernel_l1pt.pv_pa == 0) { - valloc_pages(kernel_l1pt, L1_TABLE_SIZE / PAGE_SIZE); - } else { - valloc_pages(kernel_pt_table[loop1], - L2_TABLE_SIZE / PAGE_SIZE); - ++loop1; + if ((klxpt_areap & (L1_TABLE_SIZE - 1)) == 0 && kl1pt == 0) { + kernel_l1pt.pv_pa = kl1pt = klxpt_areap; + kernel_l1pt.pv_va = _BS_RPA2VA(klxpt_areap, loadaddr); + klxpt_areap += L1_TABLE_SIZE; + i--; + continue; } + kernel_pt_table[i].pv_pa = klxpt_areap; + kernel_pt_table[i].pv_va = _BS_RPA2VA(klxpt_areap, loadaddr); + klxpt_areap += L2_TABLE_SIZE; } /* This should never be able to happen but better confirm that. 
*/ - if (!kernel_l1pt.pv_pa || (kernel_l1pt.pv_pa & (L1_TABLE_SIZE-1)) != 0) + if (!kernel_l1pt.pv_pa || (kl1pt & (L1_TABLE_SIZE - 1)) != 0) panic("initarm: Failed to align the kernel page directory"); /* @@ -558,8 +587,6 @@ initarm(void *arg0, void *arg1, void *arg2, paddr_t loadaddr) valloc_pages(undstack, UND_STACK_SIZE); valloc_pages(kernelstack, UPAGES); - /* Allocate enough pages for cleaning the Mini-Data cache. */ - #ifdef VERBOSE_INIT_ARM printf("IRQ stack: p0x%08lx v0x%08lx\n", irqstack.pv_pa, irqstack.pv_va); @@ -572,11 +599,15 @@ initarm(void *arg0, void *arg1, void *arg2, paddr_t loadaddr) #endif /* - * Allocate pages for an FDT copy. + * Allocate pages for FDT, copy it there, and zero the original. */ size = fdt_get_size(config); valloc_pages(fdt, round_page(size) / PAGE_SIZE); memcpy((void *)fdt.pv_pa, config, size); + memset(config, 0, size); + + /* Now we must reinit the FDT, using the virtual address. */ + fdt_init((void *)fdt.pv_va); /* * XXX Defer this to later so that we can reclaim the memory @@ -589,7 +620,7 @@ initarm(void *arg0, void *arg1, void *arg2, paddr_t loadaddr) */ #ifdef VERBOSE_INIT_ARM - printf("Creating L1 page table at 0x%08lx\n", kernel_l1pt.pv_pa); + printf("Creating L1 page table at %#8lx\n", kernel_l1pt.pv_pa); #endif /* @@ -597,18 +628,17 @@ initarm(void *arg0, void *arg1, void *arg2, paddr_t loadaddr) * We start by mapping the L2 page tables into the L1. 
* This means that we can replace L1 mappings later on if necessary */ - l1pagetable = kernel_l1pt.pv_pa; /* Map the L2 pages tables in the L1 page table */ - pmap_link_l2pt(l1pagetable, vector_page & ~(0x00400000 - 1), + pmap_link_l2pt(kl1pt, vector_page & ~(0x00400000 - 1), &kernel_pt_table[KERNEL_PT_SYS]); for (loop = 0; loop < KERNEL_PT_KERNEL_NUM; loop++) - pmap_link_l2pt(l1pagetable, KERNEL_BASE + loop * 0x00400000, + pmap_link_l2pt(kl1pt, KERNEL_BASE + loop * 0x00400000, &kernel_pt_table[KERNEL_PT_KERNEL + loop]); for (loop = 0; loop < KERNEL_PT_VMDATA_NUM; loop++) - pmap_link_l2pt(l1pagetable, KERNEL_VM_BASE + loop * 0x00400000, + pmap_link_l2pt(kl1pt, KERNEL_VM_BASE + loop * 0x00400000, &kernel_pt_table[KERNEL_PT_VMDATA + loop]); /* update the top of the kernel VM */ @@ -631,10 +661,10 @@ initarm(void *arg0, void *arg1, void *arg2, paddr_t loadaddr) logical = 0x00000000; /* offset of kernel in RAM */ - logical += pmap_map_chunk(l1pagetable, KERNEL_BASE + logical, + logical += pmap_map_chunk(kl1pt, KERNEL_BASE + logical, loadaddr + logical, textsize, PROT_READ | PROT_EXEC, PTE_CACHE); - logical += pmap_map_chunk(l1pagetable, KERNEL_BASE + logical, + logical += pmap_map_chunk(kl1pt, KERNEL_BASE + logical, loadaddr + logical, totalsize - textsize, PROT_READ | PROT_WRITE, PTE_CACHE); } @@ -644,32 +674,30 @@ initarm(void *arg0, void *arg1, void *arg2, paddr_t loadaddr) #endif /* Map the stack pages */ - pmap_map_chunk(l1pagetable, irqstack.pv_va, irqstack.pv_pa, + pmap_map_chunk(kl1pt, irqstack.pv_va, irqstack.pv_pa, IRQ_STACK_SIZE * PAGE_SIZE, PROT_READ | PROT_WRITE, PTE_CACHE); - pmap_map_chunk(l1pagetable, abtstack.pv_va, abtstack.pv_pa, + pmap_map_chunk(kl1pt, abtstack.pv_va, abtstack.pv_pa, ABT_STACK_SIZE * PAGE_SIZE, PROT_READ | PROT_WRITE, PTE_CACHE); - pmap_map_chunk(l1pagetable, undstack.pv_va, undstack.pv_pa, + pmap_map_chunk(kl1pt, undstack.pv_va, undstack.pv_pa, UND_STACK_SIZE * PAGE_SIZE, PROT_READ | PROT_WRITE, PTE_CACHE); - 
pmap_map_chunk(l1pagetable, kernelstack.pv_va, kernelstack.pv_pa, + pmap_map_chunk(kl1pt, kernelstack.pv_va, kernelstack.pv_pa, UPAGES * PAGE_SIZE, PROT_READ | PROT_WRITE, PTE_CACHE); - pmap_map_chunk(l1pagetable, kernel_l1pt.pv_va, kernel_l1pt.pv_pa, + pmap_map_chunk(kl1pt, kernel_l1pt.pv_va, kernel_l1pt.pv_pa, L1_TABLE_SIZE, PROT_READ | PROT_WRITE, PTE_PAGETABLE); for (loop = 0; loop < NUM_KERNEL_PTS; ++loop) { - pmap_map_chunk(l1pagetable, kernel_pt_table[loop].pv_va, + pmap_map_chunk(kl1pt, kernel_pt_table[loop].pv_va, kernel_pt_table[loop].pv_pa, L2_TABLE_SIZE, PROT_READ | PROT_WRITE, PTE_PAGETABLE); } - /* Map the Mini-Data cache clean area. */ - /* Map the vector page. */ - pmap_map_entry(l1pagetable, vector_page, systempage.pv_pa, + pmap_map_entry(kl1pt, vector_page, systempage.pv_pa, PROT_READ | PROT_WRITE, PTE_CACHE); /* Map the FDT. */ - pmap_map_chunk(l1pagetable, fdt.pv_va, fdt.pv_pa, + pmap_map_chunk(kl1pt, fdt.pv_va, fdt.pv_pa, round_page(fdt_get_size((void *)fdt.pv_pa)), PROT_READ | PROT_WRITE, PTE_CACHE); @@ -677,14 +705,14 @@ initarm(void *arg0, void *arg1, void *arg2, paddr_t loadaddr) * map integrated peripherals at same address in l1pagetable * so that we can continue to use console. */ - copy_io_area_map((pd_entry_t *)l1pagetable); + copy_io_area_map((pd_entry_t *)kl1pt); /* * Now we have the real page tables in place so we can switch to them. * Once this is done we will be running with the REAL kernel page * tables. */ - setttb(kernel_l1pt.pv_pa); + setttb(kl1pt); cpu_tlb_flushID(); /* @@ -726,43 +754,12 @@ initarm(void *arg0, void *arg1, void *arg2, paddr_t loadaddr) prefetch_abort_handler_address = (u_int)prefetch_abort_handler; undefined_handler_address = (u_int)undefinedinstruction_bounce; - /* Now we can reinit the FDT, using the virtual address. */ - fdt_init((void *)fdt.pv_va); - /* Initialise the undefined instruction handlers */ #ifdef VERBOSE_INIT_ARM printf("undefined "); #endif undefined_init(); - /* Load memory into UVM. 
*/ -#ifdef VERBOSE_INIT_ARM - printf("page "); -#endif - uvm_setpagesize(); /* initialize PAGE_SIZE-dependent variables */ - uvm_page_physload(atop(physical_freestart), atop(physical_freeend), - atop(physical_freestart), atop(physical_freeend), 0); - - if (physical_start < loadaddr) { - uvm_page_physload(atop(physical_start), atop(loadaddr), - atop(physical_start), atop(loadaddr), 0); - physsegs--; - } - - node = fdt_find_node("/memory"); - for (i = 1; i < physsegs; i++) { - if (fdt_get_reg(node, i, ®)) - break; - if (reg.size == 0) - continue; - - memstart = reg.addr; - memend = MIN(reg.addr + reg.size, (paddr_t)-PAGE_SIZE); - physmem += (memend - memstart) / PAGE_SIZE; - uvm_page_physload(atop(memstart), atop(memend), - atop(memstart), atop(memend), 0); - } - /* Boot strap pmap telling it where the kernel page table is */ #ifdef VERBOSE_INIT_ARM printf("pmap "); diff --git a/sys/arch/armv7/armv7/locore0.S b/sys/arch/armv7/armv7/locore0.S index 2a4e98cbe8c..69648d696e9 100644 --- a/sys/arch/armv7/armv7/locore0.S +++ b/sys/arch/armv7/armv7/locore0.S @@ -134,7 +134,15 @@ _C_LABEL(bootstrap_start): orr r3, r3, r9 str r2, [r4, #4] str r3, [r4, #8] - str r3, [r4, #0x14] // ram address for 0xc0000000 + str r3, [r4, #(12+8)] /* ram address for 0xc0000000 */ + + mov r1, r8, lsr #20 + mov r2, r1, lsl #2 + str r2, [r4, #(24 + 4)] + mov r1, r2, lsl #18 + ldr r3, [r4, #(24 + 8)] + orr r3, r3, r1 + str r3, [r4, #(24 + 8)] /* * the first entry has two fields that need to be updated for @@ -191,6 +199,9 @@ mmu_init_table: /* map VA 0xc0000000..0xc3ffffff to PA */ MMU_INIT(0xc0000000, 0x00000000, 64, L1_TYPE_S|L1_S_C|L1_S_V7_AP(AP_KRW)|L1_S_V7_AF) + /* map SDRAM VA==PA, WT cacheable @ r2 = fdt */ + MMU_INIT(0x00000000, 0x00000000, 4, + L1_TYPE_S|L1_S_C|L1_S_V7_AP(AP_KRW)|L1_S_V7_AF) .word 0 /* end of table */