On Wed, Jul 05, 2017 at 11:27:06AM +0200, Mark Kettenis wrote:
> > Date: Wed, 5 Jul 2017 09:34:59 +0300
> > From: Artturi Alm <artturi....@gmail.com>
> > 
> > On Wed, Jul 05, 2017 at 02:27:46AM +0300, Artturi Alm wrote:
> > > Hi,
> > > 
> > > instead of messing w/bs_tags, use the fact pmap_kernel()->pm_refs is going
> > > to be 0 until pmap_bootstrap() has ran. tmp_bs_tag was unused, and
> > > bootstrap_bs_map doesn't need/use the void *t-arg when being ran 
> > > indirectly
> > > via armv7_bs_map().
> > > 
> > > the whole existence of bootstrap_bs_map is another story, and the comment 
> > > in
> > > /* Now, map the FDT area. */ is somewhat a stupid excuse, it's already 
> > > mapped
> > > before initarm() w/VA=PA, and could well be _init()&_get_size()'d & 
> > > memcpy'ed
> > > somewhere in reach within bootstrap KVA, guess diff might follow for that,
> > > too, if anyone has time for these simplifications.
> > > 
> > 
> > Ok, i was wrong ^there, and the bootstrap code before initarm() didn't fill
> > the L1 w/VA=PA anymore, for reasons i don't understand, so i 'fixed' it,
> > with diff below. tested to boot and eeprom -p normally on cubie2 and wandb.
> > 
> > i kept the diff minimal, to the point it does fdt_get_size() twice just like
> > before, which i don't like, nor the name of size-variable and what not, but
> > minimal it is. Would be the first step towards earlier physmem load :)
> > 
> > -Artturi
> 
> What are you trying to achieve here?
> 
> The current code quite deliberately does not create a cachable 1:1
> mapping for the entire address space.  Such a mapping is dangerous as
> the CPU might speculatively load from any valid mapping and that is a
> terrible idea for device mappings.
> 

Point taken, and adapted the diff to map only 4mb at the expected fdt pa. So
something like below, guess you read the one mail in this thread w/o diff
in it, ofc. the aim is really higher, make arm/armv7 more consistent/
readable/structured/cleaned/ all around, hoping it will make maintenance
and future innovations easier or something, now stop worrying, i'm not
NIH-patient about to design a new wheel or anything xD.

Diff below is still rather raw, tested to boot and build a new kernel
while running the diff correctly on sxi&imx, unfortunately the diff has a few
unnecessary things in it, but the purpose of this is just to show the kind of
things rather small reorganizing could bring.

been up +24hrs, and might have had a few too long stretches hacking w/o
turning on the windows vm for a game or anything, so any stupid mistakes
are because of that, i usually take a break at least every 90mins or so:)

And forgive the stupid ugly printf()s in _bs_valloc(), i forgot, and am
already late from where i was supposed to be now, 'til later o/
-Artturi



diff --git a/sys/arch/armv7/armv7/armv7_machdep.c 
b/sys/arch/armv7/armv7/armv7_machdep.c
index aa1c549b29b..12eac8acc00 100644
--- a/sys/arch/armv7/armv7/armv7_machdep.c
+++ b/sys/arch/armv7/armv7/armv7_machdep.c
@@ -151,11 +151,6 @@ char *boot_args = NULL;
 char *boot_file = "";
 u_int cpu_reset_address = 0;
 
-vaddr_t physical_start;
-vaddr_t physical_freestart;
-vaddr_t physical_freeend;
-vaddr_t physical_end;
-u_int free_pages;
 int physmem = 0;
 
 /*int debug_flags;*/
@@ -356,6 +351,31 @@ copy_io_area_map(pd_entry_t *new_pd)
        }
 }
 
+static inline paddr_t
+_bs_alloc(size_t sz)
+{
+       paddr_t addr, pa = 0;
+
+       for (sz = round_page(sz); sz > 0; sz -= PAGE_SIZE) {
+               if (uvm_page_physget(&addr) == FALSE)
+                       panic("uvm_page_physget() failed");
+               memset((char *)addr, 0, PAGE_SIZE);
+               if (pa == 0)
+                       pa = addr;
+       }
+       return pa;
+}
+
+#define        _BS_RPA2VA(x, y)        (KERNEL_BASE + (x) - (y))
+static inline void
+_bs_valloc(pv_addr_t *pv, vsize_t sz, paddr_t off)
+{
+       printf("_bs_valloc: pv %p sz %#8lx off %#8lx\n", pv, sz, off);
+       pv->pv_pa = _bs_alloc(sz);
+       pv->pv_va = _BS_RPA2VA(pv->pv_pa, off);
+       printf("\tpv_pa %#8lx pv_va %#8lx\n", pv->pv_pa, pv->pv_va);
+}
+
 /*
  * u_int initarm(...)
  *
@@ -371,15 +391,14 @@ copy_io_area_map(pd_entry_t *new_pd)
 u_int
 initarm(void *arg0, void *arg1, void *arg2, paddr_t loadaddr)
 {
-       int loop, loop1, i, physsegs = VM_PHYSSEG_MAX;
-       u_int l1pagetable;
+       int loop, i, physsegs = VM_PHYSSEG_MAX;
        pv_addr_t kernel_l1pt;
        pv_addr_t fdt;
        struct fdt_reg reg;
-       paddr_t memstart;
-       psize_t memsize;
-       paddr_t memend;
-       void *config;
+       vaddr_t physical_start, physical_freestart, physical_end;
+       paddr_t kl1pt, klxpt_areap;
+       u_int free_pages;
+       void *config = arg2;
        size_t size;
        void *node;
        extern uint32_t esym; /* &_end if no symbols are loaded */
@@ -420,18 +439,8 @@ initarm(void *arg0, void *arg1, void *arg2, paddr_t 
loadaddr)
        tmp_bs_tag.bs_map = bootstrap_bs_map;
 
        /*
-        * Now, map the FDT area.
-        *
-        * As we don't know the size of a possible FDT, map the size of a
-        * typical bootstrap bs map.  The FDT might not be aligned, so this
-        * might take up to two L1_S_SIZEd mappings.
-        *
-        * XXX: There's (currently) no way to unmap a bootstrap mapping, so
-        * we might lose a bit of the bootstrap address space.
+        * Now, init the FDT @ PA, reloc and reinit to KVA later.
         */
-       bootstrap_bs_map(NULL, (bus_addr_t)arg2, L1_S_SIZE, 0,
-           (bus_space_handle_t *)&config);
-
        if (!fdt_init(config) || fdt_get_size(config) == 0)
                panic("initarm: no FDT");
 
@@ -453,8 +462,6 @@ initarm(void *arg0, void *arg1, void *arg2, paddr_t 
loadaddr)
        if (node == NULL || fdt_get_reg(node, 0, &reg))
                panic("initarm: no memory specificed");
 
-       memstart = reg.addr;
-       memsize = reg.size;
        physical_start = reg.addr;
        physical_end = MIN(reg.addr + reg.size, (paddr_t)-PAGE_SIZE);
 
@@ -472,11 +479,39 @@ initarm(void *arg0, void *arg1, void *arg2, paddr_t 
loadaddr)
        boothowto |= RB_DFLTROOT;
 #endif /* RAMDISK_HOOKS */
 
-       physical_freestart = (((unsigned long)esym - KERNEL_TEXT_BASE + 0xfff) 
& ~0xfff) + loadaddr;
-       physical_freeend = MIN((uint64_t)physical_end, (paddr_t)-PAGE_SIZE);
+       physical_freestart = round_page(esym - KERNEL_TEXT_BASE) + loadaddr;
 
        physmem = (physical_end - physical_start) / PAGE_SIZE;
 
+       /* Load memory into UVM. */
+#ifdef VERBOSE_INIT_ARM
+       printf("page ");
+#endif
+       uvm_setpagesize();        /* initialize PAGE_SIZE-dependent variables */
+       uvm_page_physload(atop(physical_freestart), atop(physical_end),
+           atop(physical_freestart), atop(physical_end), 0);
+
+       if (physical_start < loadaddr) {
+               uvm_page_physload(atop(physical_start), atop(loadaddr),
+                   atop(physical_start), atop(loadaddr), 0);
+               physsegs--;
+       }
+
+       for (i = 1; i < physsegs; i++) {
+               paddr_t memstart, memend;
+
+               if (fdt_get_reg(node, i, &reg))
+                       break;
+               if (reg.size == 0)
+                       continue;
+
+               memstart = reg.addr;
+               memend = MIN(reg.addr + reg.size, (paddr_t)-PAGE_SIZE);
+               physmem += atop(memend - memstart);
+               uvm_page_physload(atop(memstart), atop(memend),
+                   atop(memstart), atop(memend), 0);
+       }
+
 #ifdef DEBUG
        /* Tell the user about the memory */
        printf("physmemory: %d pages at 0x%08lx -> 0x%08lx\n", physmem,
@@ -505,7 +540,7 @@ initarm(void *arg0, void *arg1, void *arg2, paddr_t 
loadaddr)
        printf("Allocating page tables\n");
 #endif
 
-       free_pages = (physical_freeend - physical_freestart) / PAGE_SIZE;
+       free_pages = atop(physical_end - physical_freestart);
 
 #ifdef VERBOSE_INIT_ARM
        printf("freestart = 0x%08lx, free_pages = %d (0x%08x)\n",
@@ -513,34 +548,28 @@ initarm(void *arg0, void *arg1, void *arg2, paddr_t 
loadaddr)
 #endif
 
        /* Define a macro to simplify memory allocation */
-#define        valloc_pages(var, np)                           \
-       alloc_pages((var).pv_pa, (np));                 \
-       (var).pv_va = KERNEL_BASE + (var).pv_pa - loadaddr;
-
-#define alloc_pages(var, np)                           \
-       (var) = physical_freestart;                     \
-       physical_freestart += ((np) * PAGE_SIZE);       \
-       if (physical_freeend < physical_freestart)      \
-               panic("initarm: out of memory");        \
-       free_pages -= (np);                             \
-       memset((char *)(var), 0, ((np) * PAGE_SIZE));
-
-       loop1 = 0;
-       kernel_l1pt.pv_pa = 0;
-       for (loop = 0; loop <= NUM_KERNEL_PTS; ++loop) {
+#define        valloc_pages(var, np)   _bs_valloc(&(var), ptoa((np)), loadaddr)
+#define alloc_pages(var, np)   (var) = _bs_alloc(ptoa((np)))
+
+       kernel_l1pt.pv_pa = kl1pt = 0;
+       size = ptoa(NUM_KERNEL_PTS) + L1_TABLE_SIZE;
+       klxpt_areap = _bs_alloc(round_page(size));
+       for (i = 0; i < NUM_KERNEL_PTS; i++) {
                /* Are we 16KB aligned for an L1 ? */
-               if (((physical_freestart) & (L1_TABLE_SIZE - 1)) == 0
-                   && kernel_l1pt.pv_pa == 0) {
-                       valloc_pages(kernel_l1pt, L1_TABLE_SIZE / PAGE_SIZE);
-               } else {
-                       valloc_pages(kernel_pt_table[loop1],
-                           L2_TABLE_SIZE / PAGE_SIZE);
-                       ++loop1;
+               if ((klxpt_areap & (L1_TABLE_SIZE - 1)) == 0 && kl1pt == 0) {
+                       kernel_l1pt.pv_pa = kl1pt = klxpt_areap;
+                       kernel_l1pt.pv_va = _BS_RPA2VA(klxpt_areap, loadaddr);
+                       klxpt_areap += L1_TABLE_SIZE;
+                       i--;
+                       continue;
                }
+               kernel_pt_table[i].pv_pa = klxpt_areap;
+               kernel_pt_table[i].pv_va = _BS_RPA2VA(klxpt_areap, loadaddr);
+               klxpt_areap += L2_TABLE_SIZE;
        }
 
        /* This should never be able to happen but better confirm that. */
-       if (!kernel_l1pt.pv_pa || (kernel_l1pt.pv_pa & (L1_TABLE_SIZE-1)) != 0)
+       if (!kernel_l1pt.pv_pa || (kl1pt & (L1_TABLE_SIZE - 1)) != 0)
                panic("initarm: Failed to align the kernel page directory");
 
        /*
@@ -558,8 +587,6 @@ initarm(void *arg0, void *arg1, void *arg2, paddr_t 
loadaddr)
        valloc_pages(undstack, UND_STACK_SIZE);
        valloc_pages(kernelstack, UPAGES);
 
-       /* Allocate enough pages for cleaning the Mini-Data cache. */
-
 #ifdef VERBOSE_INIT_ARM
        printf("IRQ stack: p0x%08lx v0x%08lx\n", irqstack.pv_pa,
            irqstack.pv_va);
@@ -572,11 +599,15 @@ initarm(void *arg0, void *arg1, void *arg2, paddr_t 
loadaddr)
 #endif
 
        /*
-        * Allocate pages for an FDT copy.
+        * Allocate pages for FDT, copy it there, and zero the original.
         */
        size = fdt_get_size(config);
        valloc_pages(fdt, round_page(size) / PAGE_SIZE);
        memcpy((void *)fdt.pv_pa, config, size);
+       memset(config, 0, size);
+
+       /* Now we must reinit the FDT, using the virtual address. */
+       fdt_init((void *)fdt.pv_va);
 
        /*
         * XXX Defer this to later so that we can reclaim the memory
@@ -589,7 +620,7 @@ initarm(void *arg0, void *arg1, void *arg2, paddr_t 
loadaddr)
         */
 
 #ifdef VERBOSE_INIT_ARM
-       printf("Creating L1 page table at 0x%08lx\n", kernel_l1pt.pv_pa);
+       printf("Creating L1 page table at %#8lx\n", kernel_l1pt.pv_pa);
 #endif
 
        /*
@@ -597,18 +628,17 @@ initarm(void *arg0, void *arg1, void *arg2, paddr_t 
loadaddr)
         * We start by mapping the L2 page tables into the L1.
         * This means that we can replace L1 mappings later on if necessary
         */
-       l1pagetable = kernel_l1pt.pv_pa;
 
        /* Map the L2 pages tables in the L1 page table */
-       pmap_link_l2pt(l1pagetable, vector_page & ~(0x00400000 - 1),
+       pmap_link_l2pt(kl1pt, vector_page & ~(0x00400000 - 1),
            &kernel_pt_table[KERNEL_PT_SYS]);
 
        for (loop = 0; loop < KERNEL_PT_KERNEL_NUM; loop++)
-               pmap_link_l2pt(l1pagetable, KERNEL_BASE + loop * 0x00400000,
+               pmap_link_l2pt(kl1pt, KERNEL_BASE + loop * 0x00400000,
                    &kernel_pt_table[KERNEL_PT_KERNEL + loop]);
 
        for (loop = 0; loop < KERNEL_PT_VMDATA_NUM; loop++)
-               pmap_link_l2pt(l1pagetable, KERNEL_VM_BASE + loop * 0x00400000,
+               pmap_link_l2pt(kl1pt, KERNEL_VM_BASE + loop * 0x00400000,
                    &kernel_pt_table[KERNEL_PT_VMDATA + loop]);
 
        /* update the top of the kernel VM */
@@ -631,10 +661,10 @@ initarm(void *arg0, void *arg1, void *arg2, paddr_t 
loadaddr)
 
                logical = 0x00000000;   /* offset of kernel in RAM */
 
-               logical += pmap_map_chunk(l1pagetable, KERNEL_BASE + logical,
+               logical += pmap_map_chunk(kl1pt, KERNEL_BASE + logical,
                    loadaddr + logical, textsize,
                    PROT_READ | PROT_EXEC, PTE_CACHE);
-               logical += pmap_map_chunk(l1pagetable, KERNEL_BASE + logical,
+               logical += pmap_map_chunk(kl1pt, KERNEL_BASE + logical,
                    loadaddr + logical, totalsize - textsize,
                    PROT_READ | PROT_WRITE, PTE_CACHE);
        }
@@ -644,32 +674,30 @@ initarm(void *arg0, void *arg1, void *arg2, paddr_t 
loadaddr)
 #endif
 
        /* Map the stack pages */
-       pmap_map_chunk(l1pagetable, irqstack.pv_va, irqstack.pv_pa,
+       pmap_map_chunk(kl1pt, irqstack.pv_va, irqstack.pv_pa,
            IRQ_STACK_SIZE * PAGE_SIZE, PROT_READ | PROT_WRITE, PTE_CACHE);
-       pmap_map_chunk(l1pagetable, abtstack.pv_va, abtstack.pv_pa,
+       pmap_map_chunk(kl1pt, abtstack.pv_va, abtstack.pv_pa,
            ABT_STACK_SIZE * PAGE_SIZE, PROT_READ | PROT_WRITE, PTE_CACHE);
-       pmap_map_chunk(l1pagetable, undstack.pv_va, undstack.pv_pa,
+       pmap_map_chunk(kl1pt, undstack.pv_va, undstack.pv_pa,
            UND_STACK_SIZE * PAGE_SIZE, PROT_READ | PROT_WRITE, PTE_CACHE);
-       pmap_map_chunk(l1pagetable, kernelstack.pv_va, kernelstack.pv_pa,
+       pmap_map_chunk(kl1pt, kernelstack.pv_va, kernelstack.pv_pa,
            UPAGES * PAGE_SIZE, PROT_READ | PROT_WRITE, PTE_CACHE);
 
-       pmap_map_chunk(l1pagetable, kernel_l1pt.pv_va, kernel_l1pt.pv_pa,
+       pmap_map_chunk(kl1pt, kernel_l1pt.pv_va, kernel_l1pt.pv_pa,
            L1_TABLE_SIZE, PROT_READ | PROT_WRITE, PTE_PAGETABLE);
 
        for (loop = 0; loop < NUM_KERNEL_PTS; ++loop) {
-               pmap_map_chunk(l1pagetable, kernel_pt_table[loop].pv_va,
+               pmap_map_chunk(kl1pt, kernel_pt_table[loop].pv_va,
                    kernel_pt_table[loop].pv_pa, L2_TABLE_SIZE,
                    PROT_READ | PROT_WRITE, PTE_PAGETABLE);
        }
 
-       /* Map the Mini-Data cache clean area. */
-
        /* Map the vector page. */
-       pmap_map_entry(l1pagetable, vector_page, systempage.pv_pa,
+       pmap_map_entry(kl1pt, vector_page, systempage.pv_pa,
            PROT_READ | PROT_WRITE, PTE_CACHE);
 
        /* Map the FDT. */
-       pmap_map_chunk(l1pagetable, fdt.pv_va, fdt.pv_pa,
+       pmap_map_chunk(kl1pt, fdt.pv_va, fdt.pv_pa,
            round_page(fdt_get_size((void *)fdt.pv_pa)),
            PROT_READ | PROT_WRITE, PTE_CACHE);
 
@@ -677,14 +705,14 @@ initarm(void *arg0, void *arg1, void *arg2, paddr_t 
loadaddr)
         * map integrated peripherals at same address in l1pagetable
         * so that we can continue to use console.
         */
-       copy_io_area_map((pd_entry_t *)l1pagetable);
+       copy_io_area_map((pd_entry_t *)kl1pt);
 
        /*
         * Now we have the real page tables in place so we can switch to them.
         * Once this is done we will be running with the REAL kernel page
         * tables.
         */
-       setttb(kernel_l1pt.pv_pa);
+       setttb(kl1pt);
        cpu_tlb_flushID();
 
        /*
@@ -726,43 +754,12 @@ initarm(void *arg0, void *arg1, void *arg2, paddr_t 
loadaddr)
        prefetch_abort_handler_address = (u_int)prefetch_abort_handler;
        undefined_handler_address = (u_int)undefinedinstruction_bounce;
 
-       /* Now we can reinit the FDT, using the virtual address. */
-       fdt_init((void *)fdt.pv_va);
-
        /* Initialise the undefined instruction handlers */
 #ifdef VERBOSE_INIT_ARM
        printf("undefined ");
 #endif
        undefined_init();
 
-       /* Load memory into UVM. */
-#ifdef VERBOSE_INIT_ARM
-       printf("page ");
-#endif
-       uvm_setpagesize();        /* initialize PAGE_SIZE-dependent variables */
-       uvm_page_physload(atop(physical_freestart), atop(physical_freeend),
-           atop(physical_freestart), atop(physical_freeend), 0);
-
-       if (physical_start < loadaddr) {
-               uvm_page_physload(atop(physical_start), atop(loadaddr),
-                   atop(physical_start), atop(loadaddr), 0);
-               physsegs--;
-       }
-
-       node = fdt_find_node("/memory");
-       for (i = 1; i < physsegs; i++) {
-               if (fdt_get_reg(node, i, &reg))
-                       break;
-               if (reg.size == 0)
-                       continue;
-
-               memstart = reg.addr;
-               memend = MIN(reg.addr + reg.size, (paddr_t)-PAGE_SIZE);
-               physmem += (memend - memstart) / PAGE_SIZE;
-               uvm_page_physload(atop(memstart), atop(memend),
-                   atop(memstart), atop(memend), 0);
-       }
-
        /* Boot strap pmap telling it where the kernel page table is */
 #ifdef VERBOSE_INIT_ARM
        printf("pmap ");
diff --git a/sys/arch/armv7/armv7/locore0.S b/sys/arch/armv7/armv7/locore0.S
index 2a4e98cbe8c..69648d696e9 100644
--- a/sys/arch/armv7/armv7/locore0.S
+++ b/sys/arch/armv7/armv7/locore0.S
@@ -134,7 +134,15 @@ _C_LABEL(bootstrap_start):
        orr     r3, r3, r9
        str     r2, [r4, #4]
        str     r3, [r4, #8]
-       str     r3, [r4, #0x14] // ram address for 0xc0000000
+       str     r3, [r4, #(12+8)]       /* ram address for 0xc0000000 */
+
+       mov     r1, r8, lsr #20
+       mov     r2, r1, lsl #2
+       str     r2, [r4, #(24 + 4)]
+       mov     r1, r2, lsl #18
+       ldr     r3, [r4, #(24 + 8)]
+       orr     r3, r3, r1
+       str     r3, [r4, #(24 + 8)]
 
        /*
         * the first entry has two fields that need to be updated for
@@ -191,6 +199,9 @@ mmu_init_table:
        /* map VA 0xc0000000..0xc3ffffff to PA */
        MMU_INIT(0xc0000000, 0x00000000, 64,
                 L1_TYPE_S|L1_S_C|L1_S_V7_AP(AP_KRW)|L1_S_V7_AF)
+       /* map SDRAM VA==PA, WT cacheable @ r2 = fdt */
+       MMU_INIT(0x00000000, 0x00000000, 4,
+                L1_TYPE_S|L1_S_C|L1_S_V7_AP(AP_KRW)|L1_S_V7_AF)
 
        .word 0 /* end of table */
 

Reply via email to