This has been used to boot a full Linux kernel environment
to multiuser.

Change-Id: I12c399aafd30cdf05208831ca30129753f027f2a
Signed-off-by: Ronald G. Minnich <[email protected]>
---
 tests/vmm/Makefrag      |   2 +-
 tests/vmm/vmrunkernel.c | 192 +++++++++++++++++++++++++++++++++++-------------
 2 files changed, 142 insertions(+), 52 deletions(-)

diff --git a/tests/vmm/Makefrag b/tests/vmm/Makefrag
index 4208f63..49eb20f 100644
--- a/tests/vmm/Makefrag
+++ b/tests/vmm/Makefrag
@@ -4,7 +4,7 @@ VMM_TESTS_CFLAGS += $(CFLAGS_TESTS)
 
 ALL_VMM_TEST_FILES := $(wildcard $(VMM_TESTS_DIR)/*.c)
 
-VMM_TESTS_LDLIBS := $(TESTS_LDLIBS)
+VMM_TESTS_LDLIBS := $(TESTS_LDLIBS) -lelf
 
 VMM_TESTS_SRCS := $(ALL_VMM_TEST_FILES)
 
diff --git a/tests/vmm/vmrunkernel.c b/tests/vmm/vmrunkernel.c
index ea2ac5a..5f2787f 100644
--- a/tests/vmm/vmrunkernel.c
+++ b/tests/vmm/vmrunkernel.c
@@ -6,7 +6,9 @@
 #include <parlib/arch/arch.h>
 #include <parlib/ros_debug.h>
 #include <unistd.h>
+#include <gelf.h>
 #include <errno.h>
+#include <libelf.h>
 #include <dirent.h>
 #include <stdlib.h>
 #include <string.h>
@@ -36,6 +38,9 @@
 #include <sys/eventfd.h>
 #include <sys/uio.h>
 
+#define DBG(format, ...) if (debug)\
+                               fprintf(stderr, format, __VA_ARGS__)
+
 struct virtual_machine local_vm, *vm = &local_vm;
 
 struct vmm_gpcore_init gpci;
@@ -292,13 +297,13 @@ static uint8_t acpi_tb_checksum(uint8_t *buffer, uint32_t length)
 {
        uint8_t sum = 0;
        uint8_t *end = buffer + length;
-       fprintf(stderr, "tbchecksum %p for %d", buffer, length);
+       DBG("tbchecksum %p for %d", buffer, length);
        while (buffer < end) {
-               if (end - buffer < 2)
+               if (0 && end - buffer < 2)
                        fprintf(stderr, "%02x\n", sum);
                sum = (uint8_t)(sum + *(buffer++));
        }
-       fprintf(stderr, " is %02x\n", sum);
+       DBG(" is %02x\n", sum);
        return (sum);
 }
 
@@ -307,11 +312,11 @@ static void gencsum(uint8_t *target, void *data, int len)
        uint8_t csum;
        // blast target to zero so it does not get counted
        // (it might be in the struct we checksum) And, yes, it is, goodness.
-       fprintf(stderr, "gencsum %p target %p source %d bytes\n", target, data, len);
+       DBG("gencsum %p target %p source %d bytes\n", target, data, len);
        *target = 0;
        csum  = acpi_tb_checksum((uint8_t *)data, len);
        *target = ~csum + 1;
-       fprintf(stderr, "Cmoputed is %02x\n", *target);
+       DBG("ACPI: genchecksum: Computed is %02x\n", *target);
 }
 
 static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
@@ -324,15 +329,108 @@ static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
        return oldbit;
 }
 
-static void pir_dump()
+/* load_kernel loads an ELF file as a kernel. */
+uintptr_t
+load_kernel(char *filename, uintptr_t *kernstart, uintptr_t *kernend)
 {
-       unsigned long *pir_ptr = gpci.posted_irq_desc;
-       int i;
-       fprintf(stderr, "-------Begin PIR dump-------\n");
-       for (i = 0; i < 8; i++){
-               fprintf(stderr, "Byte %d: 0x%016x\n", i, pir_ptr[i]);
+       Elf64_Ehdr *ehdr;
+       Elf *elf;
+       size_t phnum = 0;
+       Elf64_Phdr *hdrs;
+       int fd;
+
+       elf_version(EV_CURRENT);
+       fd = open(filename, O_RDONLY);
+       if (fd < 0) {
+               fprintf(stderr, "Can't open %s: %r\n", filename);
+               return 0;
+       }
+
+       elf = elf_begin(fd, ELF_C_READ, NULL);
+       if (elf == NULL) {
+               fprintf(stderr, "%s: cannot read %s ELF file.\n", __func__, filename);
+               close(fd);
+               return 0;
+       }
+
+       ehdr = elf64_getehdr(elf);
+       if (ehdr == NULL) {
+               fprintf(stderr, "%s: cannot get exec header of %s.\n",
+                       __func__, filename);
+               goto fail;
+       }
+       fprintf(stderr, "%s ELF entry point is %p\n", filename, ehdr->e_entry);
+
+       if (elf_getphdrnum(elf, &phnum) < 0) {
+               fprintf(stderr, "%s: cannot get program header num of %s.\n",
+                       __func__, filename);
+               goto fail;
        }
-       fprintf(stderr, "-------End PIR dump-------\n");
+       fprintf(stderr, "%s has %d program headers\n", filename, phnum);
+
+       hdrs = elf64_getphdr(elf);
+       if (hdrs == NULL) {
+               fprintf(stderr, "%s: cannot get program headers of %s.\n",
+                       __func__, filename);
+               goto fail;
+       }
+
+       for (int i = 0; i < phnum; i++) {
+               size_t tot;
+               Elf64_Phdr *h = &hdrs[i];
+               uintptr_t pa;
+
+               fprintf(stderr,
+                       "%d: type 0x%lx flags 0x%lx  offset 0x%lx vaddr 0x%lx paddr 0x%lx size 0x%lx  memsz 0x%lx align 0x%lx\n",
+                       i,
+                       h->p_type,              /* Segment type */
+                       h->p_flags,             /* Segment flags */
+                       h->p_offset,            /* Segment file offset */
+                       h->p_vaddr,             /* Segment virtual address */
+                       h->p_paddr,             /* Segment physical address */
+                       h->p_filesz,            /* Segment size in file */
+                       h->p_memsz,             /* Segment size in memory */
+                       h->p_align              /* Segment alignment */);
+               if (h->p_type != PT_LOAD)
+                       continue;
+               if ((h->p_flags & (PF_R|PF_W|PF_X)) == 0)
+                       continue;
+
+               /* we do the memset purely to ensure everything gets paged in. */
+               /* compute the offset from the desired address. */
+               /* this ONLY works now if kernaddr > h->p_paddr */
+               pa = h->p_paddr;
+               memset((void *)pa, 0, h->p_memsz);
+               if (*kernstart > pa)
+                       *kernstart = pa;
+               if (*kernend < pa+h->p_memsz)
+                       *kernend = pa+h->p_memsz;
+               fprintf(stderr,
+                       "Read header %d @offset %p to %p (elf PA is %p) %d bytes:",
+                       i, h->p_offset, pa, h->p_paddr, h->p_filesz);
+               tot = 0;
+               while (tot < h->p_filesz) {
+                       int amt = pread(fd, (void *)(pa + tot), h->p_filesz - tot,
+                                       h->p_offset + tot);
+                       if (amt < 1)
+                               break;
+                       tot += amt;
+               }
+               fprintf(stderr, "read a total of %d bytes\n", tot);
+               if (tot < h->p_filesz) {
+                       fprintf(stderr, "%s: got %d bytes, wanted %d bytes\n",
+                               filename, tot, h->p_filesz);
+                       goto fail;
+               }
+       }
+
+       close(fd);
+       elf_end(elf);
+       return ehdr->e_entry;
+ fail:
+       close(fd);
+       elf_end(elf);
+       return 0;
 }
 
 int main(int argc, char **argv)
@@ -347,11 +445,9 @@ int main(int argc, char **argv)
        struct acpi_table_fadt *f;
        struct acpi_table_madt *m;
        struct acpi_table_xsdt *x;
-       // lowmem is a bump allocated pointer to 2M at the "physbase" of memory
-       void *lowmem = (void *) 0x1000000;
        int amt;
        int vmmflags = 0; // Disabled probably forever. VMM_VMCALL_PRINTF;
-       uint64_t entry = 0x1200000, kerneladdress = 0x1200000;
+       uint64_t entry = 0;
        int ret;
        uintptr_t size;
        void * xp;
@@ -370,6 +466,7 @@ int main(int argc, char **argv)
        struct stat stat_result;
        int num_read;
        int option_index;
+       uintptr_t kernstart = (uintptr_t)~1, kernend = 0;
        static struct option long_options[] = {
                {"debug",         no_argument,       0, 'd'},
                {"vmm_vmcall",    no_argument,       0, 'v'},
@@ -394,7 +491,6 @@ int main(int argc, char **argv)
                exit(1);
        }
        memset(_kernel, 0, sizeof(_kernel));
-       memset(lowmem, 0xff, 2*1048576);
        vm->low4k = malloc(PGSIZE);
        memset(vm->low4k, 0xff, PGSIZE);
        vm->low4k[0x40e] = 0;
@@ -492,33 +588,16 @@ int main(int argc, char **argv)
                fprintf(stderr, "Usage: %s vmimage [-n (no vmcall printf)] [coreboot_tables [loadaddress [entrypoint]]]\n", argv[0]);
                exit(1);
        }
+
        if (argc > 1)
                coreboot_tables = (void *) strtoull(argv[1], 0, 0);
-       if (argc > 2)
-               kerneladdress = strtoull(argv[2], 0, 0);
-       if (argc > 3)
-               entry = strtoull(argv[3], 0, 0);
-       kfd = open(argv[0], O_RDONLY);
-       if (kfd < 0) {
-               perror(argv[0]);
+
+       entry = load_kernel(argv[0], &kernstart, &kernend);
+       if (entry == 0) {
+               fprintf(stderr, "Unable to load kernel %s\n", argv[0]);
                exit(1);
        }
-       // read in the kernel, one 2M page at a time.
-       xp = (void *)kerneladdress;
-       for(;;) {
-               amt = read(kfd, xp, PML2_PTE_REACH);
-               if (amt < 0) {
-                       perror("read");
-                       exit(1);
-               }
-               if (amt == 0) {
-                       break;
-               }
-               xp += amt;
-       }
-       size = ROUNDUP((uintptr_t)xp - kerneladdress, PML2_PTE_REACH);
-       fprintf(stderr, "Read in %d bytes\n", size);
-       close(kfd);
+
 
        // The low 1m so we can fill in bullshit like ACPI. */
        // And, sorry, due to the STUPID format of the RSDP for now we need the low 1M.
@@ -530,7 +609,7 @@ int main(int argc, char **argv)
        }
        memset(low1m, 0xff, MiB-4096);
        r = a;
-       fprintf(stderr, "install rsdp to %p\n", r);
+       DBG("install rsdp to %p\n", r);
        *r = rsdp;
        a += sizeof(*r);
        r->xsdt_physical_address = (uint64_t)a;
@@ -553,21 +632,21 @@ int main(int argc, char **argv)
        x = a;
        a += sizeof(*x) + 8*sizeof(void *);
        memset(x, 0, a - (void *)x);
-       fprintf(stderr, "install xsdt to %p\n", x);
+       DBG("install xsdt to %p\n", x);
        *x = xsdt;
        x->table_offset_entry[0] = 0;
        x->table_offset_entry[1] = 0;
        x->header.length = a - (void *)x;
 
        f = a;
-       fprintf(stderr, "install fadt to %p\n", f);
+       DBG("install fadt to %p\n", f);
        *f = fadt;
        x->table_offset_entry[0] = (uint64_t)f; // fadt MUST be first in xsdt!
        a += sizeof(*f);
        f->header.length = a - (void *)f;
 
        f->Xdsdt = (uint64_t) a;
-       fprintf(stderr, "install dsdt to %p\n", a);
+       DBG("install dsdt to %p\n", a);
        memcpy(a, &DSDT_DSDTTBL_Header, 36);
        a += 36;
 
@@ -581,7 +660,7 @@ int main(int argc, char **argv)
        *m = madt;
        x->table_offset_entry[3] = (uint64_t) m;
        a += sizeof(*m);
-       fprintf(stderr, "install madt to %p\n", m);
+       DBG("install madt to %p\n", m);
        memmove(a, &Apic0, sizeof(Apic0));
        a += sizeof(Apic0);
        memmove(a, &Apic1, sizeof(Apic1));
@@ -606,7 +685,10 @@ int main(int argc, char **argv)
 
        fprintf(stderr, "allchecksums ok\n");
 
-       hexdump(stdout, r, a-(void *)r);
+       if (debug) {
+               fprintf(stderr, "ACPI tables:\n");
+               hexdump(stderr, r, a-(void *)r);
+       }
 
        a = (void *)(((unsigned long)a + 0xfff) & ~0xfff);
        gpci.posted_irq_desc = a;
@@ -722,7 +804,10 @@ int main(int argc, char **argv)
        vm->nr_gpcs = 1;
        vm->gpcis = &gpci;
        ret = vmm_init(vm, vmmflags);
-       assert(!ret);
+       if (ret) {
+               fprintf(stderr, "vmm_init failed, returns %d: %r\n", ret);
+               assert(!ret);
+       }
 
        /* Allocate 3 pages for page table pages: a page of 512 GiB
         * PTEs with only one entry filled to point to a page of 1 GiB
@@ -746,16 +831,21 @@ int main(int argc, char **argv)
        p1 = &p512[NPTENTRIES];
        p2m = &p512[2 * NPTENTRIES];
 
-       p512[PML4(kerneladdress)] = (uint64_t)p1 | PTE_KERN_RW;
-       p1[PML3(kerneladdress)] = (uint64_t)p2m | PTE_KERN_RW;
+       size = kernend - kernstart;
+       fprintf(stderr, "Map %p for %zu bytes\n", kernstart, size);
+       p512[PML4(kernstart)] = (uint64_t)p1 | PTE_KERN_RW;
+       p1[PML3(kernstart)] = (uint64_t)p2m | PTE_KERN_RW;
        for (uintptr_t i = 0; i < size; i += PML2_PTE_REACH) {
-               p2m[PML2(kerneladdress + i)] =
-                   (uint64_t)(kerneladdress + i) | PTE_KERN_RW | PTE_PS;
+               p2m[PML2(kernstart + i)] =
+                   (uint64_t)(kernstart + i) | PTE_KERN_RW | PTE_PS;
        }
 
        uint8_t *kernel = (void *)GKERNBASE;
        //write_coreboot_table(coreboot_tables, ((void *)VIRTIOBASE) /*kernel*/, KERNSIZE + 1048576);
-       hexdump(stdout, coreboot_tables, 512);
+       if (debug) {
+               fprintf(stderr, "Coreboot tables\n");
+               hexdump(stderr, coreboot_tables, 512);
+       }
        fprintf(stderr, "p512 %p p512[0] is 0x%lx p1 %p p1[0] is 0x%x\n", p512, p512[0], p1, p1[0]);
 
        vmm_run_task(vm, timer_thread, 0);
-- 
2.8.0.rc3.226.g39d4020

-- 
You received this message because you are subscribed to the Google Groups 
"Akaros" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To post to this group, send email to [email protected].
For more options, visit https://groups.google.com/d/optout.

Reply via email to