From: Waldemar Kozaczuk <jwkozac...@gmail.com>
Committer: Nadav Har'El <n...@scylladb.com>
Branch: master

Move kernel to 0x40200000 address (1 GiB higher) in virtual memory

This patch provides all the changes necessary to move the OSv kernel
1 GiB higher in virtual memory, so that it starts at 0x40200000. Most
changes involve adding or subtracting 0x40000000 (OSV_KERNEL_VM_SHIFT)
in all relevant places. Please note that the kernel is still loaded at
2 MiB in physical memory.
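
To make the arithmetic concrete, here is a minimal standalone sketch
(an illustration only, not part of the patch; the helper names are
hypothetical) of the shift-based translation between the kernel's
virtual and physical addresses, using the x64 constants this patch
introduces. The real logic lives in core/mmu.cc below:

    #include <cstdint>

    // x64 layout after this patch:
    constexpr uintptr_t kernel_base     = 0x200000;    // physical load address (2 MiB)
    constexpr uintptr_t kernel_vm_base  = 0x40200000;  // virtual link address (1 GiB + 2 MiB)
    constexpr uintptr_t kernel_vm_shift = kernel_vm_base - kernel_base;  // 0x40000000

    // Translate addresses that fall inside the kernel image
    constexpr uintptr_t kernel_virt_to_phys(uintptr_t va) { return va - kernel_vm_shift; }
    constexpr uintptr_t kernel_phys_to_virt(uintptr_t pa) { return pa + kernel_vm_shift; }

    static_assert(kernel_virt_to_phys(0x40200000) == 0x200000,
                  "kernel start maps back to its 2 MiB physical load address");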

The motivation for this patch is to make as much space as possible (or
just enough) in virtual memory to allow running unmodified Linux
non-PIE executables (issue #190). Even though, with the advancement of
ASLR, more and more applications are PIEs (Position Independent
Executables), which OSv supports pretty well, there are still many
non-PIEs (position dependent executables) out there. The most prominent
one is actually the JVM, whose distributions mostly come with a tiny
(~20K) bootstrap java non-PIE executable. There are many other examples
where a small non-PIE executable loads other shared libraries.
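
As an aside, the two kinds can be told apart by the ELF header type:
ET_EXEC marks a non-PIE (position dependent) executable, while a PIE is
ET_DYN. Here is a minimal standalone sketch, unrelated to the patch
itself, that classifies a binary this way (assuming a Linux <elf.h> and
a 64-bit binary, with only minimal error handling):

    #include <elf.h>
    #include <fstream>
    #include <iostream>

    int main(int argc, char** argv) {
        if (argc < 2) {
            std::cerr << "usage: " << argv[0] << " <elf-file>\n";
            return 1;
        }
        // The ELF header sits at offset 0; e_type distinguishes the two kinds
        std::ifstream f(argv[1], std::ios::binary);
        Elf64_Ehdr hdr{};
        if (!f.read(reinterpret_cast<char*>(&hdr), sizeof(hdr))) {
            std::cerr << "failed to read ELF header\n";
            return 1;
        }
        if (hdr.e_type == ET_EXEC)
            std::cout << "non-PIE (position dependent executable)\n";
        else if (hdr.e_type == ET_DYN)
            std::cout << "PIE (or shared library)\n";
        else
            std::cout << "not an executable\n";
        return 0;
    }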

As issue #1043 explains, there are at least 3 possible solutions, and
this patch implements the 3rd (and last) one described there. Please
note that in the future, with little effort, we could provide a
slightly better scheme for OSV_KERNEL_VM_SHIFT that would allow us to
place the kernel even higher, at the end of the 2 GiB limit of the
small memory model, and thus support virtually any non-PIE built using
the small memory model.
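
As a rough sketch of what such a future scheme could compute (the
numbers below are assumptions for illustration only; this patch keeps
the kernel at 0x40200000):

    #include <cstdint>

    // Hypothetical future scheme: link the kernel near the top of the
    // 2 GiB small-memory-model window instead of at 1 GiB + 2 MiB.
    // kernel_room is an assumed upper bound on the kernel image size.
    constexpr uintptr_t small_model_limit = 0x80000000;  // 2 GiB
    constexpr uintptr_t kernel_room       = 0x10000000;  // assumption: 256 MiB
    constexpr uintptr_t kernel_vm_base    = small_model_limit - kernel_room;  // 0x70000000
    constexpr uintptr_t kernel_vm_shift   = kernel_vm_base - 0x200000;        // 0x6FE00000

    static_assert(kernel_vm_base % 0x200000 == 0,
                  "keep the 2 MiB (large page) alignment the boot page tables rely on");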

Due to its impact, this patch has been tested on the following hypervisors:
- QEMU without KVM
- QEMU with KVM
- Firecracker
- VirtualBox 6
- VMware Player
- XEN on EC2
- XEN locally in HVM mode

Fixes #1043

Signed-off-by: Waldemar Kozaczuk <jwkozac...@gmail.com>
Message-Id: <20190620040707.23249-1-jwkozac...@gmail.com>

---
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -312,7 +312,7 @@ gcc-sysroot = $(if $(CROSS_PREFIX), --sysroot external/$(arch)/gcc.bin) \
 # To add something that will *not* be part of the main kernel, you can do:
 #
 #   mydir/*.o EXTRA_FLAGS = <MY_STUFF>
-EXTRA_FLAGS = -D__OSV_CORE__ -DOSV_KERNEL_BASE=$(kernel_base) -DOSV_LZKERNEL_BASE=$(lzkernel_base)
+EXTRA_FLAGS = -D__OSV_CORE__ -DOSV_KERNEL_BASE=$(kernel_base) -DOSV_KERNEL_VM_SHIFT=$(kernel_vm_shift) -DOSV_LZKERNEL_BASE=$(lzkernel_base)
 EXTRA_LIBS =
 COMMON = $(autodepend) -g -Wall -Wno-pointer-arith $(CFLAGS_WERROR) -Wformat=0 -Wno-format-security \
        -D __BSD_VISIBLE=1 -U _FORTIFY_SOURCE -fno-stack-protector $(INCLUDES) \
@@ -421,6 +421,7 @@ ifeq ($(arch),x64)
 # lzkernel_base is where the compressed kernel is loaded from disk.
 kernel_base := 0x200000
 lzkernel_base := 0x100000
+kernel_vm_base := 0x40200000

 $(out)/arch/x64/boot16.o: $(out)/lzloader.elf
 $(out)/boot.bin: arch/x64/boot16.ld $(out)/arch/x64/boot16.o
@@ -480,6 +481,7 @@ endif # x64
 ifeq ($(arch),aarch64)

 kernel_base := 0x40080000
+kernel_vm_base := 0x40080000

 include $(libfdt_base)/Makefile.libfdt
 libfdt-source := $(patsubst %.c, $(libfdt_base)/%.c, $(LIBFDT_SRCS))
@@ -500,6 +502,8 @@ $(out)/loader.img: $(out)/preboot.bin $(out)/loader-stripped.elf

 endif # aarch64

+kernel_vm_shift := $(shell printf "0x%X" $(shell expr $$(( $(kernel_vm_base) - $(kernel_base) )) ))
+
 $(out)/bsd/sys/crypto/rijndael/rijndael-api-fst.o: COMMON+=-fno-strict-aliasing
 $(out)/bsd/sys/crypto/sha2/sha2.o: COMMON+=-fno-strict-aliasing
 $(out)/bsd/sys/net/route.o: COMMON+=-fno-strict-aliasing
@@ -1873,6 +1877,7 @@ stage1: $(stage1_targets) links

 $(out)/loader.elf: $(stage1_targets) arch/$(arch)/loader.ld $(out)/bootfs.o
        $(call quiet, $(LD) -o $@ --defsym=OSV_KERNEL_BASE=$(kernel_base) \
+               --defsym=OSV_KERNEL_VM_BASE=$(kernel_vm_base) --defsym=OSV_KERNEL_VM_SHIFT=$(kernel_vm_shift) \
                -Bdynamic --export-dynamic --eh-frame-hdr --enable-new-dtags \
            $(^:%.ld=-T %.ld) \
            --whole-archive \
diff --git a/arch/x64/arch-setup.cc b/arch/x64/arch-setup.cc
--- a/arch/x64/arch-setup.cc
+++ b/arch/x64/arch-setup.cc
@@ -85,12 +85,15 @@ extern boot_time_chart boot_time;
 // it by placing address of start32 at the known offset at memory
 // as defined by section .start32_address in loader.ld
 extern "C" void start32();
-void * __attribute__((section (".start32_address"))) start32_address = reinterpret_cast<void*>(&start32);
+void * __attribute__((section (".start32_address"))) start32_address =
+  reinterpret_cast<void*>((long)&start32 - OSV_KERNEL_VM_SHIFT);

 void arch_setup_free_memory()
 {
-    static ulong edata;
+    static ulong edata, edata_phys;
     asm ("movl $.edata, %0" : "=rm"(edata));
+    edata_phys = edata - OSV_KERNEL_VM_SHIFT;
+
     // copy to stack so we don't free it now
     auto omb = *osv_multiboot_info;
     auto mb = omb.mb;
@@ -129,13 +132,13 @@ void arch_setup_free_memory()
     // page tables have been set up, so we can't reference the memory being
     // freed.
     for_each_e820_entry(e820_buffer, e820_size, [] (e820ent ent) {
-        // can't free anything below edata, it's core code.
+        // can't free anything below edata_phys, it's core code.
         // can't free anything below kernel at this moment
-        if (ent.addr + ent.size <= edata) {
+        if (ent.addr + ent.size <= edata_phys) {
             return;
         }
-        if (intersects(ent, edata)) {
-            ent = truncate_below(ent, edata);
+        if (intersects(ent, edata_phys)) {
+            ent = truncate_below(ent, edata_phys);
         }
         // ignore anything above 1GB, we haven't mapped it yet
         if (intersects(ent, initial_map)) {
@@ -149,21 +152,27 @@ void arch_setup_free_memory()
         auto base = reinterpret_cast<void*>(get_mem_area_base(area));
         mmu::linear_map(base, 0, initial_map, initial_map);
     }
-    // map the core, loaded 1:1 by the boot loader
-    mmu::phys elf_phys = reinterpret_cast<mmu::phys>(elf_header);
-    elf_start = reinterpret_cast<void*>(elf_header);
-    elf_size = edata - elf_phys;
-    mmu::linear_map(elf_start, elf_phys, elf_size, OSV_KERNEL_BASE);
+    // Map the core, loaded by the boot loader.
+    // In order to properly set up the mapping between virtual
+    // and physical addresses, we need to take into account where the
+    // kernel is loaded in physical memory - elf_phys_start - and
+    // where it is linked to start in virtual memory - elf_start
+    static mmu::phys elf_phys_start = reinterpret_cast<mmu::phys>(elf_header);
+    // There is a simple invariant between elf_phys_start and elf_start,
+    // as expressed by the assignment below
+    elf_start = reinterpret_cast<void*>(elf_phys_start + OSV_KERNEL_VM_SHIFT);
+    elf_size = edata_phys - elf_phys_start;
+    mmu::linear_map(elf_start, elf_phys_start, elf_size, OSV_KERNEL_BASE);
     // get rid of the command line, before low memory is unmapped
     parse_cmdline(mb);
     // now that we have some free memory, we can start mapping the rest
     mmu::switch_to_runtime_page_tables();
     for_each_e820_entry(e820_buffer, e820_size, [] (e820ent ent) {
         //
-        // Free the memory below elf_start which we could not before
-        if (ent.addr < (u64)elf_start) {
-            if (ent.addr + ent.size >= (u64)elf_start) {
-                ent = truncate_above(ent, (u64) elf_start);
+        // Free the memory below elf_phys_start which we could not before
+        if (ent.addr < (u64)elf_phys_start) {
+            if (ent.addr + ent.size >= (u64)elf_phys_start) {
+                ent = truncate_above(ent, (u64) elf_phys_start);
             }
             mmu::free_initial_memory_range(ent.addr, ent.size);
             return;
diff --git a/arch/x64/boot.S b/arch/x64/boot.S
--- a/arch/x64/boot.S
+++ b/arch/x64/boot.S
@@ -24,13 +24,28 @@
 .align 4096
 .global ident_pt_l4
 ident_pt_l4:
-    .quad ident_pt_l3 + 0x67
+    # The addresses of the paging tables have to be the physical ones, so we have to
+    # manually subtract OSV_KERNEL_VM_SHIFT in all relevant places
+    .quad ident_pt_l3 + 0x67 - OSV_KERNEL_VM_SHIFT
     .rept 511
     .quad 0
     .endr
+#if OSV_KERNEL_VM_SHIFT != 0x40000000 && OSV_KERNEL_VM_SHIFT != 0
+#error This code only works correctly for OSV_KERNEL_VM_SHIFT = 0x40000000 or 0
+#endif
 ident_pt_l3:
-    .quad ident_pt_l2 + 0x67
-    .rept 511
+    # Each of the 512 entries in this table maps 1 GiB, together
+    # covering the 1st 512 GiB of virtual address space
+    # The very 1st entry maps the 1st GiB 1:1 by pointing to the ident_pt_l2
+    # table, which specifies the addresses of each of the 512 2MiB slots of physical memory
+    .quad ident_pt_l2 + 0x67 - OSV_KERNEL_VM_SHIFT
+    # The 2nd entry maps the 2nd GiB to the same 1st GiB of physical memory by pointing
+    # to the same ident_pt_l2 table as the 1st entry above
+    # This way we effectively provide a correct mapping for the kernel, linked
+    # to start at 1 GiB + 2 MiB (0x40200000) in virtual memory while starting
+    # at the 2 MiB address (0x200000) in physical memory
+    .quad ident_pt_l2 + 0x67 - OSV_KERNEL_VM_SHIFT
+    .rept 510
     .quad 0
     .endr
 ident_pt_l2:
@@ -42,7 +57,8 @@ ident_pt_l2:

 gdt_desc:
     .short gdt_end - gdt - 1
-    .long gdt
+    # subtract OSV_KERNEL_VM_SHIFT because when gdt_desc is referenced, the memory is mapped 1:1
+    .long gdt - OSV_KERNEL_VM_SHIFT

 # Set up the 64-bit compatible version of GDT description structure
 # that points to the same GDT (Global segments Descriptors Table) and
@@ -53,7 +69,8 @@ gdt_desc:
 .align 8
 gdt64_desc:
     .short gdt_end - gdt - 1
-    .quad gdt
+    # subtract OSV_KERNEL_VM_SHIFT because when gdt64_desc is referenced, the memory is mapped 1:1
+    .quad gdt - OSV_KERNEL_VM_SHIFT

 .align 8
 gdt = . - 8
@@ -77,10 +94,12 @@ init_stack_top = .
 .globl start32
 .globl start32_from_64
 start32:
+    # Because the memory is mapped 1:1 at this point, we have to manually
+    # subtract OSV_KERNEL_VM_SHIFT from virtual addresses in all relevant places
     # boot16.S set %eax to ELF start address, we'll use it later
     mov %eax, %ebp
     mov $0x0, %edi
-    lgdt gdt_desc
+    lgdt gdt_desc-OSV_KERNEL_VM_SHIFT

 # Add an address the vmlinux_entry64 will jump to when
 # switching from 64-bit to 32-bit mode
@@ -91,7 +110,7 @@ start32_from_64:
     mov %eax, %fs
     mov %eax, %gs
     mov %eax, %ss
-    ljmp $0x18, $1f
+    ljmp $0x18, $1f-OSV_KERNEL_VM_SHIFT
 1:
     and $~7, %esp
     # Enable PAE (Physical Address Extension) - ability to address 64GB
@@ -101,6 +120,9 @@ start32_from_64:

     # Set root of a page table in cr3
     lea ident_pt_l4, %eax
+    # The address of the root paging table has to be physical
+    # so subtract OSV_KERNEL_VM_SHIFT from ident_pt_l4
+    sub $OSV_KERNEL_VM_SHIFT, %eax
     mov %eax, %cr3

     # Set long mode
@@ -128,7 +150,7 @@ start64:
     jz start64_continue
     call extract_linux_boot_params
     mov $0x1000, %rbx
-    mov $0x200000, %rbp
+    mov $OSV_KERNEL_BASE, %rbp

 start64_continue:
     lea .bss, %rdi
@@ -168,6 +190,7 @@ smpboot:
     mov smpboot_cr4-smpboot, %eax
     mov %eax, %cr4
     lea ident_pt_l4, %eax
+    sub $OSV_KERNEL_VM_SHIFT, %eax
     mov %eax, %cr3
     mov smpboot_efer-smpboot, %eax
     mov smpboot_efer+4-smpboot, %edx
@@ -181,7 +204,7 @@ smpboot:

 smpboot_gdt_desc:
     .short gdt_end - gdt - 1
-    .long gdt
+    .long gdt - OSV_KERNEL_VM_SHIFT
 .global smpboot_cr0
 smpboot_cr0:
     .long 0
diff --git a/arch/x64/entry-xen.S b/arch/x64/entry-xen.S
--- a/arch/x64/entry-xen.S
+++ b/arch/x64/entry-xen.S
@@ -23,7 +23,7 @@

 elfnote_val(XEN_ELFNOTE_ENTRY, xen_start)
 elfnote_val(XEN_ELFNOTE_HYPERCALL_PAGE, hypercall_page)
-elfnote_val(XEN_ELFNOTE_VIRT_BASE, 0)
+elfnote_val(XEN_ELFNOTE_VIRT_BASE, OSV_KERNEL_VM_SHIFT)
 elfnote_str(XEN_ELFNOTE_XEN_VERSION, "xen-3.0")
 elfnote_str(XEN_ELFNOTE_GUEST_OS, "osv")
 elfnote_str(XEN_ELFNOTE_GUEST_VERSION, "?.?")
@@ -50,4 +50,5 @@ xen_start:
     mov %rsp, xen_bootstrap_end
     mov %rsi, %rdi
     call xen_init
+    mov $0x0, %rdi
     jmp start64
diff --git a/arch/x64/loader.ld b/arch/x64/loader.ld
--- a/arch/x64/loader.ld
+++ b/arch/x64/loader.ld
@@ -14,77 +14,79 @@ SECTIONS
         *
         * We can't export the ELF header base as a symbol, because ld
         * insists on moving stuff around if we do.
-        *
+        */
+    . = OSV_KERNEL_VM_BASE + 0x800;
+       /*
         * Place address of start32 routine at predefined offset in memory
         */
-    . = OSV_KERNEL_BASE + 0x800;
-    .start32_address : {
+    .start32_address : AT(ADDR(.start32_address) - OSV_KERNEL_VM_SHIFT) {
         *(.start32_address)
     }
-    . = OSV_KERNEL_BASE + 0x1000;
-    .dynamic : { *(.dynamic) } :dynamic :text
-    .text : {
+    . = OSV_KERNEL_VM_BASE + 0x1000;
+    .dynamic : AT(ADDR(.dynamic) - OSV_KERNEL_VM_SHIFT) { *(.dynamic) } :dynamic :text
+    .text : AT(ADDR(.text) - OSV_KERNEL_VM_SHIFT) {
         text_start = .;
         *(.text.hot .text.hot.*)
         *(.text.unlikely .text.*_unlikely)
         *(.text.fixup)
         *(.text.startup .text.startup.*)
         *(.text .text.*)
         text_end = .;
+        PROVIDE(low_vmlinux_entry64 = vmlinux_entry64 - OSV_KERNEL_VM_SHIFT);
     } :text
     . = ALIGN(8);
-    .fixup : {
+    .fixup : AT(ADDR(.fixup) - OSV_KERNEL_VM_SHIFT) {
         fault_fixup_start = .;
         *(.fixup)
         fault_fixup_end = .;
     } :text

     . = ALIGN(8);
-    .memcpy_decode : {
+    .memcpy_decode : AT(ADDR(.memcpy_decode) - OSV_KERNEL_VM_SHIFT) {
         memcpy_decode_start = .;
         *(.memcpy_decode)
         memcpy_decode_end = .;
     } :text

-    .eh_frame : { *(.eh_frame) } : text
-    .rodata : { *(.rodata*) } :text
-    .eh_frame : { *(.eh_frame) } :text
-    .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame
-    .note : { *(.note*) } :text :note
- .gcc_except_table : { *(.gcc_except_table) *(.gcc_except_table.*) } : text
-    .tracepoint_patch_sites ALIGN(8) : {
+    .eh_frame : AT(ADDR(.eh_frame) - OSV_KERNEL_VM_SHIFT) { *(.eh_frame) } : text
+    .rodata : AT(ADDR(.rodata) - OSV_KERNEL_VM_SHIFT) { *(.rodata*) } :text
+    .eh_frame : AT(ADDR(.eh_frame) - OSV_KERNEL_VM_SHIFT) { *(.eh_frame) } :text
+    .eh_frame_hdr : AT(ADDR(.eh_frame_hdr) - OSV_KERNEL_VM_SHIFT) { *(.eh_frame_hdr) } :text :eh_frame
+    .note : AT(ADDR(.note) - OSV_KERNEL_VM_SHIFT) { *(.note*) } :text :note
+    .gcc_except_table : AT(ADDR(.gcc_except_table) - OSV_KERNEL_VM_SHIFT) { *(.gcc_except_table) *(.gcc_except_table.*) } : text
+    .tracepoint_patch_sites ALIGN(8) : AT(ADDR(.tracepoint_patch_sites) - OSV_KERNEL_VM_SHIFT) {
         __tracepoint_patch_sites_start = .;
         *(.tracepoint_patch_sites)
         __tracepoint_patch_sites_end = .;
     } : text
-    .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) } : text
-    .data : { *(.data) } :text
+    .data.rel.ro : AT(ADDR(.data.rel.ro) - OSV_KERNEL_VM_SHIFT) { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) } : text
+    .data : AT(ADDR(.data) - OSV_KERNEL_VM_SHIFT) { *(.data) } :text
     _init_array_start = .;
-    .init_array : {
+    .init_array : AT(ADDR(.init_array) - OSV_KERNEL_VM_SHIFT) {
        *(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*))
         *(.init_array .ctors)
     } : text
     _init_array_end = .;
     . = ALIGN(4096);
-    .percpu : {
+    .percpu : AT(ADDR(.percpu) - OSV_KERNEL_VM_SHIFT) {
         _percpu_start = .;
         *(.percpu)
         . = ALIGN(4096);
         _percpu_end = .;
     }
-    .percpu_workers : {
+    .percpu_workers : AT(ADDR(.percpu_workers) - OSV_KERNEL_VM_SHIFT) {
         _percpu_workers_start = .;
         *(.percpu_workers)
         _percpu_workers_end = .;
     }
     . = ALIGN(64);
-    .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) } :tls :text
-    .tbss : {
+    .tdata : AT(ADDR(.tdata) - OSV_KERNEL_VM_SHIFT) { *(.tdata .tdata.* .gnu.linkonce.td.*) } :tls :text
+    .tbss : AT(ADDR(.tbss) - OSV_KERNEL_VM_SHIFT) {
         *(.tbss .tbss.* .gnu.linkonce.tb.*)
         . = ALIGN(64);
     } :tls :text
     .tls_template_size = SIZEOF(.tdata) + SIZEOF(.tbss);
-    .bss : { *(.bss .bss.*) } :text
+    .bss : AT(ADDR(.bss) - OSV_KERNEL_VM_SHIFT) { *(.bss .bss.*) } :text
     . = ALIGN(64);
     tcb0 = .;
     . = . + .tls_template_size + 256;
@@ -114,4 +116,4 @@ PHDRS {
        eh_frame PT_GNU_EH_FRAME;
        note PT_NOTE;
 }
-ENTRY(vmlinux_entry64);
+ENTRY(low_vmlinux_entry64);
diff --git a/arch/x64/vmlinux-boot64.S b/arch/x64/vmlinux-boot64.S
--- a/arch/x64/vmlinux-boot64.S
+++ b/arch/x64/vmlinux-boot64.S
@@ -13,7 +13,9 @@ vmlinux_entry64:
     mov %rsi, %rdi

     # Load the 64-bit version of the GDT
-    lgdt gdt64_desc
+    # Because the memory is mapped 1:1 at this point, we have to manually
+    # subtract OSV_KERNEL_VM_SHIFT from the gdt address
+    lgdt gdt64_desc-OSV_KERNEL_VM_SHIFT

     # Setup the stack to switch back to 32-bit mode in order
 # to converge with the code that sets up the transition to 64-bit mode later.
@@ -32,6 +34,6 @@ vmlinux_entry64:
     # to start32_from_64 which is where the boot process converges.
     subq $8, %rsp
     movl $0x18, 4(%rsp)
-    movl $start32_from_64, %eax
+    movl $start32_from_64-OSV_KERNEL_VM_SHIFT, %eax # Because memory is mapped 1:1, subtract OSV_KERNEL_VM_SHIFT
     movl %eax, (%rsp)
     lret
diff --git a/arch/x64/xen.cc b/arch/x64/xen.cc
--- a/arch/x64/xen.cc
+++ b/arch/x64/xen.cc
@@ -172,7 +172,7 @@ void xen_init(processor::features_type &features, unsigned base)
     // Base + 1 would have given us the version number, it is mostly
     // uninteresting for us now
     auto x = processor::cpuid(base + 2);
-    processor::wrmsr(x.b, cast_pointer(&hypercall_page));
+    processor::wrmsr(x.b, cast_pointer(&hypercall_page) - OSV_KERNEL_VM_SHIFT);

     struct xen_feature_info info;
     // To fill up the array used by C code
@@ -192,7 +192,7 @@ void xen_init(processor::features_type &features, unsigned base)
     map.domid = DOMID_SELF;
     map.idx = 0;
     map.space = 0;
-    map.gpfn = cast_pointer(&xen_shared_info) >> 12;
+    map.gpfn = (cast_pointer(&xen_shared_info) - OSV_KERNEL_VM_SHIFT) >> 12;

     // 7 => add to physmap
     if (memory_hypercall(XENMEM_add_to_physmap, &map))
diff --git a/core/elf.cc b/core/elf.cc
--- a/core/elf.cc
+++ b/core/elf.cc
@@ -1099,7 +1099,7 @@ void create_main_program()
 program::program(void* addr)
     : _next_alloc(addr)
 {
-    _core = std::make_shared<memory_image>(*this, (void*)ELF_IMAGE_START);
+    _core = std::make_shared<memory_image>(*this, (void*)(ELF_IMAGE_START + OSV_KERNEL_VM_SHIFT));
     assert(_core->module_index() == core_module_index);
     _core->load_segments();
     set_search_path({"/", "/usr/lib"});
diff --git a/core/mmu.cc b/core/mmu.cc
--- a/core/mmu.cc
+++ b/core/mmu.cc
@@ -91,12 +91,12 @@ phys pte_level_mask(unsigned level)
     return ~((phys(1) << shift) - 1);
 }

+static void *elf_phys_start = (void*)OSV_KERNEL_BASE;
 void* phys_to_virt(phys pa)
 {
-    // The ELF is mapped 1:1
     void* phys_addr = reinterpret_cast<void*>(pa);
-    if ((phys_addr >= elf_start) && (phys_addr < elf_start + elf_size)) {
-        return phys_addr;
+    if ((phys_addr >= elf_phys_start) && (phys_addr < elf_phys_start + elf_size)) {
+        return (void*)(phys_addr + OSV_KERNEL_VM_SHIFT);
     }

     return phys_mem + pa;
@@ -106,9 +106,8 @@ phys virt_to_phys_pt(void* virt);

 phys virt_to_phys(void *virt)
 {
-    // The ELF is mapped 1:1
     if ((virt >= elf_start) && (virt < elf_start + elf_size)) {
-        return reinterpret_cast<phys>(virt);
+        return reinterpret_cast<phys>((void*)(virt - OSV_KERNEL_VM_SHIFT));
     }

 #if CONF_debug_memory
diff --git a/loader.cc b/loader.cc
--- a/loader.cc
+++ b/loader.cc
@@ -102,7 +102,8 @@ void premain()

     arch_init_premain();

-    auto inittab = elf::get_init(elf_header);
+    auto inittab = elf::get_init(reinterpret_cast<elf::Elf64_Ehdr*>(
+        (void*)elf_header + OSV_KERNEL_VM_SHIFT));

     if (inittab.tls.start == nullptr) {
         debug_early("premain: failed to get TLS data from ELF\n");
