This is the kvm-userland patch to use after applying the reserved-ram
patch to the host kernel. The BIOS must be rebuilt after applying this
patch; to do that, just run 'make bios'.
After that, it is enough to pass '-reserved-ram' on the command line.
4997 ? Sl 2:56 3515 1544 4677235 1697028 47.9
/home/andrea/bin/x86_64/kvm/bin/qemu-system-x86_64 -hda tmp/virt
5002 ? Sl 3:23 4728 1544 4677235 1600980 45.2
/home/andrea/bin/x86_64/kvm/bin/qemu-system-x86_64 -hda tmp/virt
5008 ? Sl 2:39 239 1544 892127 15496 0.4
/home/andrea/bin/x86_64/kvm/bin/qemu-system-x86_64 -hda tmp/virtual
total used free shared buffers cached
Mem: 3540492 3525108 15384 0 1892 51896
-/+ buffers/cache: 3471320 69172
Swap: 5863684 3014072 2849612
eth0: no IPv6 routers present
loaded kvm module (kvm-70-399-g275f337)
apic write: bad size=1 fee00030
Ignoring de-assert INIT to vcpu 0
apic write: bad size=1 fee00030
Ignoring de-assert INIT to vcpu 0
Ignoring de-assert INIT to vcpu 0
Ignoring de-assert INIT to vcpu 0
kvm: emulating exchange as write
apic write: bad size=1 fee00030
Ignoring de-assert INIT to vcpu 0
Ignoring de-assert INIT to vcpu 0
Above you can see 3 KVM guests: the last one started with
-reserved-ram -m 512, the first two with -m 3000. The host kernel has
both the mmu-notifier v18 and the reserved-ram patches applied. The KVM
kernel has the pfn-mmio patch applied, plus my fix to export the
reserved RAM through vma->fault, and the kvm mmu notifier support for
reliable and efficient swapping. All 3 guests seem to work great
together while the system is 3G into swap. The reserved-ram guest is,
of course, almost as responsive as if there were no swap at all (only
the userland bits need to be paged in, while all the virtual ram
remains in ram).
You can also see that the RSS of the -reserved-ram task is only 15M,
which is about the footprint of kvm userland (part of which is shared
libs, so it's actually much less).
Signed-off-by: Andrea Arcangeli <[EMAIL PROTECTED]>
diff --git a/bios/rombios.c b/bios/rombios.c
index 318de57..f93a6c6 100644
--- a/bios/rombios.c
+++ b/bios/rombios.c
@@ -4251,6 +4251,7 @@ int15_function32(regs, ES, DS, FLAGS)
Bit32u extra_lowbits_memory_size=0;
Bit16u CX,DX;
Bit8u extra_highbits_memory_size=0;
+ Bit32u below_640_end;
BX_DEBUG_INT15("int15 AX=%04x\n",regs.u.r16.ax);
@@ -4305,6 +4306,11 @@ ASM_END
case 0x20: // coded by osmaker aka K.J.
if(regs.u.r32.edx == 0x534D4150)
{
+ below_640_end = inb_cmos(0x16);
+ below_640_end <<= 8;
+ below_640_end |= inb_cmos(0x15);
+ below_640_end *= 1024;
+
extended_memory_size = inb_cmos(0x35);
extended_memory_size <<= 8;
extended_memory_size |= inb_cmos(0x34);
@@ -4334,7 +4340,7 @@ ASM_END
{
case 0:
set_e820_range(ES, regs.u.r16.di,
- 0x0000000L, 0x0009fc00L, 0, 0, 1);
+ 0x0000000L, below_640_end, 0, 0, 1);
regs.u.r32.ebx = 1;
regs.u.r32.eax = 0x534D4150;
regs.u.r32.ecx = 0x14;
@@ -4343,7 +4349,7 @@ ASM_END
break;
case 1:
set_e820_range(ES, regs.u.r16.di,
- 0x0009fc00L, 0x000a0000L, 0, 0, 2);
+ below_640_end, 0x000a0000L, 0, 0, 2);
regs.u.r32.ebx = 2;
regs.u.r32.eax = 0x534D4150;
regs.u.r32.ecx = 0x14;
diff --git a/qemu/hw/pc.c b/qemu/hw/pc.c
index 42c2687..c6a21d5 100644
--- a/qemu/hw/pc.c
+++ b/qemu/hw/pc.c
@@ -235,6 +235,8 @@ static void cmos_init(ram_addr_t ram_size, ram_addr_t
above_4g_mem_size,
/* memory size */
val = 640; /* base memory in K */
+ if (reserved_ram)
+ val = reserved[1] / 1024;
rtc_set_memory(s, 0x15, val);
rtc_set_memory(s, 0x16, val >> 8);
diff --git a/qemu/pc-bios/bios.bin b/qemu/pc-bios/bios.bin
index 3e5d96a..c9c94e6 100644
Binary files a/qemu/pc-bios/bios.bin and b/qemu/pc-bios/bios.bin differ
diff --git a/qemu/sysemu.h b/qemu/sysemu.h
index 97d73e9..964fee4 100644
--- a/qemu/sysemu.h
+++ b/qemu/sysemu.h
@@ -102,6 +102,8 @@ extern int autostart;
extern int old_param;
extern int hpagesize;
extern const char *bootp_filename;
+extern int reserved_ram;
+extern int64_t reserved[4];
#ifdef USE_KQEMU
diff --git a/qemu/vl.c b/qemu/vl.c
index f573dce..3ce2f2a 100644
--- a/qemu/vl.c
+++ b/qemu/vl.c
@@ -235,6 +235,8 @@ int time_drift_fix = 0;
unsigned int kvm_shadow_memory = 0;
const char *mem_path = NULL;
int hpagesize = 0;
+int reserved_ram = 0;
+int64_t reserved[4];
const char *cpu_vendor_string;
#ifdef TARGET_ARM
int old_param = 0;
@@ -7817,6 +7819,7 @@ static void help(int exitcode)
"-clock force the use of the given methods for timer
alarm.\n"
" To see what timers are available use -clock ?\n"
"-startdate select initial date of the clock\n"
+ "-reserved-ram use reserved RAM in /proc/iomem with spte identity
mapping\n"
"\n"
"During emulation, the following keys are useful:\n"
"ctrl-alt-f toggle full screen\n"
@@ -7932,6 +7935,7 @@ enum {
QEMU_OPTION_tdf,
QEMU_OPTION_kvm_shadow_memory,
QEMU_OPTION_mempath,
+ QEMU_OPTION_reserved_ram,
};
typedef struct QEMUOption {
@@ -8059,6 +8063,7 @@ const QEMUOption qemu_options[] = {
{ "startdate", HAS_ARG, QEMU_OPTION_startdate },
{ "tb-size", HAS_ARG, QEMU_OPTION_tb_size },
{ "mem-path", HAS_ARG, QEMU_OPTION_mempath },
+ { "reserved-ram", 0, QEMU_OPTION_reserved_ram },
{ NULL },
};
@@ -8276,6 +8281,77 @@ static int gethugepagesize(void)
return hugepagesize;
}
+/*
+ * Scan /proc/iomem for the first "reserved RAM" resource that satisfies
+ * the caller's placement and size constraints.
+ *
+ * _start:   receives the first address of the matching range
+ * _end:     receives the address one past the last byte of the range
+ *           (/proc/iomem reports an inclusive end, so 1 is added)
+ * below:    if non-zero, the range's inclusive end must be <= below
+ * above:    if non-zero, the range's start must be >= above
+ * min_size: if non-zero, (inclusive end - start) must be >= min_size
+ *
+ * Returns 1 and fills *_start / *_end when a range matches, 0 when none
+ * does (the outputs are left untouched in that case).  Exits with a
+ * failure status if /proc/iomem cannot be opened or read.
+ */
+static int find_reserved_ram(int64_t *_start, int64_t *_end,
+                             unsigned long below, unsigned long above,
+                             unsigned long min_size)
+{
+    /* The trailing newline anchors the match to the end of a line. */
+    static const char needle[] = "reserved RAM\n";
+    char line[256];
+    int found = 0;
+    FILE *f;
+
+    f = fopen("/proc/iomem", "r");
+    if (!f) {
+        perror("open");
+        exit(1);
+    }
+
+    /*
+     * Read one resource line at a time so that neither the total size
+     * of /proc/iomem nor the printed width of the addresses matters.
+     */
+    while (!found && fgets(line, sizeof(line), f)) {
+        char *sep;
+        int64_t start, end;
+
+        if (!strstr(line, needle))
+            continue;
+
+        /* Lines look like "<start>-<end> : <name>", possibly indented;
+         * strtoll skips the leading blanks. */
+        start = strtoll(line, &sep, 16);
+        if (sep == line || *sep != '-')
+            continue;
+        end = strtoll(sep + 1, NULL, 16);
+
+        /* Compare as signed 64-bit values to avoid mixed-sign surprises. */
+        if ((!above || start >= (int64_t)above) &&
+            (!below || end <= (int64_t)below) &&
+            (!min_size || end - start >= (int64_t)min_size)) {
+            *_start = start;
+            *_end = end + 1;
+            found = 1;
+        }
+    }
+
+    if (ferror(f)) {
+        perror("read");
+        exit(1);
+    }
+    fclose(f);
+
+    return found;
+}
+
+/*
+ * Look up the two host "reserved RAM" ranges and record them in
+ * reserved[]: reserved[0]/reserved[1] hold the start/end of a range
+ * ending at or below 640K, reserved[2]/reserved[3] hold the start/end
+ * of a range starting at or above 1M.  On success reserved_ram is set
+ * to 1 and reserved[1] is masked with TARGET_PAGE_MASK.
+ *
+ * Exits with status 1 if either range is missing, if the low range does
+ * not start at 4096 or the high range does not start at 1M, or if
+ * reserved[3] is smaller than ram_size.
+ */
+static void init_reserved_ram(void)
+{
+    int have_ranges;
+
+    /* The high range is only searched for once the low one was found. */
+    have_ranges = find_reserved_ram(&reserved[0], &reserved[1],
+                                    640*1024, 0, 500*1024);
+    if (have_ranges)
+        have_ranges = find_reserved_ram(&reserved[2], &reserved[3],
+                                        0, 1024*1024, 1024*1024);
+
+    if (!have_ranges) {
+        fprintf(stderr, "host reserved ram not found\n");
+        exit(1);
+    }
+
+    reserved_ram = 1;
+
+    /* Both ranges must begin exactly where this code expects them. */
+    if (reserved[0] != 4096 || reserved[2] != 1024*1024) {
+        fprintf(stderr, "strange host ram layout\n");
+        exit(1);
+    }
+
+    if (reserved[3] < ram_size) {
+        fprintf(stderr, "not enough host reserved ram, decrease -m\n");
+        exit(1);
+    }
+
+    reserved[1] &= TARGET_PAGE_MASK;
+}
+
void *alloc_mem_area(unsigned long memory, const char *path)
{
char *filename;
@@ -8322,10 +8398,43 @@ void *qemu_alloc_physram(unsigned long memory)
{
void *area = NULL;
- if (mem_path)
+ if (!area && mem_path)
area = alloc_mem_area(memory, mem_path);
- if (!area)
+ if (!area) {
area = qemu_vmalloc(memory);
+ if (reserved_ram) {
+ int fd;
+ if (memory < reserved[2]) {
+ printf("memory < reserved[2]\n");
+ return NULL;
+ }
+ fd = open("/dev/mem", O_RDWR);
+ if (fd < 0) {
+ perror("reserved_ram requires access to /dev/mem");
+ return NULL;
+ }
+ if (mmap((char *)area+reserved[0],
+ reserved[1]-reserved[0],
+ PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FIXED,
+ fd, 0) == MAP_FAILED) {
+ perror("reserved_ram mmap failed on /dev/mem");
+ return NULL;
+ }
+ bzero((char *)area+reserved[0], reserved[1]-reserved[0]);
+ if (mmap((char *)area+reserved[2],
+ ram_size-reserved[2],
+ PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FIXED,
+ fd, reserved[2]) == MAP_FAILED) {
+ perror("reserved_ram mmap failed on /dev/mem");
+ return NULL;
+ }
+ bzero((char *)area+reserved[2], ram_size-reserved[2]);
+ if (close(fd) < 0) {
+ perror("/dev/mem");
+ return NULL;
+ }
+ }
+ }
return area;
}
@@ -8962,6 +9071,9 @@ int main(int argc, char **argv)
case QEMU_OPTION_mempath:
mem_path = optarg;
break;
+ case QEMU_OPTION_reserved_ram:
+ init_reserved_ram();
+ break;
case QEMU_OPTION_name:
qemu_name = optarg;
break;
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at http://vger.kernel.org/majordomo-info.html