[PATCH] ram: add support for dirty page tracking

2020-07-06 Thread Bingsong Si
In production, a VM with intensive memory activity may fail to migrate
because memory in the VM changes faster than the throughput of the
network interface, and we want to identify this before migration.

1. dirty tracking start:
virsh qemu-monitor-command  --hmp dirty_track

2. wait some time, stop dirty tracking:
virsh qemu-monitor-command  --hmp dirty_track_stop
Dirty rate: 607 pages/s

Signed-off-by: Bingsong Si 
---
 hmp-commands.hx   | 26 +++
 include/monitor/hmp.h |  2 ++
 migration/migration.c |  5 +++
 migration/ram.c   | 65 
 migration/ram.h   |  5 +++
 migration/savevm.c| 77 +++
 migration/savevm.h|  2 ++
 7 files changed, 182 insertions(+)

diff --git a/hmp-commands.hx b/hmp-commands.hx
index 60f395c276..05a688286b 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1819,6 +1819,32 @@ SRST
   Set QOM property *property* of object at location *path* to value *value*
 ERST
 
+{
+.name   = "dirty_track",
+.args_type  = "",
+.params = "",
+.help   = "track dirty pages rate",
+.cmd= hmp_dirty_track,
+},
+
+SRST
+``dirty_track``
+  Track dirty pages rate.
+ERST
+
+{
+.name   = "dirty_track_stop",
+.args_type  = "",
+.params = "",
+.help   = "stop current dirty pages track",
+.cmd= hmp_dirty_track_stop,
+},
+
+SRST
+``dirty_track_stop``
+  Stop current dirty pages track.
+ERST
+
 {
 .name   = "info",
 .args_type  = "item:s?",
diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
index c986cfd28b..c139fe8758 100644
--- a/include/monitor/hmp.h
+++ b/include/monitor/hmp.h
@@ -130,5 +130,7 @@ void hmp_hotpluggable_cpus(Monitor *mon, const QDict 
*qdict);
 void hmp_info_vm_generation_id(Monitor *mon, const QDict *qdict);
 void hmp_info_memory_size_summary(Monitor *mon, const QDict *qdict);
 void hmp_info_sev(Monitor *mon, const QDict *qdict);
+void hmp_dirty_track(Monitor *mon, const QDict *qdict);
+void hmp_dirty_track_stop(Monitor *mon, const QDict *qdict);
 
 #endif
diff --git a/migration/migration.c b/migration/migration.c
index 481a590f72..5550afafe6 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1964,6 +1964,11 @@ static bool migrate_prepare(MigrationState *s, bool blk, 
bool blk_inc,
 {
 Error *local_err = NULL;
 
+if (dirty_track_is_running()) {
+error_setg(errp, "There is a dirty tracking process in progress");
+return false;
+}
+
 if (resume) {
 if (s->state != MIGRATION_STATUS_POSTCOPY_PAUSED) {
 error_setg(errp, "Cannot resume if there is no "
diff --git a/migration/ram.c b/migration/ram.c
index 5554a7d2d8..64c50b31cc 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -3782,6 +3782,71 @@ static int ram_resume_prepare(MigrationState *s, void 
*opaque)
 return 0;
 }
 
+void dirty_track_init(void)
+{
+RAMBlock *block;
+
+if (ram_bytes_total()) {
+RAMBLOCK_FOREACH_NOT_IGNORED(block) {
+unsigned long pages = block->max_length >> TARGET_PAGE_BITS;
+
+block->bmap = bitmap_new(pages);
+bitmap_set(block->bmap, 0, pages);
+}
+}
+ram_state = g_new0(RAMState, 1);
+ram_state->migration_dirty_pages = 0;
+memory_global_dirty_log_start();
+}
+
+uint64_t dirty_track_dirty_pages(void)
+{
+return ram_state->migration_dirty_pages;
+}
+
+void dirty_track_sync(void)
+{
+RAMBlock *block = NULL;
+unsigned long offset = 0;
+
+memory_global_dirty_log_sync();
+rcu_read_lock();
+RAMBLOCK_FOREACH_NOT_IGNORED(block) {
+   ramblock_sync_dirty_bitmap(ram_state, block);
+}
+rcu_read_unlock();
+
+rcu_read_lock();
+block = QLIST_FIRST_RCU(_list.blocks);
+
+while (block) {
+offset = migration_bitmap_find_dirty(ram_state, block, offset);
+
+if (offset << TARGET_PAGE_BITS >= block->used_length) {
+offset = 0;
+block = QLIST_NEXT_RCU(block, next);
+} else {
+test_and_clear_bit(offset, block->bmap);
+}
+}
+
+rcu_read_unlock();
+}
+
+void dirty_track_cleanup(void)
+{
+RAMBlock *block;
+
+memory_global_dirty_log_stop();
+RAMBLOCK_FOREACH_NOT_IGNORED(block) {
+g_free(block->bmap);
+block->bmap = NULL;
+}
+
+g_free(ram_state);
+ram_state = NULL;
+}
+
 static SaveVMHandlers savevm_ram_handlers = {
 .save_setup = ram_save_setup,
 .save_live_iterate = ram_save_iterate,
diff --git a/migration/ram.h b/migration/ram.h
index 2eeaacfa13..104c48285c 100644
--- a/migration/ram.h
+++ b/migration/ram.h
@@ -69,4 +69,9 @@ void colo_flush_ram_cache(void);
 void colo_release_ram

[PATCH] ram: add support for dirty page tracking

2020-07-03 Thread Bingsong Si
In production, a VM with intensive memory activity may fail to migrate
because memory in the VM changes faster than the throughput of the
network interface, and we want to identify this before migration.

1. dirty tracking start:
virsh qemu-monitor-command  --hmp dirty_track

2. wait some time, stop dirty tracking:
virsh qemu-monitor-command  --hmp dirty_track_stop
Dirty rate: 607 pages/s

Signed-off-by: Bingsong Si 
---
 hmp-commands.hx   | 26 ++
 include/monitor/hmp.h |  2 ++
 migration/migration.c |  5 +++
 migration/ram.c   | 65 +
 migration/ram.h   |  5 +++
 migration/savevm.c| 83 +++
 migration/savevm.h|  2 ++
 7 files changed, 188 insertions(+)

diff --git a/hmp-commands.hx b/hmp-commands.hx
index 60f395c276..05a688286b 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1819,6 +1819,32 @@ SRST
   Set QOM property *property* of object at location *path* to value *value*
 ERST
 
+{
+.name   = "dirty_track",
+.args_type  = "",
+.params = "",
+.help   = "track dirty pages rate",
+.cmd= hmp_dirty_track,
+},
+
+SRST
+``dirty_track``
+  Track dirty pages rate.
+ERST
+
+{
+.name   = "dirty_track_stop",
+.args_type  = "",
+.params = "",
+.help   = "stop current dirty pages track",
+.cmd= hmp_dirty_track_stop,
+},
+
+SRST
+``dirty_track_stop``
+  Stop current dirty pages track.
+ERST
+
 {
 .name   = "info",
 .args_type  = "item:s?",
diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
index c986cfd28b..c139fe8758 100644
--- a/include/monitor/hmp.h
+++ b/include/monitor/hmp.h
@@ -130,5 +130,7 @@ void hmp_hotpluggable_cpus(Monitor *mon, const QDict 
*qdict);
 void hmp_info_vm_generation_id(Monitor *mon, const QDict *qdict);
 void hmp_info_memory_size_summary(Monitor *mon, const QDict *qdict);
 void hmp_info_sev(Monitor *mon, const QDict *qdict);
+void hmp_dirty_track(Monitor *mon, const QDict *qdict);
+void hmp_dirty_track_stop(Monitor *mon, const QDict *qdict);
 
 #endif
diff --git a/migration/migration.c b/migration/migration.c
index 481a590f72..5550afafe6 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1964,6 +1964,11 @@ static bool migrate_prepare(MigrationState *s, bool blk, 
bool blk_inc,
 {
 Error *local_err = NULL;
 
+if (dirty_track_is_running()) {
+error_setg(errp, "There is a dirty tracking process in progress");
+return false;
+}
+
 if (resume) {
 if (s->state != MIGRATION_STATUS_POSTCOPY_PAUSED) {
 error_setg(errp, "Cannot resume if there is no "
diff --git a/migration/ram.c b/migration/ram.c
index 069b6e30bc..03a5e44617 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -3780,6 +3780,71 @@ static int ram_resume_prepare(MigrationState *s, void 
*opaque)
 return 0;
 }
 
+void dirty_track_init(void)
+{
+RAMBlock *block;
+
+if (ram_bytes_total()) {
+RAMBLOCK_FOREACH_NOT_IGNORED(block) {
+unsigned long pages = block->max_length >> TARGET_PAGE_BITS;
+
+block->bmap = bitmap_new(pages);
+bitmap_set(block->bmap, 0, pages);
+}
+}
+ram_state = g_new0(RAMState, 1);
+ram_state->migration_dirty_pages = 0;
+memory_global_dirty_log_start();
+}
+
+uint64_t dirty_track_dirty_pages(void)
+{
+return ram_state->migration_dirty_pages;
+}
+
+void dirty_track_sync(void)
+{
+RAMBlock *block = NULL;
+unsigned long offset = 0;
+
+memory_global_dirty_log_sync();
+rcu_read_lock();
+RAMBLOCK_FOREACH_NOT_IGNORED(block) {
+   ramblock_sync_dirty_bitmap(ram_state, block);
+}
+rcu_read_unlock();
+
+rcu_read_lock();
+block = QLIST_FIRST_RCU(_list.blocks);
+
+while (block) {
+offset = migration_bitmap_find_dirty(ram_state, block, offset);
+
+if (offset << TARGET_PAGE_BITS >= block->used_length) {
+offset = 0;
+block = QLIST_NEXT_RCU(block, next);
+} else {
+test_and_clear_bit(offset, block->bmap);
+}
+}
+
+rcu_read_unlock();
+}
+
+void dirty_track_cleanup(void)
+{
+RAMBlock *block;
+
+memory_global_dirty_log_stop();
+RAMBLOCK_FOREACH_NOT_IGNORED(block) {
+g_free(block->bmap);
+block->bmap = NULL;
+}
+
+g_free(ram_state);
+ram_state = NULL;
+}
+
 static SaveVMHandlers savevm_ram_handlers = {
 .save_setup = ram_save_setup,
 .save_live_iterate = ram_save_iterate,
diff --git a/migration/ram.h b/migration/ram.h
index 2eeaacfa13..104c48285c 100644
--- a/migration/ram.h
+++ b/migration/ram.h
@@ -69,4 +69,9 @@ void colo_flush_ram_cache(void);
 void colo_release_ram

[Qemu-devel] [PATCH] i386: Fix legacy guest with xsave panic on host kvm without update cpuid.

2019-08-21 Thread Bingsong Si
Without KVM commit 412a3c41, CPUID(EAX=0xd,ECX=0).EBX is always equal to 0 even
though the guest updates xcr0; this crashes legacy guests (e.g., CentOS 6).
Below is the call trace on the guest.

[0.00] kernel BUG at mm/bootmem.c:469!
[0.00] invalid opcode:  [#1] SMP
[0.00] last sysfs file:
[0.00] CPU 0
[0.00] Modules linked in:
[0.00]
[0.00] Pid: 0, comm: swapper Tainted: G   --- H  
2.6.32-279#2 Red Hat KVM
[0.00] RIP: 0010:[]  [] 
alloc_bootmem_core+0x7b/0x29e
[0.00] RSP: 0018:81a01cd8  EFLAGS: 00010046
[0.00] RAX: 81cb1748 RBX: 81cb1720 RCX: 0100
[0.00] RDX: 0040 RSI:  RDI: 81cb1720
[0.00] RBP: 81a01d38 R08:  R09: 1000
[0.00] R10: 02008921da802087 R11: 8800 R12: 
[0.00] R13:  R14:  R15: 0100
[0.00] FS:  () GS:88000220() 
knlGS:
[0.00] CS:  0010 DS: 0018 ES: 0018 CR0: 80050033
[0.00] CR2:  CR3: 01a85000 CR4: 001406b0
[0.00] DR0:  DR1:  DR2: 
[0.00] DR3:  DR6: 0ff0 DR7: 0400
[0.00] Process swapper (pid: 0, threadinfo 81a0, task 
81a8d020)
[0.00] Stack:
[0.00]  0002 81a01dd881eaf060 7e5fe227 
1001
[0.00]  0040 0001 006c 
0100
[0.00]  81cb1720   

[0.00] Call Trace:
[0.00]  [] ___alloc_bootmem_nopanic+0x8d/0xca
[0.00]  [] ___alloc_bootmem+0x11/0x39
[0.00]  [] __alloc_bootmem+0xb/0xd
[0.00]  [] xsave_cntxt_init+0x249/0x2c0
[0.00]  [] init_thread_xstate+0x17/0x25
[0.00]  [] fpu_init+0x79/0xaa
[0.00]  [] cpu_init+0x301/0x344
[0.00]  [] ? sort+0x155/0x230
[0.00]  [] trap_init+0x24e/0x25f
[0.00]  [] start_kernel+0x21c/0x430
[0.00]  [] x86_64_start_reservations+0x125/0x129
[0.00]  [] x86_64_start_kernel+0xfa/0x109
[0.00] Code: 03 48 89 f1 49 c1 e8 0c 48 0f af d0 48 c7 c6 00 a6 61 81 
48 c7 c7 00 e5 79 81 31 c0 4c 89 74 24 08 e8 f2 d7 89 ff 4d 85 e4 75 04 <0f> 0b 
eb fe 48 8b 45 c0 48 83 e8 01 48 85 45
c0 74 04 0f 0b eb

Signed-off-by: Bingsong Si 
---
 target/i386/cpu.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index ff65e11008..77510cdacd 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -4416,7 +4416,13 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
 *ecx = xsave_area_size(x86_cpu_xsave_components(cpu));
 *eax = env->features[FEAT_XSAVE_COMP_LO];
 *edx = env->features[FEAT_XSAVE_COMP_HI];
-*ebx = xsave_area_size(env->xcr0);
+/*
+ * The initial values of xcr0 and ebx are 0. On a host without kvm
+ * commit 412a3c41 (e.g., CentOS 6), ebx always stays 0 even though
+ * the guest updates xcr0, which crashes some legacy guests
+ * (e.g., CentOS 6). So set ebx == ecx to work around it.
+ */
+*ebx = kvm_enabled() ? *ecx : xsave_area_size(env->xcr0);
 } else if (count == 1) {
 *eax = env->features[FEAT_XSAVE];
 } else if (count < ARRAY_SIZE(x86_ext_save_areas)) {
-- 
2.22.0




[Qemu-devel] [PATCH] Revert "i386: correct cpu_x86_cpuid(0xd)"

2019-08-19 Thread Bingsong Si
This reverts commit de2e68c902f7b6e438b0fa3cfedd74a06a20704f.

The initial value of env->xcr0 is 0, so CPUID(EAX=0xd,ECX=0).EBX == 0. With KVM
upstream commit 412a3c41 this is fine.
On hosts without commit 412a3c41, some legacy guests (e.g. CentOS 6) get
xstate_size == 0, which crashes the guest.

Signed-off-by: Bingsong Si 
---
 target/i386/cpu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index ff65e11008..69562e21ed 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -4416,7 +4416,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
 *ecx = xsave_area_size(x86_cpu_xsave_components(cpu));
 *eax = env->features[FEAT_XSAVE_COMP_LO];
 *edx = env->features[FEAT_XSAVE_COMP_HI];
-*ebx = xsave_area_size(env->xcr0);
+*ebx = *ecx;
 } else if (count == 1) {
 *eax = env->features[FEAT_XSAVE];
 } else if (count < ARRAY_SIZE(x86_ext_save_areas)) {
-- 
2.22.0