From: xuchuangxclwt <[email protected]>

In our long-term experience in Bytedance, we've found that under
the same load, live migration of larger VMs with more devices is
often more difficult to converge (requiring a larger downtime limit).

Through some testing and calculations, we conclude that bitmap sync time
affects the calculation of live migration bandwidth.

When the addresses processed are not aligned, a large number of
clear_dirty ioctl occur (e.g. a 4MB misaligned memory can generate
2048 clear_dirty ioctls from two different memory_listener),
which increases the time required for bitmap_sync and makes it
more difficult for dirty pages to converge.

For a 64C256G vm with 8 vhost-user-net(32 queue per nic) and
16 vhost-user-blk(4 queue per blk), the sync time is as high as *73ms*
(tested with 10GBps dirty rate, the sync time increases as the dirty
page rate increases), Here are each part of the sync time:

- sync from kvm to ram_list: 2.5ms
- vhost_log_sync:3ms
- sync aligned memory from ram_list to RAMBlock: 5ms
- sync misaligned memory from ram_list to RAMBlock: 61ms

Attempt to merge those fragmented clear_dirty ioctls, then syncing
misaligned memory from ram_list to RAMBlock takes only about 1ms,
and the total sync time is only *12ms*.

Signed-off-by: Chuang Xu <[email protected]>
---
 accel/tcg/cputlb.c       |  5 ++--
 include/system/physmem.h |  7 +++---
 migration/ram.c          | 17 ++++----------
 system/memory.c          |  2 +-
 system/physmem.c         | 49 ++++++++++++++++++++++++++++------------
 5 files changed, 47 insertions(+), 33 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index fd1606c856..c8827c8b0d 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -857,8 +857,9 @@ void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState 
*src_cpu,
 void tlb_protect_code(ram_addr_t ram_addr)
 {
     physical_memory_test_and_clear_dirty(ram_addr & TARGET_PAGE_MASK,
-                                             TARGET_PAGE_SIZE,
-                                             DIRTY_MEMORY_CODE);
+                                         TARGET_PAGE_SIZE,
+                                         DIRTY_MEMORY_CODE,
+                                         NULL);
 }
 
 /* update the TLB so that writes in physical page 'phys_addr' are no longer
diff --git a/include/system/physmem.h b/include/system/physmem.h
index 879f6eae38..8eeace9d1f 100644
--- a/include/system/physmem.h
+++ b/include/system/physmem.h
@@ -39,9 +39,10 @@ uint64_t physical_memory_set_dirty_lebitmap(unsigned long 
*bitmap,
 
 void physical_memory_dirty_bits_cleared(ram_addr_t start, ram_addr_t length);
 
-bool physical_memory_test_and_clear_dirty(ram_addr_t start,
-                                          ram_addr_t length,
-                                          unsigned client);
+uint64_t physical_memory_test_and_clear_dirty(ram_addr_t start,
+                                              ram_addr_t length,
+                                              unsigned client,
+                                              unsigned long *dest);
 
 DirtyBitmapSnapshot *
 physical_memory_snapshot_and_clear_dirty(MemoryRegion *mr, hwaddr offset,
diff --git a/migration/ram.c b/migration/ram.c
index 29f016cb25..a03c9874a2 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -942,7 +942,6 @@ static uint64_t physical_memory_sync_dirty_bitmap(RAMBlock 
*rb,
                                                   ram_addr_t start,
                                                   ram_addr_t length)
 {
-    ram_addr_t addr;
     unsigned long word = BIT_WORD((start + rb->offset) >> TARGET_PAGE_BITS);
     uint64_t num_dirty = 0;
     unsigned long *dest = rb->bmap;
@@ -996,17 +995,11 @@ static uint64_t 
physical_memory_sync_dirty_bitmap(RAMBlock *rb,
     } else {
         ram_addr_t offset = rb->offset;
 
-        for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
-            if (physical_memory_test_and_clear_dirty(
-                        start + addr + offset,
-                        TARGET_PAGE_SIZE,
-                        DIRTY_MEMORY_MIGRATION)) {
-                long k = (start + addr) >> TARGET_PAGE_BITS;
-                if (!test_and_set_bit(k, dest)) {
-                    num_dirty++;
-                }
-            }
-        }
+        num_dirty = physical_memory_test_and_clear_dirty(
+                        start + offset,
+                        length,
+                        DIRTY_MEMORY_MIGRATION,
+                        dest);
     }
 
     return num_dirty;
diff --git a/system/memory.c b/system/memory.c
index 8b84661ae3..666364392d 100644
--- a/system/memory.c
+++ b/system/memory.c
@@ -2424,7 +2424,7 @@ void memory_region_reset_dirty(MemoryRegion *mr, hwaddr 
addr,
 {
     assert(mr->ram_block);
     physical_memory_test_and_clear_dirty(
-        memory_region_get_ram_addr(mr) + addr, size, client);
+        memory_region_get_ram_addr(mr) + addr, size, client, NULL);
 }
 
 int memory_region_get_fd(MemoryRegion *mr)
diff --git a/system/physmem.c b/system/physmem.c
index c9869e4049..f8b660dafe 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -1089,19 +1089,31 @@ void physical_memory_set_dirty_range(ram_addr_t start, 
ram_addr_t length,
     }
 }
 
-/* Note: start and end must be within the same ram block.  */
-bool physical_memory_test_and_clear_dirty(ram_addr_t start,
+/*
+ * Note: start and end must be within the same ram block.
+ *
+ * @dest usage:
+ * - When @dest is provided, set bits for newly discovered dirty pages
+ *   only if the bit wasn't already set in dest, and count those pages
+ *   in num_dirty.
+ * - When @dest is NULL, count all dirty pages in the range
+ *
+ * @return:
+ * - Number of dirty guest pages found within [start, start + length).
+ */
+uint64_t physical_memory_test_and_clear_dirty(ram_addr_t start,
                                               ram_addr_t length,
-                                              unsigned client)
+                                              unsigned client,
+                                              unsigned long *dest)
 {
     DirtyMemoryBlocks *blocks;
     unsigned long end, page, start_page;
-    bool dirty = false;
+    uint64_t num_dirty = 0;
     RAMBlock *ramblock;
     uint64_t mr_offset, mr_size;
 
     if (length == 0) {
-        return false;
+        return 0;
     }
 
     end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
@@ -1118,12 +1130,19 @@ bool physical_memory_test_and_clear_dirty(ram_addr_t 
start,
         while (page < end) {
             unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
             unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
-            unsigned long num = MIN(end - page,
-                                    DIRTY_MEMORY_BLOCK_SIZE - offset);
 
-            dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
-                                                  offset, num);
-            page += num;
+            if (bitmap_test_and_clear_atomic(blocks->blocks[idx], offset, 1)) {
+                if (dest) {
+                    unsigned long k = page - (ramblock->offset >> 
TARGET_PAGE_BITS);
+                    if (!test_and_set_bit(k, dest)) {
+                        num_dirty++;
+                    }
+                } else {
+                    num_dirty++;
+                }
+            }
+
+            page++;
         }
 
         mr_offset = (ram_addr_t)(start_page << TARGET_PAGE_BITS) - 
ramblock->offset;
@@ -1131,18 +1150,18 @@ bool physical_memory_test_and_clear_dirty(ram_addr_t 
start,
         memory_region_clear_dirty_bitmap(ramblock->mr, mr_offset, mr_size);
     }
 
-    if (dirty) {
+    if (num_dirty) {
         physical_memory_dirty_bits_cleared(start, length);
     }
 
-    return dirty;
+    return num_dirty;
 }
 
 static void physical_memory_clear_dirty_range(ram_addr_t addr, ram_addr_t 
length)
 {
-    physical_memory_test_and_clear_dirty(addr, length, DIRTY_MEMORY_MIGRATION);
-    physical_memory_test_and_clear_dirty(addr, length, DIRTY_MEMORY_VGA);
-    physical_memory_test_and_clear_dirty(addr, length, DIRTY_MEMORY_CODE);
+    physical_memory_test_and_clear_dirty(addr, length, DIRTY_MEMORY_MIGRATION, 
NULL);
+    physical_memory_test_and_clear_dirty(addr, length, DIRTY_MEMORY_VGA, NULL);
+    physical_memory_test_and_clear_dirty(addr, length, DIRTY_MEMORY_CODE, 
NULL);
 }
 
 DirtyBitmapSnapshot *physical_memory_snapshot_and_clear_dirty
-- 
2.39.3 (Apple Git-146)

Reply via email to