On Sun, 2013-06-09 at 12:34 +1000, Alexey Kardashevskiy wrote: > It is _live_ migration, the source sends changes, same pages can change and > be sent several times. So we would need to turn tracking on on the > destination to know if some page was received from the source or changed by > the destination itself (by writing there bios/firmware images, etc) and > then clear pages which were touched by the destination and were not sent by > the source.
Or we can set some kind of flag so that when creating a "migration target" VM we don't load all these things into memory. > Or we do not make guesses, the source sends everything and the destination > simply checks if a page which is empty on the source is empty on the > destination and avoid writing zeroes to it. Looks simpler to me and this is > what the new patch does. But you end up sending a lot of zeros ... is the migration compressed (I am not familiar with it at all)? If it is, that shouldn't be a big deal, but otherwise it feels to me that you should be able to send a special packet instead that says "all zeros" because you'll potentially have an awful lot of these. Ben. > > > >> > >>> Also, you mean following code is from qemu and it does not allocate > >>> memory with you gcc right? Maybe it is related to KVM, how about > >>> turn off KVM and retry following code in qemu? > >>> > >>>> #include <stdio.h> > >>>> #include <stdlib.h> > >>>> #include <assert.h> > >>>> #include <unistd.h> > >>>> #include <sys/resource.h> > >>>> #include <inttypes.h> > >>>> #include <string.h> > >>>> #include <sys/mman.h> > >>>> #include <errno.h> > >>>> > >>>> #if defined __SSE2__ > >>>> #include <emmintrin.h> > >>>> #define VECTYPE __m128i > >>>> #define SPLAT(p) _mm_set1_epi8(*(p)) > >>>> #define ALL_EQ(v1, v2) (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) == > >>>> 0xFFFF) > >>>> #else > >>>> #define VECTYPE unsigned long > >>>> #define SPLAT(p) (*(p) * (~0UL / 255)) > >>>> #define ALL_EQ(v1, v2) ((v1) == (v2)) > >>>> #endif > >>>> > >>>> #define BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR 8 > >>>> > >>>> /* Round number down to multiple */ > >>>> #define QEMU_ALIGN_DOWN(n, m) ((n) / (m) * (m)) > >>>> > >>>> /* Round number up to multiple */ > >>>> #define QEMU_ALIGN_UP(n, m) QEMU_ALIGN_DOWN((n) + (m) - 1, (m)) > >>>> > >>>> #define QEMU_VMALLOC_ALIGN (256 * 4096) > >>>> > >>>> /* alloc shared memory pages */ > >>>> void *qemu_anon_ram_alloc(size_t size) > >>>> { > >>>> size_t align 
= QEMU_VMALLOC_ALIGN; > >>>> size_t total = size + align - getpagesize(); > >>>> void *ptr = mmap(0, total, PROT_READ | PROT_WRITE, > >>>> MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); > >>>> size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - > >>>> (uintptr_t)ptr; > >>>> > >>>> if (ptr == MAP_FAILED) { > >>>> fprintf(stderr, "Failed to allocate %zu B: %s\n", > >>>> size, strerror(errno)); > >>>> abort(); > >>>> } > >>>> > >>>> ptr += offset; > >>>> total -= offset; > >>>> > >>>> if (offset > 0) { > >>>> munmap(ptr - offset, offset); > >>>> } > >>>> if (total > size) { > >>>> munmap(ptr + size, total - size); > >>>> } > >>>> > >>>> return ptr; > >>>> } > >>>> > >>>> static inline int > >>>> can_use_buffer_find_nonzero_offset(const void *buf, size_t len) > >>>> { > >>>> return (len % (BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR > >>>> * sizeof(VECTYPE)) == 0 > >>>> && ((uintptr_t) buf) % sizeof(VECTYPE) == 0); > >>>> } > >>>> > >>>> size_t buffer_find_nonzero_offset(const void *buf, size_t len) > >>>> { > >>>> const VECTYPE *p = buf; > >>>> const VECTYPE zero = (VECTYPE){0}; > >>>> size_t i; > >>>> > >>>> if (!len) { > >>>> return 0; > >>>> } > >>>> > >>>> assert(can_use_buffer_find_nonzero_offset(buf, len)); > >>>> > >>>> for (i = 0; i < BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; i++) { > >>>> if (!ALL_EQ(p[i], zero)) { > >>>> return i * sizeof(VECTYPE); > >>>> } > >>>> } > >>>> > >>>> for (i = BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; > >>>> i < len / sizeof(VECTYPE); > >>>> i += BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR) { > >>>> VECTYPE tmp0 = p[i + 0] | p[i + 1]; > >>>> VECTYPE tmp1 = p[i + 2] | p[i + 3]; > >>>> VECTYPE tmp2 = p[i + 4] | p[i + 5]; > >>>> VECTYPE tmp3 = p[i + 6] | p[i + 7]; > >>>> VECTYPE tmp01 = tmp0 | tmp1; > >>>> VECTYPE tmp23 = tmp2 | tmp3; > >>>> if (!ALL_EQ(tmp01 | tmp23, zero)) { > >>>> break; > >>>> } > >>>> } > >>>> > >>>> return i * sizeof(VECTYPE); > >>>> } > >>>> > >>>> int main() > >>>> { > >>>> //char *x = malloc(1024 << 20); > >>>> char *x 
= qemu_anon_ram_alloc(1024 << 20); > >>>> > >>>> int i, j; > >>>> int ret = 0; > >>>> struct rusage rusage; > >>>> for (i = 0; i < 500; i ++) { > >>>> for (j = 0; j < 10 << 20; j += 4096) { > >>>> ret += buffer_find_nonzero_offset((char*) (x + (i << 20) > >>>> + j), 4096); > >>>> } > >>>> getrusage( RUSAGE_SELF, &rusage ); > >>>> printf("read offset: %d kB, RSS size: %ld kB", ((i+1) << 10), > >>>> rusage.ru_maxrss); > >>>> getchar(); > >>>> } > >>>> printf("%d zero pages\n", ret); > >>>> } > >>>> > >>> > >>> > >> > >> > > > > > >