> > From: Zhang Yi <yi.z.zh...@linux.intel.com> > > When a file supporting DAX is used as vNVDIMM backend, mmap it with > MAP_SYNC flag in addition which can ensure file system metadata > synced in each guest writes to the backend file, without other QEMU > actions (e.g., periodic fsync() by QEMU). > > Current, We have below different possible use cases: > > 1. pmem=on is set, shared=on is set, MAP_SYNC supported: > a: backend is a dax supporting file. > - MAP_SYNC will active. > b: backend is not a dax supporting file. > - mmap will trigger a warning. then MAP_SYNC flag will be ignored > > 2. The rest of cases: > - we will never pass the MAP_SYNC to mmap2 > > Signed-off-by: Haozhong Zhang <haozhong.zh...@intel.com> > Signed-off-by: Zhang Yi <yi.z.zh...@linux.intel.com> > --- > include/qemu/osdep.h | 21 +++++++++++++++++++++ > util/mmap-alloc.c | 28 +++++++++++++++++++++++++++- > 2 files changed, 48 insertions(+), 1 deletion(-) > > diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h > index 457d24e..96209bb 100644 > --- a/include/qemu/osdep.h > +++ b/include/qemu/osdep.h > @@ -419,6 +419,27 @@ void qemu_anon_ram_free(void *ptr, size_t size); > # define QEMU_VMALLOC_ALIGN getpagesize() > #endif > > +/* > + * MAP_SHARED_VALIDATE and MAP_SYNC are introduced in Linux kernel > + * 4.15, so they may not be defined when compiling on older kernels. 
> + */ > +#ifdef CONFIG_LINUX > + > +#include <linux/mman.h> > + > +#ifndef MAP_SYNC > +#define MAP_SYNC 0x80000 > +#endif > + > +#ifndef MAP_SHARED_VALIDATE > +#define MAP_SHARED_VALIDATE 0x03 > +#endif > + > +#else /* !CONFIG_LINUX */ > +#define MAP_SYNC 0x0 > +#define MAP_SHARED_VALIDATE 0x0 > +#endif /* CONFIG_LINUX */ > + > #ifdef CONFIG_POSIX > struct qemu_signalfd_siginfo { > uint32_t ssi_signo; /* Signal number */ > diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c > index 97bbeed..2c86ad2 100644 > --- a/util/mmap-alloc.c > +++ b/util/mmap-alloc.c > @@ -101,6 +101,7 @@ void *qemu_ram_mmap(int fd, > #else > void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, > 0); > #endif > + int mmap_xflags = 0; > size_t offset; > void *ptr1; > > @@ -111,13 +112,38 @@ void *qemu_ram_mmap(int fd, > assert(is_power_of_2(align)); > /* Always align to host page size */ > assert(align >= getpagesize()); > + if (shared && is_pmem) { > + mmap_xflags = MAP_SYNC | MAP_SHARED_VALIDATE; > + } > > offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr; > +retry_mmap: > ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE, > MAP_FIXED | > (fd == -1 ? MAP_ANONYMOUS : 0) | > - (shared ? MAP_SHARED : MAP_PRIVATE), > + (shared ? MAP_SHARED : MAP_PRIVATE) | mmap_xflags, > fd, 0); > + > + /* if map failed with MAP_SHARED_VALIDATE | MAP_SYNC, > + * we try with MAP_SHARED_VALIDATE without MAP_SYNC > + */ > + if (ptr1 == MAP_FAILED && > + mmap_xflags == (MAP_SYNC | MAP_SHARED_VALIDATE)) { > + if (errno == ENOTSUP) { > + perror("failed to validate with mapping flags"); > + } > + mmap_xflags = MAP_SHARED_VALIDATE; > + goto retry_mmap; > + } > + /* MAP_SHARED_VALIDATE flag is available since Linux 4.15 > + * Test only with MAP_SHARED_VALIDATE flag for compatibility. 
> + * Then ignore the MAP_SHARED_VALIDATE flag and retry again > + */ > + if (mmap_xflags == MAP_SHARED_VALIDATE && > + ptr1 == MAP_FAILED) { > + mmap_xflags &= ~MAP_SHARED_VALIDATE; > + goto retry_mmap; > + }
I am not sure we need these multiple validation steps. If mmap with MAP_SYNC | MAP_SHARED_VALIDATE is not supported or fails, can't we just fall back to mmap without MAP_SYNC and MAP_SHARED_VALIDATE? I saw a lot of discussion in previous versions of this patch series. I am not sure whether it was suggested this way or whether I am missing something important here. Thanks, Pankaj > if (ptr1 == MAP_FAILED) { > munmap(ptr, total); > return MAP_FAILED; > -- > 2.7.4 > > >