On Thu, Feb 09, 2017 at 12:48:19PM +0100, Paolo Bonzini wrote: > > > On 09/02/2017 06:43, Alexey Kardashevskiy wrote: > > On 03/01/17 10:34, David Gibson wrote: > >> On Thu, Dec 22, 2016 at 04:22:11PM +1100, Alexey Kardashevskiy wrote: > >>> getrampagesize() returns the largest supported page size and is mainly > >>> used to know if huge pages are enabled. > >>> > >>> However, it is implemented in target-ppc/kvm.c and is not available > >>> in TCG or other architectures. > >>> > >>> This renames and moves gethugepagesize() to mmap-alloc.c where > >>> an fd-based analog of it is already implemented. This renames and moves > >>> getrampagesize() to exec.c as it seems to be the common place for > >>> helpers like this. > >>> > >>> The first user of it is going to be a spapr-pci-host-bridge which > >>> needs to know the largest RAM page size so the guest could try > >>> using bigger IOMMU pages to save memory. > >>> > >>> Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru> > >> > >> Reviewed-by: David Gibson <da...@gibson.dropbear.id.au> > >> > >> Seems sensible to me, but I'm not comfortable merging this via my tree > >> since it touches such core code. Probably should go via Paolo. > > > > Paolo, ping? > > It's just code movement, go ahead.
Ok, I've merged this in my tree. > > Paolo > > > > > > >> > >>> --- > >>> include/exec/ram_addr.h | 1 + > >>> include/qemu/mmap-alloc.h | 2 + > >>> exec.c | 82 ++++++++++++++++++++++++++++++++++++ > >>> target-ppc/kvm.c | 105 > >>> ++-------------------------------------------- > >>> util/mmap-alloc.c | 25 +++++++++++ > >>> 5 files changed, 113 insertions(+), 102 deletions(-) > >>> > >>> diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h > >>> index 54d7108a9e..3935cbcfcd 100644 > >>> --- a/include/exec/ram_addr.h > >>> +++ b/include/exec/ram_addr.h > >>> @@ -91,6 +91,7 @@ typedef struct RAMList { > >>> } RAMList; > >>> extern RAMList ram_list; > >>> > >>> +long qemu_getrampagesize(void); > >>> ram_addr_t last_ram_offset(void); > >>> void qemu_mutex_lock_ramlist(void); > >>> void qemu_mutex_unlock_ramlist(void); > >>> diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h > >>> index 933c024ac5..50385e3f81 100644 > >>> --- a/include/qemu/mmap-alloc.h > >>> +++ b/include/qemu/mmap-alloc.h > >>> @@ -5,6 +5,8 @@ > >>> > >>> size_t qemu_fd_getpagesize(int fd); > >>> > >>> +size_t qemu_mempath_getpagesize(const char *mem_path); > >>> + > >>> void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared); > >>> > >>> void qemu_ram_munmap(void *ptr, size_t size); > >>> diff --git a/exec.c b/exec.c > >>> index 08c558eecf..d73b477a70 100644 > >>> --- a/exec.c > >>> +++ b/exec.c > >>> @@ -32,6 +32,7 @@ > >>> #endif > >>> #include "sysemu/kvm.h" > >>> #include "sysemu/sysemu.h" > >>> +#include "sysemu/numa.h" > >>> #include "qemu/timer.h" > >>> #include "qemu/config-file.h" > >>> #include "qemu/error-report.h" > >>> @@ -1218,6 +1219,87 @@ void qemu_mutex_unlock_ramlist(void) > >>> } > >>> > >>> #ifdef __linux__ > >>> +/* > >>> + * FIXME TOCTTOU: this iterates over memory backends' mem-path, which > >>> + * may or may not name the same files / on the same filesystem now as > >>> + * when we actually open and map them. 
Iterate over the file > >>> + * descriptors instead, and use qemu_fd_getpagesize(). > >>> + */ > >>> +static int find_max_supported_pagesize(Object *obj, void *opaque) > >>> +{ > >>> + char *mem_path; > >>> + long *hpsize_min = opaque; > >>> + > >>> + if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) { > >>> + mem_path = object_property_get_str(obj, "mem-path", NULL); > >>> + if (mem_path) { > >>> + long hpsize = qemu_mempath_getpagesize(mem_path); > >>> + if (hpsize < *hpsize_min) { > >>> + *hpsize_min = hpsize; > >>> + } > >>> + } else { > >>> + *hpsize_min = getpagesize(); > >>> + } > >>> + } > >>> + > >>> + return 0; > >>> +} > >>> + > >>> +long qemu_getrampagesize(void) > >>> +{ > >>> + long hpsize = LONG_MAX; > >>> + long mainrampagesize; > >>> + Object *memdev_root; > >>> + > >>> + if (mem_path) { > >>> + mainrampagesize = qemu_mempath_getpagesize(mem_path); > >>> + } else { > >>> + mainrampagesize = getpagesize(); > >>> + } > >>> + > >>> + /* it's possible we have memory-backend objects with > >>> + * hugepage-backed RAM. these may get mapped into system > >>> + * address space via -numa parameters or memory hotplug > >>> + * hooks. we want to take these into account, but we > >>> + * also want to make sure these supported hugepage > >>> + * sizes are applicable across the entire range of memory > >>> + * we may boot from, so we take the min across all > >>> + * backends, and assume normal pages in cases where a > >>> + * backend isn't backed by hugepages. 
> >>> + */ > >>> + memdev_root = object_resolve_path("/objects", NULL); > >>> + if (memdev_root) { > >>> + object_child_foreach(memdev_root, find_max_supported_pagesize, > >>> &hpsize); > >>> + } > >>> + if (hpsize == LONG_MAX) { > >>> + /* No additional memory regions found ==> Report main RAM page > >>> size */ > >>> + return mainrampagesize; > >>> + } > >>> + > >>> + /* If NUMA is disabled or the NUMA nodes are not backed with a > >>> + * memory-backend, then there is at least one node using "normal" > >>> RAM, > >>> + * so if its page size is smaller we have got to report that size > >>> instead. > >>> + */ > >>> + if (hpsize > mainrampagesize && > >>> + (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) { > >>> + static bool warned; > >>> + if (!warned) { > >>> + error_report("Huge page support disabled (n/a for main > >>> memory)."); > >>> + warned = true; > >>> + } > >>> + return mainrampagesize; > >>> + } > >>> + > >>> + return hpsize; > >>> +} > >>> +#else > >>> +long qemu_getrampagesize(void) > >>> +{ > >>> + return getpagesize(); > >>> +} > >>> +#endif > >>> + > >>> +#ifdef __linux__ > >>> static int64_t get_file_size(int fd) > >>> { > >>> int64_t size = lseek(fd, 0, SEEK_END); > >>> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c > >>> index 6e91a4d8bb..e0abffa8ad 100644 > >>> --- a/target-ppc/kvm.c > >>> +++ b/target-ppc/kvm.c > >>> @@ -42,6 +42,7 @@ > >>> #include "trace.h" > >>> #include "exec/gdbstub.h" > >>> #include "exec/memattrs.h" > >>> +#include "exec/ram_addr.h" > >>> #include "sysemu/hostmem.h" > >>> #include "qemu/cutils.h" > >>> #if defined(TARGET_PPC64) > >>> @@ -325,106 +326,6 @@ static void kvm_get_smmu_info(PowerPCCPU *cpu, > >>> struct kvm_ppc_smmu_info *info) > >>> kvm_get_fallback_smmu_info(cpu, info); > >>> } > >>> > >>> -static long gethugepagesize(const char *mem_path) > >>> -{ > >>> - struct statfs fs; > >>> - int ret; > >>> - > >>> - do { > >>> - ret = statfs(mem_path, &fs); > >>> - } while (ret != 0 && errno == EINTR); 
> >>> - > >>> - if (ret != 0) { > >>> - fprintf(stderr, "Couldn't statfs() memory path: %s\n", > >>> - strerror(errno)); > >>> - exit(1); > >>> - } > >>> - > >>> -#define HUGETLBFS_MAGIC 0x958458f6 > >>> - > >>> - if (fs.f_type != HUGETLBFS_MAGIC) { > >>> - /* Explicit mempath, but it's ordinary pages */ > >>> - return getpagesize(); > >>> - } > >>> - > >>> - /* It's hugepage, return the huge page size */ > >>> - return fs.f_bsize; > >>> -} > >>> - > >>> -/* > >>> - * FIXME TOCTTOU: this iterates over memory backends' mem-path, which > >>> - * may or may not name the same files / on the same filesystem now as > >>> - * when we actually open and map them. Iterate over the file > >>> - * descriptors instead, and use qemu_fd_getpagesize(). > >>> - */ > >>> -static int find_max_supported_pagesize(Object *obj, void *opaque) > >>> -{ > >>> - char *mem_path; > >>> - long *hpsize_min = opaque; > >>> - > >>> - if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) { > >>> - mem_path = object_property_get_str(obj, "mem-path", NULL); > >>> - if (mem_path) { > >>> - long hpsize = gethugepagesize(mem_path); > >>> - if (hpsize < *hpsize_min) { > >>> - *hpsize_min = hpsize; > >>> - } > >>> - } else { > >>> - *hpsize_min = getpagesize(); > >>> - } > >>> - } > >>> - > >>> - return 0; > >>> -} > >>> - > >>> -static long getrampagesize(void) > >>> -{ > >>> - long hpsize = LONG_MAX; > >>> - long mainrampagesize; > >>> - Object *memdev_root; > >>> - > >>> - if (mem_path) { > >>> - mainrampagesize = gethugepagesize(mem_path); > >>> - } else { > >>> - mainrampagesize = getpagesize(); > >>> - } > >>> - > >>> - /* it's possible we have memory-backend objects with > >>> - * hugepage-backed RAM. these may get mapped into system > >>> - * address space via -numa parameters or memory hotplug > >>> - * hooks. 
we want to take these into account, but we > >>> - * also want to make sure these supported hugepage > >>> - * sizes are applicable across the entire range of memory > >>> - * we may boot from, so we take the min across all > >>> - * backends, and assume normal pages in cases where a > >>> - * backend isn't backed by hugepages. > >>> - */ > >>> - memdev_root = object_resolve_path("/objects", NULL); > >>> - if (memdev_root) { > >>> - object_child_foreach(memdev_root, find_max_supported_pagesize, > >>> &hpsize); > >>> - } > >>> - if (hpsize == LONG_MAX) { > >>> - /* No additional memory regions found ==> Report main RAM page > >>> size */ > >>> - return mainrampagesize; > >>> - } > >>> - > >>> - /* If NUMA is disabled or the NUMA nodes are not backed with a > >>> - * memory-backend, then there is at least one node using "normal" > >>> RAM, > >>> - * so if its page size is smaller we have got to report that size > >>> instead. > >>> - */ > >>> - if (hpsize > mainrampagesize && > >>> - (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) { > >>> - static bool warned; > >>> - if (!warned) { > >>> - error_report("Huge page support disabled (n/a for main > >>> memory)."); > >>> - warned = true; > >>> - } > >>> - return mainrampagesize; > >>> - } > >>> - > >>> - return hpsize; > >>> -} > >>> - > >>> static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t > >>> shift) > >>> { > >>> if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) { > >>> @@ -454,7 +355,7 @@ static void kvm_fixup_page_sizes(PowerPCCPU *cpu) > >>> has_smmu_info = true; > >>> } > >>> > >>> - rampagesize = getrampagesize(); > >>> + rampagesize = qemu_getrampagesize(); > >>> > >>> /* Convert to QEMU form */ > >>> memset(&env->sps, 0, sizeof(env->sps)); > >>> @@ -2177,7 +2078,7 @@ uint64_t kvmppc_rma_size(uint64_t current_size, > >>> unsigned int hash_shift) > >>> /* Find the largest hardware supported page size that's less than > >>> * or equal to the (logical) backing page size of guest RAM */ > 
>>> kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info); > >>> - rampagesize = getrampagesize(); > >>> + rampagesize = qemu_getrampagesize(); > >>> best_page_shift = 0; > >>> > >>> for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) { > >>> diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c > >>> index 5a85aa3c89..564c79109c 100644 > >>> --- a/util/mmap-alloc.c > >>> +++ b/util/mmap-alloc.c > >>> @@ -39,6 +39,31 @@ size_t qemu_fd_getpagesize(int fd) > >>> return getpagesize(); > >>> } > >>> > >>> +size_t qemu_mempath_getpagesize(const char *mem_path) > >>> +{ > >>> +#ifdef CONFIG_LINUX > >>> + struct statfs fs; > >>> + int ret; > >>> + > >>> + do { > >>> + ret = statfs(mem_path, &fs); > >>> + } while (ret != 0 && errno == EINTR); > >>> + > >>> + if (ret != 0) { > >>> + fprintf(stderr, "Couldn't statfs() memory path: %s\n", > >>> + strerror(errno)); > >>> + exit(1); > >>> + } > >>> + > >>> + if (fs.f_type == HUGETLBFS_MAGIC) { > >>> + /* It's hugepage, return the huge page size */ > >>> + return fs.f_bsize; > >>> + } > >>> +#endif > >>> + > >>> + return getpagesize(); > >>> +} > >>> + > >>> void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared) > >>> { > >>> /* > >> > > > > > -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson
signature.asc
Description: PGP signature