On 03/01/17 10:34, David Gibson wrote: > On Thu, Dec 22, 2016 at 04:22:11PM +1100, Alexey Kardashevskiy wrote: >> getrampagesize() returns the largest supported page size and is mainly >> used to know if huge pages are enabled. >> >> However, it is implemented in target-ppc/kvm.c and is not available >> in TCG or other architectures. >> >> This renames and moves gethugepagesize() to mmap-alloc.c where >> an fd-based analog of it is already implemented. This renames and moves >> getrampagesize() to exec.c as it seems to be the common place for >> helpers like this. >> >> The first user for it is going to be a spapr-pci-host-bridge which >> needs to know the largest RAM page size so the guest could try >> using bigger IOMMU pages to save memory. >> >> Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru> > > Reviewed-by: David Gibson <da...@gibson.dropbear.id.au> > > Seems sensible to me, but I'm not comfortable merging this via my tree > since it touches such core code. Probably should go via Paolo.
Paolo, ping? > >> --- >> include/exec/ram_addr.h | 1 + >> include/qemu/mmap-alloc.h | 2 + >> exec.c | 82 ++++++++++++++++++++++++++++++++++++ >> target-ppc/kvm.c | 105 >> ++-------------------------------------------- >> util/mmap-alloc.c | 25 +++++++++++ >> 5 files changed, 113 insertions(+), 102 deletions(-) >> >> diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h >> index 54d7108a9e..3935cbcfcd 100644 >> --- a/include/exec/ram_addr.h >> +++ b/include/exec/ram_addr.h >> @@ -91,6 +91,7 @@ typedef struct RAMList { >> } RAMList; >> extern RAMList ram_list; >> >> +long qemu_getrampagesize(void); >> ram_addr_t last_ram_offset(void); >> void qemu_mutex_lock_ramlist(void); >> void qemu_mutex_unlock_ramlist(void); >> diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h >> index 933c024ac5..50385e3f81 100644 >> --- a/include/qemu/mmap-alloc.h >> +++ b/include/qemu/mmap-alloc.h >> @@ -5,6 +5,8 @@ >> >> size_t qemu_fd_getpagesize(int fd); >> >> +size_t qemu_mempath_getpagesize(const char *mem_path); >> + >> void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared); >> >> void qemu_ram_munmap(void *ptr, size_t size); >> diff --git a/exec.c b/exec.c >> index 08c558eecf..d73b477a70 100644 >> --- a/exec.c >> +++ b/exec.c >> @@ -32,6 +32,7 @@ >> #endif >> #include "sysemu/kvm.h" >> #include "sysemu/sysemu.h" >> +#include "sysemu/numa.h" >> #include "qemu/timer.h" >> #include "qemu/config-file.h" >> #include "qemu/error-report.h" >> @@ -1218,6 +1219,87 @@ void qemu_mutex_unlock_ramlist(void) >> } >> >> #ifdef __linux__ >> +/* >> + * FIXME TOCTTOU: this iterates over memory backends' mem-path, which >> + * may or may not name the same files / on the same filesystem now as >> + * when we actually open and map them. Iterate over the file >> + * descriptors instead, and use qemu_fd_getpagesize(). 
>> + */ >> +static int find_max_supported_pagesize(Object *obj, void *opaque) >> +{ >> + char *mem_path; >> + long *hpsize_min = opaque; >> + >> + if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) { >> + mem_path = object_property_get_str(obj, "mem-path", NULL); >> + if (mem_path) { >> + long hpsize = qemu_mempath_getpagesize(mem_path); >> + if (hpsize < *hpsize_min) { >> + *hpsize_min = hpsize; >> + } >> + } else { >> + *hpsize_min = getpagesize(); >> + } >> + } >> + >> + return 0; >> +} >> + >> +long qemu_getrampagesize(void) >> +{ >> + long hpsize = LONG_MAX; >> + long mainrampagesize; >> + Object *memdev_root; >> + >> + if (mem_path) { >> + mainrampagesize = qemu_mempath_getpagesize(mem_path); >> + } else { >> + mainrampagesize = getpagesize(); >> + } >> + >> + /* it's possible we have memory-backend objects with >> + * hugepage-backed RAM. these may get mapped into system >> + * address space via -numa parameters or memory hotplug >> + * hooks. we want to take these into account, but we >> + * also want to make sure these supported hugepage >> + * sizes are applicable across the entire range of memory >> + * we may boot from, so we take the min across all >> + * backends, and assume normal pages in cases where a >> + * backend isn't backed by hugepages. >> + */ >> + memdev_root = object_resolve_path("/objects", NULL); >> + if (memdev_root) { >> + object_child_foreach(memdev_root, find_max_supported_pagesize, >> &hpsize); >> + } >> + if (hpsize == LONG_MAX) { >> + /* No additional memory regions found ==> Report main RAM page size >> */ >> + return mainrampagesize; >> + } >> + >> + /* If NUMA is disabled or the NUMA nodes are not backed with a >> + * memory-backend, then there is at least one node using "normal" RAM, >> + * so if its page size is smaller we have got to report that size >> instead. 
>> + */ >> + if (hpsize > mainrampagesize && >> + (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) { >> + static bool warned; >> + if (!warned) { >> + error_report("Huge page support disabled (n/a for main >> memory)."); >> + warned = true; >> + } >> + return mainrampagesize; >> + } >> + >> + return hpsize; >> +} >> +#else >> +long qemu_getrampagesize(void) >> +{ >> + return getpagesize(); >> +} >> +#endif >> + >> +#ifdef __linux__ >> static int64_t get_file_size(int fd) >> { >> int64_t size = lseek(fd, 0, SEEK_END); >> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c >> index 6e91a4d8bb..e0abffa8ad 100644 >> --- a/target-ppc/kvm.c >> +++ b/target-ppc/kvm.c >> @@ -42,6 +42,7 @@ >> #include "trace.h" >> #include "exec/gdbstub.h" >> #include "exec/memattrs.h" >> +#include "exec/ram_addr.h" >> #include "sysemu/hostmem.h" >> #include "qemu/cutils.h" >> #if defined(TARGET_PPC64) >> @@ -325,106 +326,6 @@ static void kvm_get_smmu_info(PowerPCCPU *cpu, struct >> kvm_ppc_smmu_info *info) >> kvm_get_fallback_smmu_info(cpu, info); >> } >> >> -static long gethugepagesize(const char *mem_path) >> -{ >> - struct statfs fs; >> - int ret; >> - >> - do { >> - ret = statfs(mem_path, &fs); >> - } while (ret != 0 && errno == EINTR); >> - >> - if (ret != 0) { >> - fprintf(stderr, "Couldn't statfs() memory path: %s\n", >> - strerror(errno)); >> - exit(1); >> - } >> - >> -#define HUGETLBFS_MAGIC 0x958458f6 >> - >> - if (fs.f_type != HUGETLBFS_MAGIC) { >> - /* Explicit mempath, but it's ordinary pages */ >> - return getpagesize(); >> - } >> - >> - /* It's hugepage, return the huge page size */ >> - return fs.f_bsize; >> -} >> - >> -/* >> - * FIXME TOCTTOU: this iterates over memory backends' mem-path, which >> - * may or may not name the same files / on the same filesystem now as >> - * when we actually open and map them. Iterate over the file >> - * descriptors instead, and use qemu_fd_getpagesize(). 
>> - */ >> -static int find_max_supported_pagesize(Object *obj, void *opaque) >> -{ >> - char *mem_path; >> - long *hpsize_min = opaque; >> - >> - if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) { >> - mem_path = object_property_get_str(obj, "mem-path", NULL); >> - if (mem_path) { >> - long hpsize = gethugepagesize(mem_path); >> - if (hpsize < *hpsize_min) { >> - *hpsize_min = hpsize; >> - } >> - } else { >> - *hpsize_min = getpagesize(); >> - } >> - } >> - >> - return 0; >> -} >> - >> -static long getrampagesize(void) >> -{ >> - long hpsize = LONG_MAX; >> - long mainrampagesize; >> - Object *memdev_root; >> - >> - if (mem_path) { >> - mainrampagesize = gethugepagesize(mem_path); >> - } else { >> - mainrampagesize = getpagesize(); >> - } >> - >> - /* it's possible we have memory-backend objects with >> - * hugepage-backed RAM. these may get mapped into system >> - * address space via -numa parameters or memory hotplug >> - * hooks. we want to take these into account, but we >> - * also want to make sure these supported hugepage >> - * sizes are applicable across the entire range of memory >> - * we may boot from, so we take the min across all >> - * backends, and assume normal pages in cases where a >> - * backend isn't backed by hugepages. >> - */ >> - memdev_root = object_resolve_path("/objects", NULL); >> - if (memdev_root) { >> - object_child_foreach(memdev_root, find_max_supported_pagesize, >> &hpsize); >> - } >> - if (hpsize == LONG_MAX) { >> - /* No additional memory regions found ==> Report main RAM page size >> */ >> - return mainrampagesize; >> - } >> - >> - /* If NUMA is disabled or the NUMA nodes are not backed with a >> - * memory-backend, then there is at least one node using "normal" RAM, >> - * so if its page size is smaller we have got to report that size >> instead. 
>> - */ >> - if (hpsize > mainrampagesize && >> - (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) { >> - static bool warned; >> - if (!warned) { >> - error_report("Huge page support disabled (n/a for main >> memory)."); >> - warned = true; >> - } >> - return mainrampagesize; >> - } >> - >> - return hpsize; >> -} >> - >> static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t >> shift) >> { >> if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) { >> @@ -454,7 +355,7 @@ static void kvm_fixup_page_sizes(PowerPCCPU *cpu) >> has_smmu_info = true; >> } >> >> - rampagesize = getrampagesize(); >> + rampagesize = qemu_getrampagesize(); >> >> /* Convert to QEMU form */ >> memset(&env->sps, 0, sizeof(env->sps)); >> @@ -2177,7 +2078,7 @@ uint64_t kvmppc_rma_size(uint64_t current_size, >> unsigned int hash_shift) >> /* Find the largest hardware supported page size that's less than >> * or equal to the (logical) backing page size of guest RAM */ >> kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info); >> - rampagesize = getrampagesize(); >> + rampagesize = qemu_getrampagesize(); >> best_page_shift = 0; >> >> for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) { >> diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c >> index 5a85aa3c89..564c79109c 100644 >> --- a/util/mmap-alloc.c >> +++ b/util/mmap-alloc.c >> @@ -39,6 +39,31 @@ size_t qemu_fd_getpagesize(int fd) >> return getpagesize(); >> } >> >> +size_t qemu_mempath_getpagesize(const char *mem_path) >> +{ >> +#ifdef CONFIG_LINUX >> + struct statfs fs; >> + int ret; >> + >> + do { >> + ret = statfs(mem_path, &fs); >> + } while (ret != 0 && errno == EINTR); >> + >> + if (ret != 0) { >> + fprintf(stderr, "Couldn't statfs() memory path: %s\n", >> + strerror(errno)); >> + exit(1); >> + } >> + >> + if (fs.f_type == HUGETLBFS_MAGIC) { >> + /* It's hugepage, return the huge page size */ >> + return fs.f_bsize; >> + } >> +#endif >> + >> + return getpagesize(); >> +} >> + >> void *qemu_ram_mmap(int fd, size_t size, size_t align, 
bool shared) >> { >> /* > -- Alexey
signature.asc
Description: OpenPGP digital signature