On 09/02/2017 06:43, Alexey Kardashevskiy wrote: > On 03/01/17 10:34, David Gibson wrote: >> On Thu, Dec 22, 2016 at 04:22:11PM +1100, Alexey Kardashevskiy wrote: >>> getrampagesize() returns the largest supported page size and is mainly >>> used to know if huge pages are enabled. >>> >>> However, it is implemented in target-ppc/kvm.c and is not available >>> in TCG or other architectures. >>> >>> This renames and moves gethugepagesize() to mmap-alloc.c where >>> an fd-based analog of it is already implemented. This renames and moves >>> getrampagesize() to exec.c as it seems to be the common place for >>> helpers like this. >>> >>> The first user for it is going to be a spapr-pci-host-bridge which >>> needs to know the largest RAM page size so the guest could try >>> using bigger IOMMU pages to save memory. >>> >>> Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru> >> >> Reviewed-by: David Gibson <da...@gibson.dropbear.id.au> >> >> Seems sensible to me, but I'm not comfortable merging this via my tree >> since it touches such core code. Probably should go via Paolo. > > Paolo, ping?
It's just code movement, go ahead. Paolo > > >> >>> --- >>> include/exec/ram_addr.h | 1 + >>> include/qemu/mmap-alloc.h | 2 + >>> exec.c | 82 ++++++++++++++++++++++++++++++++++++ >>> target-ppc/kvm.c | 105 >>> ++-------------------------------------------- >>> util/mmap-alloc.c | 25 +++++++++++ >>> 5 files changed, 113 insertions(+), 102 deletions(-) >>> >>> diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h >>> index 54d7108a9e..3935cbcfcd 100644 >>> --- a/include/exec/ram_addr.h >>> +++ b/include/exec/ram_addr.h >>> @@ -91,6 +91,7 @@ typedef struct RAMList { >>> } RAMList; >>> extern RAMList ram_list; >>> >>> +long qemu_getrampagesize(void); >>> ram_addr_t last_ram_offset(void); >>> void qemu_mutex_lock_ramlist(void); >>> void qemu_mutex_unlock_ramlist(void); >>> diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h >>> index 933c024ac5..50385e3f81 100644 >>> --- a/include/qemu/mmap-alloc.h >>> +++ b/include/qemu/mmap-alloc.h >>> @@ -5,6 +5,8 @@ >>> >>> size_t qemu_fd_getpagesize(int fd); >>> >>> +size_t qemu_mempath_getpagesize(const char *mem_path); >>> + >>> void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared); >>> >>> void qemu_ram_munmap(void *ptr, size_t size); >>> diff --git a/exec.c b/exec.c >>> index 08c558eecf..d73b477a70 100644 >>> --- a/exec.c >>> +++ b/exec.c >>> @@ -32,6 +32,7 @@ >>> #endif >>> #include "sysemu/kvm.h" >>> #include "sysemu/sysemu.h" >>> +#include "sysemu/numa.h" >>> #include "qemu/timer.h" >>> #include "qemu/config-file.h" >>> #include "qemu/error-report.h" >>> @@ -1218,6 +1219,87 @@ void qemu_mutex_unlock_ramlist(void) >>> } >>> >>> #ifdef __linux__ >>> +/* >>> + * FIXME TOCTTOU: this iterates over memory backends' mem-path, which >>> + * may or may not name the same files / on the same filesystem now as >>> + * when we actually open and map them. Iterate over the file >>> + * descriptors instead, and use qemu_fd_getpagesize(). 
>>> + */ >>> +static int find_max_supported_pagesize(Object *obj, void *opaque) >>> +{ >>> + char *mem_path; >>> + long *hpsize_min = opaque; >>> + >>> + if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) { >>> + mem_path = object_property_get_str(obj, "mem-path", NULL); >>> + if (mem_path) { >>> + long hpsize = qemu_mempath_getpagesize(mem_path); >>> + if (hpsize < *hpsize_min) { >>> + *hpsize_min = hpsize; >>> + } >>> + } else { >>> + *hpsize_min = getpagesize(); >>> + } >>> + } >>> + >>> + return 0; >>> +} >>> + >>> +long qemu_getrampagesize(void) >>> +{ >>> + long hpsize = LONG_MAX; >>> + long mainrampagesize; >>> + Object *memdev_root; >>> + >>> + if (mem_path) { >>> + mainrampagesize = qemu_mempath_getpagesize(mem_path); >>> + } else { >>> + mainrampagesize = getpagesize(); >>> + } >>> + >>> + /* it's possible we have memory-backend objects with >>> + * hugepage-backed RAM. these may get mapped into system >>> + * address space via -numa parameters or memory hotplug >>> + * hooks. we want to take these into account, but we >>> + * also want to make sure these supported hugepage >>> + * sizes are applicable across the entire range of memory >>> + * we may boot from, so we take the min across all >>> + * backends, and assume normal pages in cases where a >>> + * backend isn't backed by hugepages. >>> + */ >>> + memdev_root = object_resolve_path("/objects", NULL); >>> + if (memdev_root) { >>> + object_child_foreach(memdev_root, find_max_supported_pagesize, >>> &hpsize); >>> + } >>> + if (hpsize == LONG_MAX) { >>> + /* No additional memory regions found ==> Report main RAM page >>> size */ >>> + return mainrampagesize; >>> + } >>> + >>> + /* If NUMA is disabled or the NUMA nodes are not backed with a >>> + * memory-backend, then there is at least one node using "normal" RAM, >>> + * so if its page size is smaller we have got to report that size >>> instead. 
>>> + */ >>> + if (hpsize > mainrampagesize && >>> + (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) { >>> + static bool warned; >>> + if (!warned) { >>> + error_report("Huge page support disabled (n/a for main >>> memory)."); >>> + warned = true; >>> + } >>> + return mainrampagesize; >>> + } >>> + >>> + return hpsize; >>> +} >>> +#else >>> +long qemu_getrampagesize(void) >>> +{ >>> + return getpagesize(); >>> +} >>> +#endif >>> + >>> +#ifdef __linux__ >>> static int64_t get_file_size(int fd) >>> { >>> int64_t size = lseek(fd, 0, SEEK_END); >>> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c >>> index 6e91a4d8bb..e0abffa8ad 100644 >>> --- a/target-ppc/kvm.c >>> +++ b/target-ppc/kvm.c >>> @@ -42,6 +42,7 @@ >>> #include "trace.h" >>> #include "exec/gdbstub.h" >>> #include "exec/memattrs.h" >>> +#include "exec/ram_addr.h" >>> #include "sysemu/hostmem.h" >>> #include "qemu/cutils.h" >>> #if defined(TARGET_PPC64) >>> @@ -325,106 +326,6 @@ static void kvm_get_smmu_info(PowerPCCPU *cpu, struct >>> kvm_ppc_smmu_info *info) >>> kvm_get_fallback_smmu_info(cpu, info); >>> } >>> >>> -static long gethugepagesize(const char *mem_path) >>> -{ >>> - struct statfs fs; >>> - int ret; >>> - >>> - do { >>> - ret = statfs(mem_path, &fs); >>> - } while (ret != 0 && errno == EINTR); >>> - >>> - if (ret != 0) { >>> - fprintf(stderr, "Couldn't statfs() memory path: %s\n", >>> - strerror(errno)); >>> - exit(1); >>> - } >>> - >>> -#define HUGETLBFS_MAGIC 0x958458f6 >>> - >>> - if (fs.f_type != HUGETLBFS_MAGIC) { >>> - /* Explicit mempath, but it's ordinary pages */ >>> - return getpagesize(); >>> - } >>> - >>> - /* It's hugepage, return the huge page size */ >>> - return fs.f_bsize; >>> -} >>> - >>> -/* >>> - * FIXME TOCTTOU: this iterates over memory backends' mem-path, which >>> - * may or may not name the same files / on the same filesystem now as >>> - * when we actually open and map them. Iterate over the file >>> - * descriptors instead, and use qemu_fd_getpagesize(). 
>>> - */ >>> -static int find_max_supported_pagesize(Object *obj, void *opaque) >>> -{ >>> - char *mem_path; >>> - long *hpsize_min = opaque; >>> - >>> - if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) { >>> - mem_path = object_property_get_str(obj, "mem-path", NULL); >>> - if (mem_path) { >>> - long hpsize = gethugepagesize(mem_path); >>> - if (hpsize < *hpsize_min) { >>> - *hpsize_min = hpsize; >>> - } >>> - } else { >>> - *hpsize_min = getpagesize(); >>> - } >>> - } >>> - >>> - return 0; >>> -} >>> - >>> -static long getrampagesize(void) >>> -{ >>> - long hpsize = LONG_MAX; >>> - long mainrampagesize; >>> - Object *memdev_root; >>> - >>> - if (mem_path) { >>> - mainrampagesize = gethugepagesize(mem_path); >>> - } else { >>> - mainrampagesize = getpagesize(); >>> - } >>> - >>> - /* it's possible we have memory-backend objects with >>> - * hugepage-backed RAM. these may get mapped into system >>> - * address space via -numa parameters or memory hotplug >>> - * hooks. we want to take these into account, but we >>> - * also want to make sure these supported hugepage >>> - * sizes are applicable across the entire range of memory >>> - * we may boot from, so we take the min across all >>> - * backends, and assume normal pages in cases where a >>> - * backend isn't backed by hugepages. >>> - */ >>> - memdev_root = object_resolve_path("/objects", NULL); >>> - if (memdev_root) { >>> - object_child_foreach(memdev_root, find_max_supported_pagesize, >>> &hpsize); >>> - } >>> - if (hpsize == LONG_MAX) { >>> - /* No additional memory regions found ==> Report main RAM page >>> size */ >>> - return mainrampagesize; >>> - } >>> - >>> - /* If NUMA is disabled or the NUMA nodes are not backed with a >>> - * memory-backend, then there is at least one node using "normal" RAM, >>> - * so if its page size is smaller we have got to report that size >>> instead. 
>>> - */ >>> - if (hpsize > mainrampagesize && >>> - (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) { >>> - static bool warned; >>> - if (!warned) { >>> - error_report("Huge page support disabled (n/a for main >>> memory)."); >>> - warned = true; >>> - } >>> - return mainrampagesize; >>> - } >>> - >>> - return hpsize; >>> -} >>> - >>> static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t >>> shift) >>> { >>> if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) { >>> @@ -454,7 +355,7 @@ static void kvm_fixup_page_sizes(PowerPCCPU *cpu) >>> has_smmu_info = true; >>> } >>> >>> - rampagesize = getrampagesize(); >>> + rampagesize = qemu_getrampagesize(); >>> >>> /* Convert to QEMU form */ >>> memset(&env->sps, 0, sizeof(env->sps)); >>> @@ -2177,7 +2078,7 @@ uint64_t kvmppc_rma_size(uint64_t current_size, >>> unsigned int hash_shift) >>> /* Find the largest hardware supported page size that's less than >>> * or equal to the (logical) backing page size of guest RAM */ >>> kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info); >>> - rampagesize = getrampagesize(); >>> + rampagesize = qemu_getrampagesize(); >>> best_page_shift = 0; >>> >>> for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) { >>> diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c >>> index 5a85aa3c89..564c79109c 100644 >>> --- a/util/mmap-alloc.c >>> +++ b/util/mmap-alloc.c >>> @@ -39,6 +39,31 @@ size_t qemu_fd_getpagesize(int fd) >>> return getpagesize(); >>> } >>> >>> +size_t qemu_mempath_getpagesize(const char *mem_path) >>> +{ >>> +#ifdef CONFIG_LINUX >>> + struct statfs fs; >>> + int ret; >>> + >>> + do { >>> + ret = statfs(mem_path, &fs); >>> + } while (ret != 0 && errno == EINTR); >>> + >>> + if (ret != 0) { >>> + fprintf(stderr, "Couldn't statfs() memory path: %s\n", >>> + strerror(errno)); >>> + exit(1); >>> + } >>> + >>> + if (fs.f_type == HUGETLBFS_MAGIC) { >>> + /* It's hugepage, return the huge page size */ >>> + return fs.f_bsize; >>> + } >>> +#endif >>> + >>> + return getpagesize(); 
>>> +} >>> + >>> void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared) >>> { >>> /* >> > >
signature.asc
Description: OpenPGP digital signature