> > IMO, not wasting memory should be priority #1 here, before simplicity > > or multithread-safety. ('cvs' is not a multithreaded program.) > > Hmm, well, if CVS is the only user and we have a fix that satisfies a > CVS maintainer, perhaps we can simply agree to disagree about this > particular simplicity-vs-performance tradeoff.
"agree to disagree" is not an option for me, until we have seen the actual figures. Therefore I'm committing this change that - adds a benchmark for the various implementations of pagealign_alloc, - for this purpose, allows to change the implementation without recompilation. You can run the benchmark like this: 1) $ ./gnulib-tool --create-testdir --dir=../testdir1 pagealign_alloc 2) In the testdir: $ ./configure CPPFLAGS=-Wall $ make $ gltests/bench-pagealign_alloc b $ gltests/bench-pagealign_alloc c $ gltests/bench-pagealign_alloc d $ gltests/bench-pagealign_alloc e $ gltests/bench-pagealign_alloc f where the implementation choice is: b: malloc c: mmap (if available) d: posix_memalign (if available) e: _aligned_malloc (if available) f: VirtualAlloc (if available) What I measure is: b c d e f glibc 176% 101% 175% --- --- musl libc 198% 101% 197% --- --- macOS 190% 100% 195% --- --- FreeBSD 190% 100% 380% --- --- NetBSD 185% 101% 379% --- --- OpenBSD 209% 134% 105% --- --- AIX 177% 101% 176% --- --- Solaris 11.4 176% 101% 175% --- --- Cygwin 181% 100% 181% --- --- native Windows 184% --- --- 180% crash Android 182% 101% 181% --- --- This means: * On all platforms, malloc() wastes a lot of memory: requesting 1 page actually allocates 2 pages. * On all platforms except OpenBSD, posix_memalign() wastes a lot of memory: requesting 1 page actually allocates 2 pages, on FreeBSD and NetBSD even 4 pages (!). * On all platforms except OpenBSD and native Windows, the mmap based approach is the winner: it hardly wastes any memory. * On native Windows, _aligned_malloc works but is essentially as bad as malloc. * On native Windows, there's a problem with my use of VirtualAlloc. If someone has a hint, that would be welcome. I therefore propose to 1) change the default back from posix_memalign to mmap, except on OpenBSD. 2) since the "list" is not going away, use a different implementation for it: either prev/next fields (doubly-linked list) or a hash-set container. Bruno
>From b4866d2991941cbd16e19f1d751fbb8a8f13647c Mon Sep 17 00:00:00 2001 From: Bruno Haible <br...@clisp.org> Date: Thu, 11 Sep 2025 16:59:54 +0200 Subject: [PATCH] pagealign_alloc: Add benchmark for comparing different implementations. * lib/pagealign_alloc.h (pagealign_impl_t): New type. (pagealign_impl): New declaration. * lib/pagealign_alloc.c: On native Windows, include <malloc.h> and <windows.h>. (pagealign_impl): New variable. (info_t): Change to a union type. (get_default_impl): New function. (pagealign_alloc, pagealign_free): Dispatch according to pagealign_impl. * tests/bench-pagealign_alloc.c: New file. * modules/pagealign_alloc-tests: New file. --- ChangeLog | 14 +++ lib/pagealign_alloc.c | 217 ++++++++++++++++++++++++++-------- lib/pagealign_alloc.h | 15 +++ modules/pagealign_alloc-tests | 13 ++ tests/bench-pagealign_alloc.c | 146 +++++++++++++++++++++++ 5 files changed, 357 insertions(+), 48 deletions(-) create mode 100644 modules/pagealign_alloc-tests create mode 100644 tests/bench-pagealign_alloc.c diff --git a/ChangeLog b/ChangeLog index 93901ebace..235fc24012 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,17 @@ +2025-09-11 Bruno Haible <br...@clisp.org> + + pagealign_alloc: Add benchmark for comparing different implementations. + * lib/pagealign_alloc.h (pagealign_impl_t): New type. + (pagealign_impl): New declaration. + * lib/pagealign_alloc.c: On native Windows, include <malloc.h> and + <windows.h>. + (pagealign_impl): New variable. + (info_t): Change to a union type. + (get_default_impl): New function. + (pagealign_alloc, pagealign_free): Dispatch according to pagealign_impl. + * tests/bench-pagealign_alloc.c: New file. + * modules/pagealign_alloc-tests: New file. + 2025-09-11 Bruno Haible <br...@clisp.org> pagealign_alloc, vma-prot tests: Fix use of HAVE_MMAP (regr. yesterday). diff --git a/lib/pagealign_alloc.c b/lib/pagealign_alloc.c index dc739b9509..28aca9baf5 100644 --- a/lib/pagealign_alloc.c +++ b/lib/pagealign_alloc.c @@ -19,6 +19,7 @@ #include <config.h> +/* Specification. */ #include "pagealign_alloc.h" #include <errno.h> @@ -31,6 +32,11 @@ #if HAVE_SYS_MMAN_H # include <sys/mman.h> #endif +#if defined _WIN32 && !defined __CYGWIN__ +# include <malloc.h> +# define WIN32_LEAN_AND_MEAN /* avoid including junk */ +# include <windows.h> +#endif #include <error.h> #include "xalloc.h" @@ -46,16 +52,22 @@ #endif -#if ! HAVE_POSIX_MEMALIGN +/* Implementation of pagealign_alloc. */ +pagealign_impl_t pagealign_impl; -# if HAVE_SYS_MMAN_H -/* For each memory region, we store its size. */ -typedef size_t info_t; -# else -/* For each memory region, we store the original pointer returned by - malloc(). */ -typedef void * info_t; -# endif +typedef union +{ + /* For PA_IMPL_MALLOC: + For each memory region, we store the original pointer returned by + malloc(). */ + void *pointer; + /* For PA_IMPL_MMAP, PA_IMPL_VIRTUAL_ALLOC: + For each memory region, we store its size. */ + size_t size; +} info_t; + + +/* For PA_IMPL_MALLOC, PA_IMPL_MMAP. */ /* A simple linked list of allocated memory regions. It is probably not the most efficient way to store these, but anyway... */ @@ -70,7 +82,6 @@ struct memnode_s /* The list of currently allocated memory regions. */ static memnode_t *memnode_table = NULL; - static void new_memnode (void *aligned_ptr, info_t info) { @@ -81,7 +92,6 @@ new_memnode (void *aligned_ptr, info_t info) memnode_table = new_node; } - /* Dispose of the memnode containing a map for the ALIGNED_PTR in question and return the content of the node's INFO field. */ static info_t @@ -108,48 +118,118 @@ get_memnode (void *aligned_ptr) return ret; } -#endif /* !HAVE_POSIX_MEMALIGN */ + +/* Returns the default implementation. */ +static pagealign_impl_t +get_default_impl (void) +{ +#if HAVE_POSIX_MEMALIGN + return PA_IMPL_POSIX_MEMALIGN; +#elif HAVE_SYS_MMAN_H + return PA_IMPL_MMAP; +#else + return PA_IMPL_MALLOC; +#endif +} void * pagealign_alloc (size_t size) { void *ret; -#if HAVE_POSIX_MEMALIGN - /* Prefer posix_memalign to malloc and mmap, - as it typically scales better when there are many allocations. */ - int status = posix_memalign (&ret, getpagesize (), size); - if (status) + pagealign_impl_t impl; + + impl = pagealign_impl; + if (impl == PA_IMPL_DEFAULT) + impl = get_default_impl (); + + switch (impl) { - errno = status; + case PA_IMPL_MALLOC: + { + size_t pagesize = getpagesize (); + void *unaligned_ptr = malloc (size + pagesize - 1); + if (unaligned_ptr == NULL) + { + /* Set errno. We don't know whether malloc already set errno: some + implementations of malloc do, some don't. */ + errno = ENOMEM; + return NULL; + } + ret = (char *) unaligned_ptr + + ((- (uintptr_t) unaligned_ptr) & (pagesize - 1)); + info_t info; + info.pointer = unaligned_ptr; + new_memnode (ret, info); + } + break; + + case PA_IMPL_MMAP: + #if HAVE_SYS_MMAN_H + ret = mmap (NULL, size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (ret == MAP_FAILED) + return NULL; + info_t info; + info.size = size; + new_memnode (ret, info); + break; + #else + errno = ENOSYS; return NULL; - } -#elif HAVE_SYS_MMAN_H - /* Prefer mmap to malloc, since the latter often wastes an entire - memory page per call. */ - ret = mmap (NULL, size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, - -1, 0); - if (ret == MAP_FAILED) - return NULL; - new_memnode (ret, size); -#else /* !HAVE_SYS_MMAN_H && !HAVE_POSIX_MEMALIGN */ - size_t pagesize = getpagesize (); - void *unaligned_ptr = malloc (size + pagesize - 1); - if (unaligned_ptr == NULL) - { - /* Set errno. We don't know whether malloc already set errno: some - implementations of malloc do, some don't. */ - errno = ENOMEM; + #endif + + case PA_IMPL_POSIX_MEMALIGN: + #if HAVE_POSIX_MEMALIGN + { + int status = posix_memalign (&ret, getpagesize (), size); + if (status) + { + errno = status; + return NULL; + } + } + break; + #else + errno = ENOSYS; + return NULL; + #endif + + case PA_IMPL_ALIGNED_MALLOC: + #if defined _WIN32 && !defined __CYGWIN__ + /* Documentation: + <https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/aligned-malloc> */ + return _aligned_malloc (size, getpagesize ()); + #else + errno = ENOSYS; return NULL; + #endif + + case PA_IMPL_VIRTUAL_ALLOC: + #if defined _WIN32 && !defined __CYGWIN__ + /* Documentation: + <https://learn.microsoft.com/en-us/windows/win32/api/memoryapi/nf-memoryapi-virtualalloc> */ + ret = VirtualAlloc (NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); + if (ret == NULL) + { + errno = ENOMEM; + return NULL; + } + info_t info; + info.size = size; + new_memnode (ret, info); + #else + errno = ENOSYS; + return NULL; + #endif + + default: + abort (); } - ret = (char *) unaligned_ptr - + ((- (uintptr_t) unaligned_ptr) & (pagesize - 1)); - new_memnode (ret, unaligned_ptr); -#endif /* HAVE_SYS_MMAN_H && HAVE_POSIX_MEMALIGN */ + return ret; } - void * pagealign_xalloc (size_t size) { @@ -165,12 +245,53 @@ pagealign_xalloc (size_t size) void pagealign_free (void *aligned_ptr) { -#if HAVE_POSIX_MEMALIGN - free (aligned_ptr); -#elif HAVE_SYS_MMAN_H - if (munmap (aligned_ptr, get_memnode (aligned_ptr)) < 0) - error (EXIT_FAILURE, errno, "Failed to unmap memory"); -#else - free (get_memnode (aligned_ptr)); -#endif + pagealign_impl_t impl; + + impl = pagealign_impl; + if (impl == PA_IMPL_DEFAULT) + impl = get_default_impl (); + + switch (impl) + { + case PA_IMPL_MALLOC: + free (get_memnode (aligned_ptr).pointer); + break; + + case PA_IMPL_MMAP: + #if HAVE_SYS_MMAN_H + if (munmap (aligned_ptr, get_memnode (aligned_ptr).size) < 0) + error (EXIT_FAILURE, errno, "Failed to unmap memory"); + #else + abort (); + #endif + + case PA_IMPL_POSIX_MEMALIGN: + #if HAVE_POSIX_MEMALIGN + free (aligned_ptr); + #else + abort (); + #endif + + case PA_IMPL_ALIGNED_MALLOC: + #if defined _WIN32 && !defined __CYGWIN__ + /* Documentation: + <https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/aligned-free> */ + _aligned_free (aligned_ptr); + #else + abort (); + #endif + + case PA_IMPL_VIRTUAL_ALLOC: + #if defined _WIN32 && !defined __CYGWIN__ + /* Documentation: + <https://learn.microsoft.com/en-us/windows/win32/api/memoryapi/nf-memoryapi-virtualfree> */ + if (!VirtualFree (aligned_ptr, get_memnode (aligned_ptr).size, MEM_RELEASE)) + error (EXIT_FAILURE, 0, "Failed to free memory"); + #else + abort (); + #endif + + default: + abort (); + } } diff --git a/lib/pagealign_alloc.h b/lib/pagealign_alloc.h index 3134d4e460..e04d895b48 100644 --- a/lib/pagealign_alloc.h +++ b/lib/pagealign_alloc.h @@ -55,6 +55,21 @@ extern void *pagealign_xalloc (size_t size) _GL_ATTRIBUTE_ALLOC_SIZE ((1)) _GL_ATTRIBUTE_RETURNS_NONNULL; +/* Selects the implementation of pagealign_alloc. + If you set pagealign_impl, it must be before the first invocation of + pagealign_alloc or pagealign_xalloc. */ +typedef enum +{ + PA_IMPL_DEFAULT = 0, /* platform-dependent default */ + PA_IMPL_MALLOC = 1, /* malloc */ + PA_IMPL_MMAP = 2, /* mmap (if available) */ + PA_IMPL_POSIX_MEMALIGN = 3, /* posix_memalign (if available) */ + PA_IMPL_ALIGNED_MALLOC = 4, /* _aligned_malloc (if available) */ + PA_IMPL_VIRTUAL_ALLOC = 5 /* VirtualAlloc (if available) */ +} pagealign_impl_t; +extern pagealign_impl_t pagealign_impl; + + #ifdef __cplusplus } #endif diff --git a/modules/pagealign_alloc-tests b/modules/pagealign_alloc-tests new file mode 100644 index 0000000000..bc9f16fc16 --- /dev/null +++ b/modules/pagealign_alloc-tests @@ -0,0 +1,13 @@ +Files: +tests/bench-pagealign_alloc.c + +Depends-on: +getpagesize +get-rusage-as +get-rusage-data + +configure.ac: + +Makefile.am: +noinst_PROGRAMS += bench-pagealign_alloc +bench_pagealign_alloc_CPPFLAGS = $(AM_CPPFLAGS) -DNDEBUG diff --git a/tests/bench-pagealign_alloc.c b/tests/bench-pagealign_alloc.c new file mode 100644 index 0000000000..bdba5bfecf --- /dev/null +++ b/tests/bench-pagealign_alloc.c @@ -0,0 +1,146 @@ +/* Benchmarks for the pagealign_alloc module. + Copyright (C) 2025 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +/* This benchmark focuses on the amount of allocated memory, in order + to minimize wasted memory. */ + +#include <config.h> + +/* Specification. */ +#include "pagealign_alloc.h" + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "resource-ext.h" + +static size_t pagesize; + +/* Does a few pagealign_alloc invocations. */ +static int +allocate_some_pages (void) +{ + int i; + + for (i = 1; i <= 9; i++) + if (pagealign_alloc (pagesize) == NULL) + return -1; + for (i = 1; i <= 2; i++) + if (pagealign_alloc (2 * pagesize) == NULL) + return -1; + for (i = 1; i <= 1; i++) + if (pagealign_alloc (3 * pagesize) == NULL) + return -1; + + return 0; +} + +/* Returns the total size requested by allocate_some_pages (). */ +static size_t +size_of_some_pages (void) +{ + return 16 * pagesize; +} + +static void +show_stats (intptr_t initial_as, intptr_t initial_data, int count) +{ + if (count == 0) + printf ("Allocation failed.\n"); + else + { + intptr_t requested = count * size_of_some_pages (); + intptr_t as_increase = get_rusage_as () - initial_as; + intptr_t data_increase = get_rusage_data () - initial_data; + printf ("count = %5d: requested: %6ld pages, allocated AS: %6ld pages (%.2f%%), allocated DATA: %6ld pages (%.2f%%)\n", + count, + (long int) (requested / pagesize), + (long int) (as_increase / pagesize), 100 * (double) as_increase / (double) requested, + (long int) (data_increase / pagesize), 100 * (double) data_increase / (double) requested); + } +} + +static void +do_test (void) +{ + intptr_t initial_as = get_rusage_as (); + intptr_t initial_data = get_rusage_data (); + int count; + for (count = 0; count <= 10000; count++) + { + if (count == 10 || count == 30 || count == 90 || count == 270 + || count == 810 || count == 2430 || count == 7290) + show_stats (initial_as, initial_data, count); + + if (allocate_some_pages () < 0) + break; + } + show_stats (initial_as, initial_data, count); +} + +int +main (int argc, char *argv[]) +{ + if (argc != 2) + { + fprintf (stderr, "Usage: %s TESTS\n", argv[0]); + fprintf (stderr, "Example: %s abcdef\n", argv[0]); + exit (1); + } + + const char *tests = argv[1]; + + pagesize = getpagesize (); + + /* Execute each test. */ + size_t i; + for (i = 0; i < strlen (tests); i++) + { + char test = tests[i]; + + switch (test) + { + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + pagealign_impl = test - 'a'; + do_test (); + break; + default: + /* Ignore. */ + ; + } + } + + return 0; +} + +/* Results: + + b c d e f + +glibc 176% 101% 175% --- --- +musl libc 198% 101% 197% --- --- +macOS 190% 100% 195% --- --- +FreeBSD 190% 100% 380% --- --- +NetBSD 185% 101% 379% --- --- +OpenBSD 209% 134% 105% --- --- +AIX 177% 101% 176% --- --- +Solaris 11.4 176% 101% 175% --- --- +Cygwin 181% 100% 181% --- --- +native Windows 184% --- --- 180% crash +Android 182% 101% 181% --- --- +*/ -- 2.50.1