> >     IMO, not wasting memory should be priority #1 here, before simplicity
> >     or multithread-safety. ('cvs' is not a multithreaded program.)
> 
> Hmm, well, if CVS is the only user and we have a fix that satisfies a 
> CVS maintainer, perhaps we can simply agree to disagree about this 
> particular simplicity-vs-performance tradeoff.

"agree to disagree" is not an option for me, until we have seen the
actual figures.

Therefore I'm committing this change that
  - adds a benchmark for the various implementations of pagealign_alloc,
  - for this purpose, allows to change the implementation without
    recompilation.

You can run the benchmark like this:
1)
  $ ./gnulib-tool --create-testdir --dir=../testdir1 pagealign_alloc
2) In the testdir:
  $ ./configure CPPFLAGS=-Wall
  $ make
  $ gltests/bench-pagealign_alloc b
  $ gltests/bench-pagealign_alloc c
  $ gltests/bench-pagealign_alloc d
  $ gltests/bench-pagealign_alloc e
  $ gltests/bench-pagealign_alloc f

where the implementation choice is:
  b:  malloc
  c:  mmap (if available)
  d:  posix_memalign (if available)
  e:  _aligned_malloc (if available)
  f:  VirtualAlloc (if available)

What I measure is:


                   b         c         d         e         f

glibc            176%      101%      175%       ---       ---
musl libc        198%      101%      197%       ---       ---
macOS            190%      100%      195%       ---       ---
FreeBSD          190%      100%      380%       ---       ---
NetBSD           185%      101%      379%       ---       ---
OpenBSD          209%      134%      105%       ---       ---
AIX              177%      101%      176%       ---       ---
Solaris 11.4     176%      101%      175%       ---       ---
Cygwin           181%      100%      181%       ---       ---
native Windows   184%       ---       ---      180%      crash
Android          182%      101%      181%       ---       ---


This means:
  * On all platforms, malloc() wastes a lot of memory: requesting 1 page
    actually allocates 2 pages.
  * On all platforms except OpenBSD, posix_memalign() wastes a lot of
    memory: requesting 1 page actually allocates 2 pages, on FreeBSD and
    NetBSD even 4 pages (!).
  * On all platforms except OpenBSD and native Windows, the mmap based
    approach is the winner: it hardly wastes any memory.
  * On native Windows, _aligned_malloc works but is essentially as bad
    as malloc.
  * On native Windows, there's a problem with my use of VirtualAlloc.
    If someone has a hint, that would be welcome.

I therefore propose to
  1) change the default back from posix_memalign to mmap,
     except on OpenBSD.
  2) since the "list" is not going away, use a different implementation
     for it: either prev/next fields (doubly-linked list) or a hash-set
     container.

Bruno
>From b4866d2991941cbd16e19f1d751fbb8a8f13647c Mon Sep 17 00:00:00 2001
From: Bruno Haible <br...@clisp.org>
Date: Thu, 11 Sep 2025 16:59:54 +0200
Subject: [PATCH] pagealign_alloc: Add benchmark for comparing different
 implementations.

* lib/pagealign_alloc.h (pagealign_impl_t): New type.
(pagealign_impl): New declaration.
* lib/pagealign_alloc.c: On native Windows, include <malloc.h> and
<windows.h>.
(pagealign_impl): New variable.
(info_t): Change to a union type.
(get_default_impl): New function.
(pagealign_alloc, pagealign_free): Dispatch according to pagealign_impl.
* tests/bench-pagealign_alloc.c: New file.
* modules/pagealign_alloc-tests: New file.
---
 ChangeLog                     |  14 +++
 lib/pagealign_alloc.c         | 217 ++++++++++++++++++++++++++--------
 lib/pagealign_alloc.h         |  15 +++
 modules/pagealign_alloc-tests |  13 ++
 tests/bench-pagealign_alloc.c | 146 +++++++++++++++++++++++
 5 files changed, 357 insertions(+), 48 deletions(-)
 create mode 100644 modules/pagealign_alloc-tests
 create mode 100644 tests/bench-pagealign_alloc.c

diff --git a/ChangeLog b/ChangeLog
index 93901ebace..235fc24012 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,17 @@
+2025-09-11  Bruno Haible  <br...@clisp.org>
+
+	pagealign_alloc: Add benchmark for comparing different implementations.
+	* lib/pagealign_alloc.h (pagealign_impl_t): New type.
+	(pagealign_impl): New declaration.
+	* lib/pagealign_alloc.c: On native Windows, include <malloc.h> and
+	<windows.h>.
+	(pagealign_impl): New variable.
+	(info_t): Change to a union type.
+	(get_default_impl): New function.
+	(pagealign_alloc, pagealign_free): Dispatch according to pagealign_impl.
+	* tests/bench-pagealign_alloc.c: New file.
+	* modules/pagealign_alloc-tests: New file.
+
 2025-09-11  Bruno Haible  <br...@clisp.org>
 
 	pagealign_alloc, vma-prot tests: Fix use of HAVE_MMAP (regr. yesterday).
diff --git a/lib/pagealign_alloc.c b/lib/pagealign_alloc.c
index dc739b9509..28aca9baf5 100644
--- a/lib/pagealign_alloc.c
+++ b/lib/pagealign_alloc.c
@@ -19,6 +19,7 @@
 
 #include <config.h>
 
+/* Specification.  */
 #include "pagealign_alloc.h"
 
 #include <errno.h>
@@ -31,6 +32,11 @@
 #if HAVE_SYS_MMAN_H
 # include <sys/mman.h>
 #endif
+#if defined _WIN32 && !defined __CYGWIN__
+# include <malloc.h>
+# define WIN32_LEAN_AND_MEAN /* avoid including junk */
+# include <windows.h>
+#endif
 
 #include <error.h>
 #include "xalloc.h"
@@ -46,16 +52,22 @@
 #endif
 
 
-#if ! HAVE_POSIX_MEMALIGN
+/* Implementation of pagealign_alloc.  */
+pagealign_impl_t pagealign_impl;
 
-# if HAVE_SYS_MMAN_H
-/* For each memory region, we store its size.  */
-typedef size_t info_t;
-# else
-/* For each memory region, we store the original pointer returned by
-   malloc().  */
-typedef void * info_t;
-# endif
+typedef union
+{
+  /* For PA_IMPL_MALLOC:
+     For each memory region, we store the original pointer returned by
+     malloc().  */
+  void *pointer;
+  /* For PA_IMPL_MMAP, PA_IMPL_VIRTUAL_ALLOC:
+     For each memory region, we store its size.  */
+  size_t size;
+} info_t;
+
+
+/* For PA_IMPL_MALLOC, PA_IMPL_MMAP.  */
 
 /* A simple linked list of allocated memory regions.  It is probably not the
    most efficient way to store these, but anyway...  */
@@ -70,7 +82,6 @@ struct memnode_s
 /* The list of currently allocated memory regions.  */
 static memnode_t *memnode_table = NULL;
 
-
 static void
 new_memnode (void *aligned_ptr, info_t info)
 {
@@ -81,7 +92,6 @@ new_memnode (void *aligned_ptr, info_t info)
   memnode_table = new_node;
 }
 
-
 /* Dispose of the memnode containing a map for the ALIGNED_PTR in question
    and return the content of the node's INFO field.  */
 static info_t
@@ -108,48 +118,118 @@ get_memnode (void *aligned_ptr)
   return ret;
 }
 
-#endif /* !HAVE_POSIX_MEMALIGN */
+
+/* Returns the default implementation.  */
+static pagealign_impl_t
+get_default_impl (void)
+{
+#if HAVE_POSIX_MEMALIGN
+  return PA_IMPL_POSIX_MEMALIGN;
+#elif HAVE_SYS_MMAN_H
+  return PA_IMPL_MMAP;
+#else
+  return PA_IMPL_MALLOC;
+#endif
+}
 
 
 void *
 pagealign_alloc (size_t size)
 {
   void *ret;
-#if HAVE_POSIX_MEMALIGN
-  /* Prefer posix_memalign to malloc and mmap,
-     as it typically scales better when there are many allocations.  */
-  int status = posix_memalign (&ret, getpagesize (), size);
-  if (status)
+  pagealign_impl_t impl;
+
+  impl = pagealign_impl;
+  if (impl == PA_IMPL_DEFAULT)
+    impl = get_default_impl ();
+
+  switch (impl)
     {
-      errno = status;
+    case PA_IMPL_MALLOC:
+      {
+        size_t pagesize = getpagesize ();
+        void *unaligned_ptr = malloc (size + pagesize - 1);
+        if (unaligned_ptr == NULL)
+          {
+            /* Set errno.  We don't know whether malloc already set errno: some
+               implementations of malloc do, some don't.  */
+            errno = ENOMEM;
+            return NULL;
+          }
+        ret = (char *) unaligned_ptr
+              + ((- (uintptr_t) unaligned_ptr) & (pagesize - 1));
+        info_t info;
+        info.pointer = unaligned_ptr;
+        new_memnode (ret, info);
+      }
+      break;
+
+    case PA_IMPL_MMAP:
+      #if HAVE_SYS_MMAN_H
+      ret = mmap (NULL, size, PROT_READ | PROT_WRITE,
+                  MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+      if (ret == MAP_FAILED)
+        return NULL;
+      info_t info;
+      info.size = size;
+      new_memnode (ret, info);
+      break;
+      #else
+      errno = ENOSYS;
       return NULL;
-    }
-#elif HAVE_SYS_MMAN_H
-  /* Prefer mmap to malloc, since the latter often wastes an entire
-     memory page per call.  */
-  ret = mmap (NULL, size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE,
-              -1, 0);
-  if (ret == MAP_FAILED)
-    return NULL;
-  new_memnode (ret, size);
-#else /* !HAVE_SYS_MMAN_H && !HAVE_POSIX_MEMALIGN */
-  size_t pagesize = getpagesize ();
-  void *unaligned_ptr = malloc (size + pagesize - 1);
-  if (unaligned_ptr == NULL)
-    {
-      /* Set errno.  We don't know whether malloc already set errno: some
-         implementations of malloc do, some don't.  */
-      errno = ENOMEM;
+      #endif
+
+    case PA_IMPL_POSIX_MEMALIGN:
+      #if HAVE_POSIX_MEMALIGN
+      {
+        int status = posix_memalign (&ret, getpagesize (), size);
+        if (status)
+          {
+            errno = status;
+            return NULL;
+          }
+      }
+      break;
+      #else
+      errno = ENOSYS;
+      return NULL;
+      #endif
+
+    case PA_IMPL_ALIGNED_MALLOC:
+      #if defined _WIN32 && !defined __CYGWIN__
+      /* Documentation:
+         <https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/aligned-malloc>  */
+      return _aligned_malloc (size, getpagesize ());
+      #else
+      errno = ENOSYS;
       return NULL;
+      #endif
+
+    case PA_IMPL_VIRTUAL_ALLOC:
+      #if defined _WIN32 && !defined __CYGWIN__
+      /* Documentation:
+         <https://learn.microsoft.com/en-us/windows/win32/api/memoryapi/nf-memoryapi-virtualalloc>  */
+      ret = VirtualAlloc (NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
+      if (ret == NULL)
+        {
+          errno = ENOMEM;
+          return NULL;
+        }
+      info_t info;
+      info.size = size;
+      new_memnode (ret, info);
+      #else
+      errno = ENOSYS;
+      return NULL;
+      #endif
+
+    default:
+      abort ();
     }
-  ret = (char *) unaligned_ptr
-        + ((- (uintptr_t) unaligned_ptr) & (pagesize - 1));
-  new_memnode (ret, unaligned_ptr);
-#endif /* HAVE_SYS_MMAN_H && HAVE_POSIX_MEMALIGN */
+
   return ret;
 }
 
-
 void *
 pagealign_xalloc (size_t size)
 {
@@ -165,12 +245,53 @@ pagealign_xalloc (size_t size)
 void
 pagealign_free (void *aligned_ptr)
 {
-#if HAVE_POSIX_MEMALIGN
-  free (aligned_ptr);
-#elif HAVE_SYS_MMAN_H
-  if (munmap (aligned_ptr, get_memnode (aligned_ptr)) < 0)
-    error (EXIT_FAILURE, errno, "Failed to unmap memory");
-#else
-  free (get_memnode (aligned_ptr));
-#endif
+  pagealign_impl_t impl;
+
+  impl = pagealign_impl;
+  if (impl == PA_IMPL_DEFAULT)
+    impl = get_default_impl ();
+
+  switch (impl)
+    {
+    case PA_IMPL_MALLOC:
+      free (get_memnode (aligned_ptr).pointer);
+      break;
+
+    case PA_IMPL_MMAP:
+      #if HAVE_SYS_MMAN_H
+      if (munmap (aligned_ptr, get_memnode (aligned_ptr).size) < 0)
+        error (EXIT_FAILURE, errno, "Failed to unmap memory");
+      #else
+      abort ();
+      #endif
+
+    case PA_IMPL_POSIX_MEMALIGN:
+      #if HAVE_POSIX_MEMALIGN
+      free (aligned_ptr);
+      #else
+      abort ();
+      #endif
+
+    case PA_IMPL_ALIGNED_MALLOC:
+      #if defined _WIN32 && !defined __CYGWIN__
+      /* Documentation:
+         <https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/aligned-free>  */
+      _aligned_free (aligned_ptr);
+      #else
+      abort ();
+      #endif
+
+    case PA_IMPL_VIRTUAL_ALLOC:
+      #if defined _WIN32 && !defined __CYGWIN__
+      /* Documentation:
+         <https://learn.microsoft.com/en-us/windows/win32/api/memoryapi/nf-memoryapi-virtualfree>  */
+      if (!VirtualFree (aligned_ptr, get_memnode (aligned_ptr).size, MEM_RELEASE))
+        error (EXIT_FAILURE, 0, "Failed to free memory");
+      #else
+      abort ();
+      #endif
+
+    default:
+      abort ();
+    }
 }
diff --git a/lib/pagealign_alloc.h b/lib/pagealign_alloc.h
index 3134d4e460..e04d895b48 100644
--- a/lib/pagealign_alloc.h
+++ b/lib/pagealign_alloc.h
@@ -55,6 +55,21 @@ extern void *pagealign_xalloc (size_t size)
   _GL_ATTRIBUTE_ALLOC_SIZE ((1)) _GL_ATTRIBUTE_RETURNS_NONNULL;
 
 
+/* Selects the implementation of pagealign_alloc.
+   If you set pagealign_impl, it must be before the first invocation of
+   pagealign_alloc or pagealign_xalloc.  */
+typedef enum
+{
+  PA_IMPL_DEFAULT        = 0, /* platform-dependent default */
+  PA_IMPL_MALLOC         = 1, /* malloc */
+  PA_IMPL_MMAP           = 2, /* mmap (if available) */
+  PA_IMPL_POSIX_MEMALIGN = 3, /* posix_memalign (if available) */
+  PA_IMPL_ALIGNED_MALLOC = 4, /* _aligned_malloc (if available) */
+  PA_IMPL_VIRTUAL_ALLOC  = 5  /* VirtualAlloc (if available) */
+} pagealign_impl_t;
+extern pagealign_impl_t pagealign_impl;
+
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/modules/pagealign_alloc-tests b/modules/pagealign_alloc-tests
new file mode 100644
index 0000000000..bc9f16fc16
--- /dev/null
+++ b/modules/pagealign_alloc-tests
@@ -0,0 +1,13 @@
+Files:
+tests/bench-pagealign_alloc.c
+
+Depends-on:
+getpagesize
+get-rusage-as
+get-rusage-data
+
+configure.ac:
+
+Makefile.am:
+noinst_PROGRAMS += bench-pagealign_alloc
+bench_pagealign_alloc_CPPFLAGS = $(AM_CPPFLAGS) -DNDEBUG
diff --git a/tests/bench-pagealign_alloc.c b/tests/bench-pagealign_alloc.c
new file mode 100644
index 0000000000..bdba5bfecf
--- /dev/null
+++ b/tests/bench-pagealign_alloc.c
@@ -0,0 +1,146 @@
+/* Benchmarks for the pagealign_alloc module.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+/* This benchmark focuses on the amount of allocated memory, in order
+   to minimize wasted memory.  */
+
+#include <config.h>
+
+/* Specification.  */
+#include "pagealign_alloc.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "resource-ext.h"
+
+static size_t pagesize;
+
+/* Does a few pagealign_alloc invocations.  */
+static int
+allocate_some_pages (void)
+{
+  int i;
+
+  for (i = 1; i <= 9; i++)
+    if (pagealign_alloc (pagesize) == NULL)
+      return -1;
+  for (i = 1; i <= 2; i++)
+    if (pagealign_alloc (2 * pagesize) == NULL)
+      return -1;
+  for (i = 1; i <= 1; i++)
+    if (pagealign_alloc (3 * pagesize) == NULL)
+      return -1;
+
+  return 0;
+}
+
+/* Returns the total size requested by allocate_some_pages ().  */
+static size_t
+size_of_some_pages (void)
+{
+  return 16 * pagesize;
+}
+
+static void
+show_stats (intptr_t initial_as, intptr_t initial_data, int count)
+{
+  if (count == 0)
+    printf ("Allocation failed.\n");
+  else
+    {
+      intptr_t requested = count * size_of_some_pages ();
+      intptr_t as_increase = get_rusage_as () - initial_as;
+      intptr_t data_increase = get_rusage_data () - initial_data;
+      printf ("count = %5d: requested: %6ld pages, allocated AS: %6ld pages (%.2f%%), allocated DATA: %6ld pages (%.2f%%)\n",
+              count,
+              (long int) (requested / pagesize),
+              (long int) (as_increase / pagesize), 100 * (double) as_increase / (double) requested,
+              (long int) (data_increase / pagesize), 100 * (double) data_increase / (double) requested);
+    }
+}
+
+static void
+do_test (void)
+{
+  intptr_t initial_as = get_rusage_as ();
+  intptr_t initial_data = get_rusage_data ();
+  int count;
+  for (count = 0; count <= 10000; count++)
+    {
+      if (count == 10 || count == 30 || count == 90 || count == 270
+          || count == 810 || count == 2430 || count == 7290)
+        show_stats (initial_as, initial_data, count);
+
+      if (allocate_some_pages () < 0)
+        break;
+    }
+  show_stats (initial_as, initial_data, count);
+}
+
+int
+main (int argc, char *argv[])
+{
+  if (argc != 2)
+    {
+      fprintf (stderr, "Usage: %s TESTS\n", argv[0]);
+      fprintf (stderr, "Example: %s abcdef\n", argv[0]);
+      exit (1);
+    }
+
+  const char *tests = argv[1];
+
+  pagesize = getpagesize ();
+
+  /* Execute each test.  */
+  size_t i;
+  for (i = 0; i < strlen (tests); i++)
+    {
+      char test = tests[i];
+
+      switch (test)
+        {
+        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+          pagealign_impl = test - 'a';
+          do_test ();
+          break;
+        default:
+          /* Ignore.  */
+          ;
+        }
+    }
+
+  return 0;
+}
+
+/* Results:
+
+                   b         c         d         e         f
+
+glibc            176%      101%      175%       ---       ---
+musl libc        198%      101%      197%       ---       ---
+macOS            190%      100%      195%       ---       ---
+FreeBSD          190%      100%      380%       ---       ---
+NetBSD           185%      101%      379%       ---       ---
+OpenBSD          209%      134%      105%       ---       ---
+AIX              177%      101%      176%       ---       ---
+Solaris 11.4     176%      101%      175%       ---       ---
+Cygwin           181%      100%      181%       ---       ---
+native Windows   184%       ---       ---      180%      crash
+Android          182%      101%      181%       ---       ---
+*/
-- 
2.50.1

Reply via email to