Paul Eggert wrote:
> lib/stdbit.c was present only to define private helper functions.
> Move them into lib/stdc_leading_zeros.c and
> lib/stdc_trailing_zeros.c, depending on what they help.
Good point.
Now, it's probably better to define these private helper functions
next to the code that uses them, rather than up front:
- to fulfil the principle "keep things together that are related",
- to avoid defining these helper functions if the modules
stdc_leading_zeros and stdc_trailing_zeros are not in use,
- to make it obvious which _GL_*_INLINE macro needs to be used,
- to make it obvious where the variable _gl_stdbit_popcount_support
needs to be defined.
Done through this patch.
2026-03-16 Bruno Haible <[email protected]>
stdbit-h: Reorder definitions.
* lib/stdbit.in.h: Move private helper macros and functions to the
section conditionalized by @GNULIB_STDC_LEADING_ZEROS@,
@GNULIB_STDC_TRAILING_ZEROS@, @GNULIB_STDC_COUNT_ONES@, respectively.
From 697211b4c5b004d7c0516debfafa73687b1b90c0 Mon Sep 17 00:00:00 2001
From: Bruno Haible <[email protected]>
Date: Mon, 16 Mar 2026 10:12:46 +0100
Subject: [PATCH] stdbit-h: Reorder definitions.
* lib/stdbit.in.h: Move private helper macros and functions to the
section conditionalized by @GNULIB_STDC_LEADING_ZEROS@,
@GNULIB_STDC_TRAILING_ZEROS@, @GNULIB_STDC_COUNT_ONES@, respectively.
---
ChangeLog | 7 +
lib/stdbit.in.h | 455 ++++++++++++++++++++++++------------------------
2 files changed, 237 insertions(+), 225 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index d6e3a6fecd..d804e6a13a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2026-03-16 Bruno Haible <[email protected]>
+
+ stdbit-h: Reorder definitions.
+ * lib/stdbit.in.h: Move private helper macros and functions to the
+ section conditionalized by @GNULIB_STDC_LEADING_ZEROS@,
+ @GNULIB_STDC_TRAILING_ZEROS@, @GNULIB_STDC_COUNT_ONES@, respectively.
+
2026-03-16 Bruno Haible <[email protected]>
stdbit-h: Fix syntax error in C++ mode (regression 2026-03-12).
diff --git a/lib/stdbit.in.h b/lib/stdbit.in.h
index 30e3b0b175..8f70855615 100644
--- a/lib/stdbit.in.h
+++ b/lib/stdbit.in.h
@@ -212,30 +212,22 @@ extern "C" {
#endif
-#if 3 < __GNUC__ + (4 <= __GNUC_MINOR__) || 4 <= __clang_major__
-# define _GL_STDBIT_HAS_BUILTIN_CLZ true
-# define _GL_STDBIT_HAS_BUILTIN_CTZ true
-# define _GL_STDBIT_HAS_BUILTIN_POPCOUNT true
-#elif defined __has_builtin
-# if (__has_builtin (__builtin_clz) \
- && __has_builtin (__builtin_clzl) \
- && __has_builtin (__builtin_clzll))
+/* ISO C 23 § 7.18.3 Count Leading Zeros */
+
+#if @GNULIB_STDC_LEADING_ZEROS@
+
+# if 3 < __GNUC__ + (4 <= __GNUC_MINOR__) || 4 <= __clang_major__
# define _GL_STDBIT_HAS_BUILTIN_CLZ true
+# elif defined __has_builtin
+# if (__has_builtin (__builtin_clz) \
+ && __has_builtin (__builtin_clzl) \
+ && __has_builtin (__builtin_clzll))
+# define _GL_STDBIT_HAS_BUILTIN_CLZ true
+# endif
# endif
-# if (__has_builtin (__builtin_ctz) \
- && __has_builtin (__builtin_ctzl) \
- && __has_builtin (__builtin_ctzll))
-# define _GL_STDBIT_HAS_BUILTIN_CTZ true
-# endif
-# if (__has_builtin (__builtin_popcount) \
- && __has_builtin (__builtin_popcountl) \
- && __has_builtin (__builtin_popcountll))
-# define _GL_STDBIT_HAS_BUILTIN_POPCOUNT true
-# endif
-#endif
/* Count leading 0 bits of N, even if N is 0. */
-#ifdef _GL_STDBIT_HAS_BUILTIN_CLZ
+# ifdef _GL_STDBIT_HAS_BUILTIN_CLZ
_GL_STDC_LEADING_ZEROS_INLINE int
_gl_stdbit_clz (unsigned int n)
{
@@ -251,16 +243,16 @@ _gl_stdbit_clzll (unsigned long long int n)
{
return n ? __builtin_clzll (n) : 8 * sizeof n;
}
-#elif defined _MSC_VER
+# elif defined _MSC_VER
/* Declare the few MSVC intrinsics that we need. We prefer not to include
<intrin.h> because it would pollute the namespace. */
extern unsigned char _BitScanReverse (unsigned long *, unsigned long);
-# pragma intrinsic (_BitScanReverse)
-# ifdef _M_X64
+# pragma intrinsic (_BitScanReverse)
+# ifdef _M_X64
extern unsigned char _BitScanReverse64 (unsigned long *, unsigned long long);
-# pragma intrinsic (_BitScanReverse64)
-# endif
+# pragma intrinsic (_BitScanReverse64)
+# endif
_GL_STDC_LEADING_ZEROS_INLINE int
_gl_stdbit_clzl (unsigned long int n)
@@ -276,16 +268,16 @@ _gl_stdbit_clz (unsigned int n)
_GL_STDC_LEADING_ZEROS_INLINE int
_gl_stdbit_clzll (unsigned long long int n)
{
-# ifdef _M_X64
+# ifdef _M_X64
unsigned long int r;
return 8 * sizeof n - (_BitScanReverse64 (&r, n) ? r + 1 : 0);
-# else
+# else
unsigned long int hi = n >> 32;
return _gl_stdbit_clzl (hi ? hi : n) + (hi ? 0 : 32);
-# endif
+# endif
}
-#else /* !_MSC_VER */
+# else /* !_MSC_VER */
_GL_STDC_LEADING_ZEROS_INLINE int
_gl_stdbit_clzll (unsigned long long int n)
@@ -311,203 +303,7 @@ _gl_stdbit_clzl (unsigned long int n)
{
return _gl_stdbit_clzll (n) - 8 * (sizeof 0ull - sizeof 0ul);
}
-#endif
-
-/* Count trailing 0 bits of N, even if N is 0. */
-#ifdef _GL_STDBIT_HAS_BUILTIN_CTZ
-_GL_STDC_TRAILING_ZEROS_INLINE int
-_gl_stdbit_ctz (unsigned int n)
-{
- return n ? __builtin_ctz (n) : 8 * sizeof n;
-}
-_GL_STDC_TRAILING_ZEROS_INLINE int
-_gl_stdbit_ctzl (unsigned long int n)
-{
- return n ? __builtin_ctzl (n) : 8 * sizeof n;
-}
-_GL_STDC_TRAILING_ZEROS_INLINE int
-_gl_stdbit_ctzll (unsigned long long int n)
-{
- return n ? __builtin_ctzll (n) : 8 * sizeof n;
-}
-#elif defined _MSC_VER
-
-/* Declare the few MSVC intrinsics that we need. We prefer not to include
- <intrin.h> because it would pollute the namespace. */
-extern unsigned char _BitScanForward (unsigned long *, unsigned long);
-# pragma intrinsic (_BitScanForward)
-# ifdef _M_X64
-extern unsigned char _BitScanForward64 (unsigned long *, unsigned long long);
-# pragma intrinsic (_BitScanForward64)
-# endif
-
-_GL_STDC_TRAILING_ZEROS_INLINE int
-_gl_stdbit_ctzl (unsigned long int n)
-{
- unsigned long int r;
- return _BitScanForward (&r, n) ? r : 8 * sizeof n;
-}
-_GL_STDC_TRAILING_ZEROS_INLINE int
-_gl_stdbit_ctz (unsigned int n)
-{
- return _gl_stdbit_ctzl (n | (1ul << (8 * sizeof n - 1) << 1));
-}
-_GL_STDC_TRAILING_ZEROS_INLINE int
-_gl_stdbit_ctzll (unsigned long long int n)
-{
-# ifdef _M_X64
- unsigned long int r;
- return _BitScanForward64 (&r, n) ? r : 8 * sizeof n;
-# else
- unsigned int lo = n;
- return _gl_stdbit_ctzl (lo ? lo : n >> 32) + (lo ? 0 : 32);
# endif
-}
-
-#else /* !_MSC_VER */
-
-_GL_STDC_TRAILING_ZEROS_INLINE int
-_gl_stdbit_ctz (unsigned int n)
-{
- return 8 * sizeof n - (n ? _gl_stdbit_clz (n & -n) + 1 : 0);
-}
-_GL_STDC_TRAILING_ZEROS_INLINE int
-_gl_stdbit_ctzl (unsigned long int n)
-{
- return 8 * sizeof n - (n ? _gl_stdbit_clzl (n & -n) + 1 : 0);
-}
-_GL_STDC_TRAILING_ZEROS_INLINE int
-_gl_stdbit_ctzll (unsigned long long int n)
-{
- return 8 * sizeof n - (n ? _gl_stdbit_clzll (n & -n) + 1 : 0);
-}
-#endif
-
-#if @GNULIB_STDC_COUNT_ONES@
-/* Count 1 bits in N. */
-# ifdef _GL_STDBIT_HAS_BUILTIN_POPCOUNT
-# define _gl_stdbit_popcount __builtin_popcount
-# define _gl_stdbit_popcountl __builtin_popcountl
-# define _gl_stdbit_popcountll __builtin_popcountll
-# else
-_GL_STDC_COUNT_ONES_INLINE int
-_gl_stdbit_popcount_wide (unsigned long long int n)
-{
- if (sizeof n & (sizeof n - 1))
- {
- /* Use a simple O(log N) loop on theoretical platforms where N's
- width is not a power of 2. */
- int count = 0;
- for (int i = 0; i < 8 * sizeof n; i++, n >>= 1)
- count += n & 1;
- return count;
- }
- else
- {
- /* N's width is a power of 2; count in parallel. */
- unsigned long long int
- max = -1ull,
- x555555 = max / (1 << 1 | 1), /* 0x555555... */
- x333333 = max / (1 << 2 | 1), /* 0x333333... */
- x0f0f0f = max / (1 << 4 | 1), /* 0x0f0f0f... */
- x010101 = max / ((1 << 8) - 1), /* 0x010101... */
- x000_7f = max / 0xffffffffffffffffLL * 0x7f; /* 0x000000000000007f... */
- n -= (n >> 1) & x555555;
- n = (n & x333333) + ((n >> 2) & x333333);
- n = (n + (n >> 4)) & x0f0f0f;
-
- /* If the popcount always fits in 8 bits, multiply so that the
- popcount is in the leading 8 bits of the product; these days
- this is typically faster than the alternative below. */
- if (8 * sizeof n < 1 << 8)
- return n * x010101 >> 8 * (sizeof n - 1);
-
- /* N is at least 256 bits wide! Fall back on an O(log log N)
- loop that a compiler could unroll. Unroll the first three
- iterations by hand, to skip some division and masking. This
- is the most we can easily do without hassling with constants
- that a typical-platform compiler would reject. */
- n += n >> (1 << 3);
- n += n >> (1 << 4);
- n += n >> (1 << 5);
- n &= x000_7f;
- for (int i = 64; i < 8 * sizeof n; i <<= 1)
- n = (n + (n >> i)) & max / (1ull << i | 1);
- return n;
- }
-}
-
-# ifdef _MSC_VER
-# if 1500 <= _MSC_VER && (defined _M_IX86 || defined _M_X64)
-/* Declare the few MSVC intrinsics that we need. We prefer not to include
- <intrin.h> because it would pollute the namespace. */
-extern void __cpuid (int[4], int);
-# pragma intrinsic (__cpuid)
-extern unsigned int __popcnt (unsigned int);
-# pragma intrinsic (__popcnt)
-# ifdef _M_X64
-extern unsigned long long __popcnt64 (unsigned long long);
-# pragma intrinsic (__popcnt64)
-# else
-_GL_STDC_COUNT_ONES_INLINE int
-__popcnt64 (unsigned long long int n)
-{
- return __popcnt (n >> 32) + __popcnt (n);
-}
-# endif
-# endif
-
-/* 1 if supported, -1 if not, 0 if unknown. */
-extern signed char _gl_stdbit_popcount_support;
-
-_GL_STDC_COUNT_ONES_INLINE bool
-_gl_stdbit_popcount_supported (void)
-{
- if (!_gl_stdbit_popcount_support)
- {
- /* Do as described in
- <https://docs.microsoft.com/en-us/cpp/intrinsics/popcnt16-popcnt-popcnt64>
- Although Microsoft started requiring POPCNT in MS-Windows 11 24H2,
- we'll be more cautious. */
- int cpu_info[4];
- __cpuid (cpu_info, 1);
- _gl_stdbit_popcount_support = cpu_info[2] & 1 << 23 ? 1 : -1;
- }
- return 0 < _gl_stdbit_popcount_support;
-}
-_GL_STDC_COUNT_ONES_INLINE int
-_gl_stdbit_popcount (unsigned int n)
-{
- return (_gl_stdbit_popcount_supported ()
- ? __popcnt (n)
- : _gl_stdbit_popcount_wide (n));
-}
-_GL_STDC_COUNT_ONES_INLINE int
-_gl_stdbit_popcountl (unsigned long int n)
-{
- return (_gl_stdbit_popcount_supported ()
- ? __popcnt (n)
- : _gl_stdbit_popcount_wide (n));
-}
-_GL_STDC_COUNT_ONES_INLINE int
-_gl_stdbit_popcountll (unsigned long long int n)
-{
- return (_gl_stdbit_popcount_supported ()
- ? __popcnt64 (n)
- : _gl_stdbit_popcount_wide (n));
-}
-# else /* !_MSC_VER */
-# define _gl_stdbit_popcount _gl_stdbit_popcount_wide
-# define _gl_stdbit_popcountl _gl_stdbit_popcount_wide
-# define _gl_stdbit_popcountll _gl_stdbit_popcount_wide
-# endif
-# endif
-#endif
-
-
-/* ISO C 23 § 7.18.3 Count Leading Zeros */
-
-#if @GNULIB_STDC_LEADING_ZEROS@
_GL_STDC_LEADING_ZEROS_INLINE unsigned int
stdc_leading_zeros_ui (unsigned int n)
@@ -597,6 +393,86 @@ stdc_leading_ones_ull (unsigned long long int n)
#if @GNULIB_STDC_TRAILING_ZEROS@
+# if 3 < __GNUC__ + (4 <= __GNUC_MINOR__) || 4 <= __clang_major__
+# define _GL_STDBIT_HAS_BUILTIN_CTZ true
+# elif defined __has_builtin
+# if (__has_builtin (__builtin_ctz) \
+ && __has_builtin (__builtin_ctzl) \
+ && __has_builtin (__builtin_ctzll))
+# define _GL_STDBIT_HAS_BUILTIN_CTZ true
+# endif
+# endif
+
+/* Count trailing 0 bits of N, even if N is 0. */
+# ifdef _GL_STDBIT_HAS_BUILTIN_CTZ
+_GL_STDC_TRAILING_ZEROS_INLINE int
+_gl_stdbit_ctz (unsigned int n)
+{
+ return n ? __builtin_ctz (n) : 8 * sizeof n;
+}
+_GL_STDC_TRAILING_ZEROS_INLINE int
+_gl_stdbit_ctzl (unsigned long int n)
+{
+ return n ? __builtin_ctzl (n) : 8 * sizeof n;
+}
+_GL_STDC_TRAILING_ZEROS_INLINE int
+_gl_stdbit_ctzll (unsigned long long int n)
+{
+ return n ? __builtin_ctzll (n) : 8 * sizeof n;
+}
+# elif defined _MSC_VER
+
+/* Declare the few MSVC intrinsics that we need. We prefer not to include
+ <intrin.h> because it would pollute the namespace. */
+extern unsigned char _BitScanForward (unsigned long *, unsigned long);
+# pragma intrinsic (_BitScanForward)
+# ifdef _M_X64
+extern unsigned char _BitScanForward64 (unsigned long *, unsigned long long);
+# pragma intrinsic (_BitScanForward64)
+# endif
+
+_GL_STDC_TRAILING_ZEROS_INLINE int
+_gl_stdbit_ctzl (unsigned long int n)
+{
+ unsigned long int r;
+ return _BitScanForward (&r, n) ? r : 8 * sizeof n;
+}
+_GL_STDC_TRAILING_ZEROS_INLINE int
+_gl_stdbit_ctz (unsigned int n)
+{
+ return _gl_stdbit_ctzl (n | (1ul << (8 * sizeof n - 1) << 1));
+}
+_GL_STDC_TRAILING_ZEROS_INLINE int
+_gl_stdbit_ctzll (unsigned long long int n)
+{
+# ifdef _M_X64
+ unsigned long int r;
+ return _BitScanForward64 (&r, n) ? r : 8 * sizeof n;
+# else
+ unsigned int lo = n;
+ return _gl_stdbit_ctzl (lo ? lo : n >> 32) + (lo ? 0 : 32);
+# endif
+}
+
+# else /* !_MSC_VER */
+
+_GL_STDC_TRAILING_ZEROS_INLINE int
+_gl_stdbit_ctz (unsigned int n)
+{
+ return 8 * sizeof n - (n ? _gl_stdbit_clz (n & -n) + 1 : 0);
+}
+_GL_STDC_TRAILING_ZEROS_INLINE int
+_gl_stdbit_ctzl (unsigned long int n)
+{
+ return 8 * sizeof n - (n ? _gl_stdbit_clzl (n & -n) + 1 : 0);
+}
+_GL_STDC_TRAILING_ZEROS_INLINE int
+_gl_stdbit_ctzll (unsigned long long int n)
+{
+ return 8 * sizeof n - (n ? _gl_stdbit_clzll (n & -n) + 1 : 0);
+}
+# endif
+
_GL_STDC_TRAILING_ZEROS_INLINE unsigned int
stdc_trailing_zeros_ui (unsigned int n)
{
@@ -901,6 +777,135 @@ stdc_first_trailing_one_ull (unsigned long long int n)
#if @GNULIB_STDC_COUNT_ONES@
+# if 3 < __GNUC__ + (4 <= __GNUC_MINOR__) || 4 <= __clang_major__
+# define _GL_STDBIT_HAS_BUILTIN_POPCOUNT true
+# elif defined __has_builtin
+# if (__has_builtin (__builtin_popcount) \
+ && __has_builtin (__builtin_popcountl) \
+ && __has_builtin (__builtin_popcountll))
+# define _GL_STDBIT_HAS_BUILTIN_POPCOUNT true
+# endif
+# endif
+
+/* Count 1 bits in N. */
+# ifdef _GL_STDBIT_HAS_BUILTIN_POPCOUNT
+# define _gl_stdbit_popcount __builtin_popcount
+# define _gl_stdbit_popcountl __builtin_popcountl
+# define _gl_stdbit_popcountll __builtin_popcountll
+# else
+_GL_STDC_COUNT_ONES_INLINE int
+_gl_stdbit_popcount_wide (unsigned long long int n)
+{
+ if (sizeof n & (sizeof n - 1))
+ {
+ /* Use a simple O(log N) loop on theoretical platforms where N's
+ width is not a power of 2. */
+ int count = 0;
+ for (int i = 0; i < 8 * sizeof n; i++, n >>= 1)
+ count += n & 1;
+ return count;
+ }
+ else
+ {
+ /* N's width is a power of 2; count in parallel. */
+ unsigned long long int
+ max = -1ull,
+ x555555 = max / (1 << 1 | 1), /* 0x555555... */
+ x333333 = max / (1 << 2 | 1), /* 0x333333... */
+ x0f0f0f = max / (1 << 4 | 1), /* 0x0f0f0f... */
+ x010101 = max / ((1 << 8) - 1), /* 0x010101... */
+ x000_7f = max / 0xffffffffffffffffLL * 0x7f; /* 0x000000000000007f... */
+ n -= (n >> 1) & x555555;
+ n = (n & x333333) + ((n >> 2) & x333333);
+ n = (n + (n >> 4)) & x0f0f0f;
+
+ /* If the popcount always fits in 8 bits, multiply so that the
+ popcount is in the leading 8 bits of the product; these days
+ this is typically faster than the alternative below. */
+ if (8 * sizeof n < 1 << 8)
+ return n * x010101 >> 8 * (sizeof n - 1);
+
+ /* N is at least 256 bits wide! Fall back on an O(log log N)
+ loop that a compiler could unroll. Unroll the first three
+ iterations by hand, to skip some division and masking. This
+ is the most we can easily do without hassling with constants
+ that a typical-platform compiler would reject. */
+ n += n >> (1 << 3);
+ n += n >> (1 << 4);
+ n += n >> (1 << 5);
+ n &= x000_7f;
+ for (int i = 64; i < 8 * sizeof n; i <<= 1)
+ n = (n + (n >> i)) & max / (1ull << i | 1);
+ return n;
+ }
+}
+
+# ifdef _MSC_VER
+# if 1500 <= _MSC_VER && (defined _M_IX86 || defined _M_X64)
+/* Declare the few MSVC intrinsics that we need. We prefer not to include
+ <intrin.h> because it would pollute the namespace. */
+extern void __cpuid (int[4], int);
+# pragma intrinsic (__cpuid)
+extern unsigned int __popcnt (unsigned int);
+# pragma intrinsic (__popcnt)
+# ifdef _M_X64
+extern unsigned long long __popcnt64 (unsigned long long);
+# pragma intrinsic (__popcnt64)
+# else
+_GL_STDC_COUNT_ONES_INLINE int
+__popcnt64 (unsigned long long int n)
+{
+ return __popcnt (n >> 32) + __popcnt (n);
+}
+# endif
+# endif
+
+/* 1 if supported, -1 if not, 0 if unknown. */
+extern signed char _gl_stdbit_popcount_support;
+
+_GL_STDC_COUNT_ONES_INLINE bool
+_gl_stdbit_popcount_supported (void)
+{
+ if (!_gl_stdbit_popcount_support)
+ {
+ /* Do as described in
+ <https://docs.microsoft.com/en-us/cpp/intrinsics/popcnt16-popcnt-popcnt64>
+ Although Microsoft started requiring POPCNT in MS-Windows 11 24H2,
+ we'll be more cautious. */
+ int cpu_info[4];
+ __cpuid (cpu_info, 1);
+ _gl_stdbit_popcount_support = cpu_info[2] & 1 << 23 ? 1 : -1;
+ }
+ return 0 < _gl_stdbit_popcount_support;
+}
+_GL_STDC_COUNT_ONES_INLINE int
+_gl_stdbit_popcount (unsigned int n)
+{
+ return (_gl_stdbit_popcount_supported ()
+ ? __popcnt (n)
+ : _gl_stdbit_popcount_wide (n));
+}
+_GL_STDC_COUNT_ONES_INLINE int
+_gl_stdbit_popcountl (unsigned long int n)
+{
+ return (_gl_stdbit_popcount_supported ()
+ ? __popcnt (n)
+ : _gl_stdbit_popcount_wide (n));
+}
+_GL_STDC_COUNT_ONES_INLINE int
+_gl_stdbit_popcountll (unsigned long long int n)
+{
+ return (_gl_stdbit_popcount_supported ()
+ ? __popcnt64 (n)
+ : _gl_stdbit_popcount_wide (n));
+}
+# else /* !_MSC_VER */
+# define _gl_stdbit_popcount _gl_stdbit_popcount_wide
+# define _gl_stdbit_popcountl _gl_stdbit_popcount_wide
+# define _gl_stdbit_popcountll _gl_stdbit_popcount_wide
+# endif
+# endif
+
_GL_STDC_COUNT_ONES_INLINE unsigned int
stdc_count_ones_ui (unsigned int n)
{
--
2.52.0