Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package primesieve for openSUSE:Factory checked in at 2025-03-03 16:04:38 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/primesieve (Old) and /work/SRC/openSUSE:Factory/.primesieve.new.19136 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "primesieve" Mon Mar 3 16:04:38 2025 rev:17 rq:1249566 version:12.7 Changes: -------- --- /work/SRC/openSUSE:Factory/primesieve/primesieve.changes 2024-11-18 20:03:42.331393735 +0100 +++ /work/SRC/openSUSE:Factory/.primesieve.new.19136/primesieve.changes 2025-03-03 16:04:53.082636201 +0100 @@ -1,0 +2,9 @@ +Sun Mar 02 13:20:00 UTC 2025 - Kim Walisch <kim.wali...@gmail.com> + +- Update to primesieve-12.7 +- multiarch_sve_arm.cmake: Improve ARM SVE detection +- src/arch/arm/sve.cpp: Detect ARM SVE on Linux and Windows +- EratBig.cpp: Simplify bucket handling +- Erat.cpp: Tune sieve size using FACTOR_SIEVESIZE + +------------------------------------------------------------------- Old: ---- primesieve-12.6.tar.gz New: ---- primesieve-12.7.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ primesieve.spec ++++++ --- /var/tmp/diff_new_pack.hyYQU7/_old 2025-03-03 16:04:53.738663646 +0100 +++ /var/tmp/diff_new_pack.hyYQU7/_new 2025-03-03 16:04:53.738663646 +0100 @@ -17,7 +17,7 @@ Name: primesieve -Version: 12.6 +Version: 12.7 Release: 0 Summary: A prime number generator License: BSD-2-Clause ++++++ primesieve-12.6.tar.gz -> primesieve-12.7.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/primesieve-12.6/CMakeLists.txt new/primesieve-12.7/CMakeLists.txt --- old/primesieve-12.6/CMakeLists.txt 2024-11-18 08:51:32.000000000 +0100 +++ new/primesieve-12.7/CMakeLists.txt 2025-03-01 10:11:34.000000000 +0100 @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.4...3.27) project(primesieve CXX) -set(PRIMESIEVE_VERSION "12.6") -set(PRIMESIEVE_SOVERSION "12.6.0") +set(PRIMESIEVE_VERSION "12.7") +set(PRIMESIEVE_SOVERSION "12.7.0") # Build options ###################################################### @@ -89,12 +89,13 @@ include("${PROJECT_SOURCE_DIR}/cmake/multiarch_avx512_bw.cmake") include("${PROJECT_SOURCE_DIR}/cmake/multiarch_avx512_vbmi2.cmake") 
- if(NOT multiarch_avx512_bw) - include("${PROJECT_SOURCE_DIR}/cmake/multiarch_sve_arm.cmake") - endif() - if(multiarch_x86_popcnt OR multiarch_avx512_bw OR multiarch_avx512_vbmi2) - set(LIB_SRC ${LIB_SRC} src/x86/cpuid.cpp) + set(LIB_SRC ${LIB_SRC} src/arch/x86/cpuid.cpp) + else() + include("${PROJECT_SOURCE_DIR}/cmake/multiarch_sve_arm.cmake") + if(multiarch_sve_arm) + set(LIB_SRC ${LIB_SRC} src/arch/arm/sve.cpp) + endif() endif() endif() diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/primesieve-12.6/COPYING new/primesieve-12.7/COPYING --- old/primesieve-12.6/COPYING 2024-11-18 08:51:32.000000000 +0100 +++ new/primesieve-12.7/COPYING 2025-03-01 10:11:34.000000000 +0100 @@ -1,6 +1,6 @@ BSD 2-Clause License -Copyright (c) 2010 - 2024, Kim Walisch. +Copyright (c) 2010 - 2025, Kim Walisch. All rights reserved. Redistribution and use in source and binary forms, with or without diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/primesieve-12.6/ChangeLog new/primesieve-12.7/ChangeLog --- old/primesieve-12.6/ChangeLog 2024-11-18 08:51:32.000000000 +0100 +++ new/primesieve-12.7/ChangeLog 2025-03-01 10:11:34.000000000 +0100 @@ -1,3 +1,12 @@ +Changes in version 12.7, 28/02/2025 +=================================== + +* multiarch_sve_arm.cmake: Improve ARM SVE detection. +* src/arch/arm/sve.cpp: Detect ARM SVE on Linux and Windows. +* EratBig.cpp: Simplify bucket handling. +* Erat.cpp: Tune sieve size using FACTOR_SIEVESIZE. +* README.md: Add Sponsors section. 
+ Changes in version 12.6, 11/11/2024 =================================== diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/primesieve-12.6/README.md new/primesieve-12.7/README.md --- old/primesieve-12.6/README.md 2024-11-18 08:51:32.000000000 +0100 +++ new/primesieve-12.7/README.md 2025-03-01 10:11:34.000000000 +0100 @@ -190,6 +190,10 @@ <td><a href="https://github.com/AaronChen0/cl-primesieve">cl-primesieve</a></td> </tr> <tr> + <td><b>Java:</b></td> + <td><a href="https://github.com/buildingnicesoftware/primesieve-java">primesieve-java</a></td> + </tr> + <tr> <td><b>Janet:</b></td> <td><a href="https://github.com/bunder/janet-primesieve">janet-primesieve</a></td> </tr> @@ -232,3 +236,14 @@ </table> Many thanks to the developers of these bindings! + +## Sponsors + +Thanks to all current and past [sponsors of primesieve](https://github.com/sponsors/kimwalisch)! Your donations help me purchase (or rent) the latest CPUs and ensure primesieve runs at maximum performance on them. Your donations also motivate me to continue maintaining primesieve. 
+ +<a href="https://github.com/AndrewVSutherland"><img src="https://images.weserv.nl/?url=avatars.githubusercontent.com/u/11425002?h=60&w=60&fit=cover&mask=circle"></img></a> +<a href="https://github.com/wolframresearch"><img src="https://images.weserv.nl/?url=avatars.githubusercontent.com/u/11549616?h=60&w=60&fit=cover&mask=circle"></img></a> +<a href="https://github.com/AlgoWin"><img src="https://images.weserv.nl/?url=avatars.githubusercontent.com/u/44401099?h=60&w=60&fit=cover&mask=circle"></img></a> +<a href="https://github.com/sethtroisi"><img src="https://images.weserv.nl/?url=avatars.githubusercontent.com/u/10172976?h=60&w=60&fit=cover&mask=circle"></img></a> +<a href="https://github.com/entersoftone"><img src="https://images.weserv.nl/?url=avatars.githubusercontent.com/u/80900902?h=60&w=60&fit=cover&mask=circle"></img></a> +<a href="https://github.com/utmcontent"><img src="https://images.weserv.nl/?url=avatars.githubusercontent.com/u/4705133?h=60&w=60&fit=cover&mask=circle"></img></a> diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/primesieve-12.6/cmake/multiarch_avx512_bw.cmake new/primesieve-12.7/cmake/multiarch_avx512_bw.cmake --- old/primesieve-12.6/cmake/multiarch_avx512_bw.cmake 2024-11-18 08:51:32.000000000 +0100 +++ new/primesieve-12.7/cmake/multiarch_avx512_bw.cmake 2025-03-01 10:11:34.000000000 +0100 @@ -19,7 +19,7 @@ Error: AVX512BW multiarch not needed! 
#endif - #include <src/x86/cpuid.cpp> + #include <src/arch/x86/cpuid.cpp> #include <immintrin.h> #include <stdint.h> #include <cstddef> diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/primesieve-12.6/cmake/multiarch_avx512_vbmi2.cmake new/primesieve-12.7/cmake/multiarch_avx512_vbmi2.cmake --- old/primesieve-12.6/cmake/multiarch_avx512_vbmi2.cmake 2024-11-18 08:51:32.000000000 +0100 +++ new/primesieve-12.7/cmake/multiarch_avx512_vbmi2.cmake 2025-03-01 10:11:34.000000000 +0100 @@ -20,7 +20,7 @@ Error: AVX512VBMI2 multiarch not needed! #endif - #include <src/x86/cpuid.cpp> + #include <src/arch/x86/cpuid.cpp> #include <immintrin.h> #include <stdint.h> diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/primesieve-12.6/cmake/multiarch_sve_arm.cmake new/primesieve-12.7/cmake/multiarch_sve_arm.cmake --- old/primesieve-12.6/cmake/multiarch_sve_arm.cmake 2024-11-18 08:51:32.000000000 +0100 +++ new/primesieve-12.7/cmake/multiarch_sve_arm.cmake 2025-03-01 10:11:34.000000000 +0100 @@ -7,7 +7,7 @@ include(CMakePushCheckState) cmake_push_check_state() -set(CMAKE_REQUIRED_INCLUDES "${PROJECT_SOURCE_DIR}/include") +set(CMAKE_REQUIRED_INCLUDES "${PROJECT_SOURCE_DIR}") check_cxx_source_compiles(" // GCC/Clang function multiversioning for ARM SVE is not needed @@ -19,7 +19,7 @@ Error: ARM SVE multiarch not needed! 
#endif - #include <primesieve/cpu_supports_arm_sve.hpp> + #include <src/arch/arm/sve.cpp> #include <arm_sve.h> #include <stdint.h> #include <cstddef> @@ -62,7 +62,7 @@ uint8_t PreSieveTable3[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; uint8_t PreSieveTable4[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; - if (cpu_supports_sve) + if (primesieve::has_arm_sve()) AND_PreSieveTables_arm_sve(&PreSieveTable1[0], &PreSieveTable2[1], &PreSieveTable3[1], &PreSieveTable4[1], &sieve[0], 10); else AND_PreSieveTables_default(&PreSieveTable1[0], &PreSieveTable2[1], &PreSieveTable3[1], &PreSieveTable4[1], &sieve[0], 10); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/primesieve-12.6/cmake/multiarch_x86_popcnt.cmake new/primesieve-12.7/cmake/multiarch_x86_popcnt.cmake --- old/primesieve-12.6/cmake/multiarch_x86_popcnt.cmake 2024-11-18 08:51:32.000000000 +0100 +++ new/primesieve-12.7/cmake/multiarch_x86_popcnt.cmake 2025-03-01 10:11:34.000000000 +0100 @@ -32,7 +32,7 @@ Error: x86 POPCNT multiarch not needed! #endif - #include <src/x86/cpuid.cpp> + #include <src/arch/x86/cpuid.cpp> #include <iostream> int main() diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/primesieve-12.6/include/primesieve/Wheel.hpp new/primesieve-12.7/include/primesieve/Wheel.hpp --- old/primesieve-12.6/include/primesieve/Wheel.hpp 2024-11-18 08:51:32.000000000 +0100 +++ new/primesieve-12.7/include/primesieve/Wheel.hpp 2025-03-01 10:11:34.000000000 +0100 @@ -3,7 +3,7 @@ /// @brief Wheel factorization is used to skip multiles of /// small primes in the sieve of Eratosthenes. /// -/// Copyright (C) 2022 Kim Walisch, <kim.wali...@gmail.com> +/// Copyright (C) 2025 Kim Walisch, <kim.wali...@gmail.com> /// /// This file is distributed under the BSD License. See the COPYING /// file in the top level directory. 
@@ -73,8 +73,17 @@ if (nextMultiple > stop_ - multiple) return; - nextMultiple += multiple - segmentLow; - uint64_t multipleIndex = nextMultiple / 30; + multiple += nextMultiple; + + #if defined(ENABLE_ASSERT) + if (MODULO >= 2) ASSERT(multiple % 2 != 0); + if (MODULO >= 6) ASSERT(multiple % 3 != 0); + if (MODULO >= 30) ASSERT(multiple % 5 != 0); + if (MODULO >= 210) ASSERT(multiple % 7 != 0); + if (MODULO >= 2310) ASSERT(multiple % 11 != 0); + #endif + + uint64_t multipleIndex = (multiple - segmentLow) / 30; uint64_t wheelIndex = wheelOffsets_[prime % 30] + INIT[quotient % MODULO].wheelIndex; storeSievingPrime(prime, multipleIndex, wheelIndex); } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/primesieve-12.6/include/primesieve/config.hpp new/primesieve-12.7/include/primesieve/config.hpp --- old/primesieve-12.6/include/primesieve/config.hpp 2024-11-18 08:51:32.000000000 +0100 +++ new/primesieve-12.7/include/primesieve/config.hpp 2025-03-01 10:11:34.000000000 +0100 @@ -2,7 +2,7 @@ /// @file config.hpp /// @brief primesieve compile time constants. /// -/// Copyright (C) 2024 Kim Walisch, <kim.wali...@gmail.com> +/// Copyright (C) 2025 Kim Walisch, <kim.wali...@gmail.com> /// /// This file is distributed under the BSD License. See the COPYING /// file in the top level directory. @@ -60,6 +60,21 @@ /// constexpr uint64_t MIN_THREAD_DISTANCE = (uint64_t) 1e7; +/// sieveSize = sqrt(stop) * FACTOR_SIEVESIZE. +/// +/// Using a larger FACTOR_SIEVESIZE increases the segment size in the +/// sieve of Eratosthenes and hence reduces the number of operations +/// used by the algorithm. However, as a drawback a larger segment +/// size is less cache efficient and hence performance may deteriorate +/// on CPUs with limited L2 cache bandwidth (especially when using +/// multi-threading). 
+/// +/// Using FACTOR_SIEVESIZE = 2.0 performs well for counting the +/// primes < 10^11 using multi-threading on both the Apple M3 CPU and +/// the Intel Arrow Lake 245K CPU (from 2024). +/// +constexpr double FACTOR_SIEVESIZE = 2.0; + /// Sieving primes <= (L1D_CACHE_BYTES * FACTOR_ERATSMALL) are /// processed in EratSmall. When FACTOR_ERATSMALL is small fewer /// sieving primes are processed in EratSmall.cpp and more sieving diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/primesieve-12.6/include/primesieve/cpu_supports_arm_sve.hpp new/primesieve-12.7/include/primesieve/cpu_supports_arm_sve.hpp --- old/primesieve-12.6/include/primesieve/cpu_supports_arm_sve.hpp 2024-11-18 08:51:32.000000000 +0100 +++ new/primesieve-12.7/include/primesieve/cpu_supports_arm_sve.hpp 2025-03-01 10:11:34.000000000 +0100 @@ -2,7 +2,7 @@ /// @file cpu_supports_arm_sve.hpp /// Check if the CPU supports the ARM SVE instruction set. /// -/// Copyright (C) 2024 Kim Walisch, <kim.wali...@gmail.com> +/// Copyright (C) 2025 Kim Walisch, <kim.wali...@gmail.com> /// /// This file is distributed under the BSD License. See the COPYING /// file in the top level directory. 
@@ -11,17 +11,17 @@ #ifndef CPU_SUPPORTS_ARM_SVE_HPP #define CPU_SUPPORTS_ARM_SVE_HPP -#include "macros.hpp" +namespace primesieve { -#if __has_builtin(__builtin_cpu_supports) +bool has_arm_sve(); + +} // namespace namespace { /// Initialized at startup -const bool cpu_supports_sve = __builtin_cpu_supports("sve"); +const bool cpu_supports_sve = primesieve::has_arm_sve(); } // namespace -#endif // __builtin_cpu_supports - #endif diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/primesieve-12.6/include/primesieve.h new/primesieve-12.7/include/primesieve.h --- old/primesieve-12.6/include/primesieve.h 2024-11-18 08:51:32.000000000 +0100 +++ new/primesieve-12.7/include/primesieve.h 2025-03-01 10:11:34.000000000 +0100 @@ -7,7 +7,7 @@ * standard error stream. libprimesieve also sets the C errno * variable to EDOM if an error occurs. * - * Copyright (C) 2024 Kim Walisch, <kim.wali...@gmail.com> + * Copyright (C) 2025 Kim Walisch, <kim.wali...@gmail.com> * * This file is distributed under the BSD License. */ @@ -15,9 +15,9 @@ #ifndef PRIMESIEVE_H #define PRIMESIEVE_H -#define PRIMESIEVE_VERSION "12.6" +#define PRIMESIEVE_VERSION "12.7" #define PRIMESIEVE_VERSION_MAJOR 12 -#define PRIMESIEVE_VERSION_MINOR 6 +#define PRIMESIEVE_VERSION_MINOR 7 #include <primesieve/iterator.h> diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/primesieve-12.6/include/primesieve.hpp new/primesieve-12.7/include/primesieve.hpp --- old/primesieve-12.6/include/primesieve.hpp 2024-11-18 08:51:32.000000000 +0100 +++ new/primesieve-12.7/include/primesieve.hpp 2025-03-01 10:11:34.000000000 +0100 @@ -5,7 +5,7 @@ /// primesieve::primesieve_error exception (derived form /// std::runtime_error) is thrown. /// -/// Copyright (C) 2024 Kim Walisch, <kim.wali...@gmail.com> +/// Copyright (C) 2025 Kim Walisch, <kim.wali...@gmail.com> /// /// This file is distributed under the BSD License. 
/// @@ -13,9 +13,9 @@ #ifndef PRIMESIEVE_HPP #define PRIMESIEVE_HPP -#define PRIMESIEVE_VERSION "12.6" +#define PRIMESIEVE_VERSION "12.7" #define PRIMESIEVE_VERSION_MAJOR 12 -#define PRIMESIEVE_VERSION_MINOR 6 +#define PRIMESIEVE_VERSION_MINOR 7 #include <primesieve/iterator.hpp> #include <primesieve/primesieve_error.hpp> diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/primesieve-12.6/scripts/build_clang_multiarch_win_x64.bat new/primesieve-12.7/scripts/build_clang_multiarch_win_x64.bat --- old/primesieve-12.6/scripts/build_clang_multiarch_win_x64.bat 2024-11-18 08:51:32.000000000 +0100 +++ new/primesieve-12.7/scripts/build_clang_multiarch_win_x64.bat 2025-03-01 10:11:34.000000000 +0100 @@ -1 +1 @@ -clang++ -I../include -O3 -mpopcnt -DNDEBUG -DENABLE_MULTIARCH_AVX512_BW -DENABLE_MULTIARCH_AVX512_VBMI2 ../src/*.cpp ../src/x86/*.cpp ../src/app/*.cpp -o primesieve.exe "C:\Program Files\LLVM\lib\clang\18\lib\windows\clang_rt.builtins-x86_64.lib" +clang++ -I../include -O3 -mpopcnt -DNDEBUG -DENABLE_MULTIARCH_AVX512_BW -DENABLE_MULTIARCH_AVX512_VBMI2 ../src/*.cpp ../src/arch/x86/*.cpp ../src/app/*.cpp -o primesieve.exe "C:\Program Files\LLVM\lib\clang\18\lib\windows\clang_rt.builtins-x86_64.lib" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/primesieve-12.6/scripts/build_mingw64_x64.sh new/primesieve-12.7/scripts/build_mingw64_x64.sh --- old/primesieve-12.6/scripts/build_mingw64_x64.sh 2024-11-18 08:51:32.000000000 +0100 +++ new/primesieve-12.7/scripts/build_mingw64_x64.sh 2025-03-01 10:11:34.000000000 +0100 @@ -45,7 +45,7 @@ mkdir build-release cd build-release -g++ -static -O3 -mpopcnt -flto -DNDEBUG -D_WIN32_WINNT=0x0A00 -Wall -Wextra -pedantic -DENABLE_MULTIARCH_AVX512_BW -DENABLE_MULTIARCH_AVX512_VBMI2 -I ../include ../src/*.cpp ../src/x86/*.cpp ../src/app/*.cpp -o primesieve.exe +g++ -static -O3 -mpopcnt -flto -DNDEBUG -D_WIN32_WINNT=0x0A00 -Wall -Wextra -pedantic 
-DENABLE_MULTIARCH_AVX512_BW -DENABLE_MULTIARCH_AVX512_VBMI2 -I ../include ../src/*.cpp ../src/arch/x86/*.cpp ../src/app/*.cpp -o primesieve.exe strip primesieve.exe # Create a release zip archive diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/primesieve-12.6/src/Erat.cpp new/primesieve-12.7/src/Erat.cpp --- old/primesieve-12.6/src/Erat.cpp 2024-11-18 08:51:32.000000000 +0100 +++ new/primesieve-12.7/src/Erat.cpp 2025-03-01 10:11:34.000000000 +0100 @@ -3,7 +3,7 @@ /// @brief The Erat class manages prime sieving using the /// EratSmall, EratMedium, EratBig classes. /// -/// Copyright (C) 2024 Kim Walisch, <kim.wali...@gmail.com> +/// Copyright (C) 2025 Kim Walisch, <kim.wali...@gmail.com> /// /// This file is distributed under the BSD License. See the COPYING /// file in the top level directory. @@ -110,20 +110,45 @@ uint64_t minSieveSize = std::min(l1CacheSize, maxSieveSize); // ================================================================ - // 2. sieveSize = inBetween(minSieveSize, sqrtStop, maxSieveSize) + // 2. sieveSize = sqrt(stop) * FACTOR_SIEVESIZE + // ================================================================ + + // Using a larger FACTOR_SIEVESIZE increases the segment size + // in the sieve of Eratosthenes and hence reduces the number + // of operations used by the algorithm. However, as a drawback + // a larger segment size is less cache efficient and hence + // performance may deteriorate on CPUs with limited L2 cache + // bandwidth (especially when using multi-threading). + uint64_t sieveSize = uint64_t(sqrtStop * config::FACTOR_SIEVESIZE); + + // ================================================================ + // 3. sieveSize = minSieveSize * x + // ================================================================ + + // The EratSmall algorithm uses minSieveSize as its segment + // size. 
If sieveSize is a multiple of minSieveSize then + // there will be no short segments in EratSmall which should + // provide optimal performance. + if (sieveSize > minSieveSize) + sieveSize = (sieveSize / minSieveSize) * minSieveSize; + + // ================================================================ + // 4. L1CacheSize <= sieveSize <= L2CacheSize // ================================================================ // For small stop numbers a small sieve array size that // matches the CPU's L1 data cache size performs best. - // For larger stop numbers a sieve array size that is - // within [L1CacheSize, L2CacheSize] usually performs best. - uint64_t sieveSize = inBetween(minSieveSize, sqrtStop, maxSieveSize); + // For larger stop numbers a sieve array size that is ~ + // L2CacheSize usually performs best. Hence our sieve size + // increases dynamically based on the stop number but it + // can never exceed the L2CacheSize (or maxSieveSize). + sieveSize = inBetween(minSieveSize, sieveSize, maxSieveSize); sieveSize = inBetween(16 << 10, sieveSize, 8192 << 10); sieveSize = ceilDiv(sieveSize, sizeof(uint64_t)) * sizeof(uint64_t); minSieveSize = std::min(l1CacheSize, sieveSize); // ================================================================ - // 3. Initialize upper bounds for EratSmall & EratMedium + // 5. Initialize upper bounds for EratSmall & EratMedium // ================================================================ // Small sieving primes are processed using the EratSmall @@ -134,7 +159,7 @@ maxEratMedium_ = (uint64_t) (sieveSize * config::FACTOR_ERATMEDIUM); // ================================================================ - // 4. EratBig requires a power of 2 sieve size + // 6. EratBig requires a power of 2 sieve size // ================================================================ if (sqrtStop > maxEratMedium_) @@ -146,14 +171,14 @@ } // ================================================================ - // 5. 
Ensure we allocate the smallest possible amount of memory + // 7. Ensure we allocate the smallest possible amount of memory // ================================================================ maxEratSmall_ = std::min(maxEratSmall_, sqrtStop); maxEratMedium_ = std::min(maxEratMedium_, sqrtStop); // ================================================================ - // 6. Initialize segment bounds + // 8. Initialize segment bounds // ================================================================ // The 8 bits of each byte of the sieve array correspond to @@ -168,7 +193,7 @@ segmentHigh_ = std::min(segmentHigh_, stop_); // ================================================================ - // 7. Use tiny sieveSize if possible + // 9. Use tiny sieveSize if possible // ================================================================ // If we are sieving just a single segment @@ -184,7 +209,7 @@ } // ================================================================ - // 8. Finally, initialize EratSmall, EratMedium & EratBig + // 10. Finally, initialize EratSmall, EratMedium & EratBig // ================================================================ ASSERT(sieveSize % sizeof(uint64_t) == 0); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/primesieve-12.6/src/EratBig.cpp new/primesieve-12.7/src/EratBig.cpp --- old/primesieve-12.6/src/EratBig.cpp 2024-11-18 08:51:32.000000000 +0100 +++ new/primesieve-12.7/src/EratBig.cpp 2025-03-01 10:11:34.000000000 +0100 @@ -18,7 +18,7 @@ /// after the last multiple of each sieving prime is removed /// from the sieve array. /// -/// Copyright (C) 2023 Kim Walisch, <kim.wali...@gmail.com> +/// Copyright (C) 2025 Kim Walisch, <kim.wali...@gmail.com> /// /// This file is distributed under the BSD License. See the COPYING /// file in the top level directory. 
@@ -162,37 +162,24 @@ multipleIndex &= moduloSieveSize_; while (buckets_.size() < newSize) - { buckets_.push_back(nullptr); - memoryPool_->addBucket(buckets_.back()); - } ASSERT(prime <= maxPrime_); ASSERT(segment < buckets_.size()); - buckets_[segment]++->set(sievingPrime, multipleIndex, wheelIndex); if (Bucket::isFull(buckets_[segment])) memoryPool_->addBucket(buckets_[segment]); + + buckets_[segment]++->set(sievingPrime, multipleIndex, wheelIndex); } void EratBig::crossOff(Vector<uint8_t>& sieve) { - while (true) + while (buckets_[0]) { - // Get the current bucket list, it's a singly linked - // list. This list contains the sieving primes that - // have multiple occurrences in the current segment. Bucket* bucket = Bucket::get(buckets_[0]); bucket->setEnd(buckets_[0]); - - // No more buckets in the current segment - if (bucket->empty() && - bucket->next() == nullptr) - break; - - // Reset the buckets_[0] list buckets_[0] = nullptr; - memoryPool_->addBucket(buckets_[0]); // Iterate over the buckets related // to the current segment. @@ -228,49 +215,15 @@ MemoryPool& memoryPool = *memoryPool_; std::size_t moduloSieveSize = moduloSieveSize_; std::size_t log2SieveSize = log2SieveSize_; - std::size_t size = (std::size_t) (end - prime); - SievingPrime* end2 = end - size % 2; - // Process 2 sieving primes per loop iteration to - // increase instruction level parallelism. 
- for (; prime != end2; prime += 2) - { - std::size_t multipleIndex0 = prime[0].getMultipleIndex(); - std::size_t wheelIndex0 = prime[0].getWheelIndex(); - std::size_t sievingPrime0 = prime[0].getSievingPrime(); - std::size_t multipleIndex1 = prime[1].getMultipleIndex(); - std::size_t wheelIndex1 = prime[1].getWheelIndex(); - std::size_t sievingPrime1 = prime[1].getSievingPrime(); - - sieve[multipleIndex0] &= wheel210[wheelIndex0].unsetBit; - sieve[multipleIndex1] &= wheel210[wheelIndex1].unsetBit; - - multipleIndex0 += wheel210[wheelIndex0].nextMultipleFactor * sievingPrime0; - multipleIndex1 += wheel210[wheelIndex1].nextMultipleFactor * sievingPrime1; - multipleIndex0 += wheel210[wheelIndex0].correct; - multipleIndex1 += wheel210[wheelIndex1].correct; - wheelIndex0 = wheel210[wheelIndex0].next; - wheelIndex1 = wheel210[wheelIndex1].next; - std::size_t segment0 = multipleIndex0 >> log2SieveSize; - std::size_t segment1 = multipleIndex1 >> log2SieveSize; - multipleIndex0 &= moduloSieveSize; - multipleIndex1 &= moduloSieveSize; - - buckets[segment0]++->set(sievingPrime0, multipleIndex0, wheelIndex0); - if_unlikely(Bucket::isFull(buckets[segment0])) - memoryPool.addBucket(buckets[segment0]); - - buckets[segment1]++->set(sievingPrime1, multipleIndex1, wheelIndex1); - if_unlikely(Bucket::isFull(buckets[segment1])) - memoryPool.addBucket(buckets[segment1]); - } - - if_unlikely(prime != end) + for (; prime != end; prime++) { std::size_t multipleIndex = prime->getMultipleIndex(); std::size_t wheelIndex = prime->getWheelIndex(); std::size_t sievingPrime = prime->getSievingPrime(); + // Cross-off the current multiple (unset bit) + // and calculate the next multiple. 
sieve[multipleIndex] &= wheel210[wheelIndex].unsetBit; multipleIndex += wheel210[wheelIndex].nextMultipleFactor * sievingPrime; multipleIndex += wheel210[wheelIndex].correct; @@ -278,9 +231,10 @@ std::size_t segment = multipleIndex >> log2SieveSize; multipleIndex &= moduloSieveSize; - buckets[segment]++->set(sievingPrime, multipleIndex, wheelIndex); - if_unlikely(Bucket::isFull(buckets[segment])) + if (Bucket::isFull(buckets[segment])) memoryPool.addBucket(buckets[segment]); + + buckets[segment]++->set(sievingPrime, multipleIndex, wheelIndex); } } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/primesieve-12.6/src/EratMedium.cpp new/primesieve-12.7/src/EratMedium.cpp --- old/primesieve-12.6/src/EratMedium.cpp 2024-11-18 08:51:32.000000000 +0100 +++ new/primesieve-12.7/src/EratMedium.cpp 2025-03-01 10:11:34.000000000 +0100 @@ -11,7 +11,7 @@ /// by up to 30% for sieving primes that have only a few /// multiple occurrences per segment. /// -/// Copyright (C) 2023 Kim Walisch, <kim.wali...@gmail.com> +/// Copyright (C) 2025 Kim Walisch, <kim.wali...@gmail.com> /// /// This file is distributed under the BSD License. See the COPYING /// file in the top level directory. 
@@ -142,6 +142,7 @@ std::size_t dist1 = sievingPrime * 4 + 1; std::size_t dist2 = sievingPrime * 2 + 0; std::size_t dist4 = sievingPrime * 2 + 1; + ASSERT(wheelIndex <= 7); switch (wheelIndex) { @@ -181,6 +182,9 @@ std::size_t dist3 = sievingPrime * 4 + 2; std::size_t dist4 = sievingPrime * 2 + 0; + ASSERT(wheelIndex >= 8); + ASSERT(wheelIndex <= 15); + switch (wheelIndex) { default: UNREACHABLE; @@ -219,6 +223,9 @@ std::size_t dist5 = sievingPrime * 4 + 1; std::size_t dist6 = sievingPrime * 6 + 3; + ASSERT(wheelIndex >= 16); + ASSERT(wheelIndex <= 23); + switch (wheelIndex) { default: UNREACHABLE; @@ -257,6 +264,9 @@ std::size_t dist3 = sievingPrime * 4 + 2; std::size_t dist6 = sievingPrime * 6 + 4; + ASSERT(wheelIndex >= 24); + ASSERT(wheelIndex <= 31); + switch (wheelIndex) { default: UNREACHABLE; @@ -295,6 +305,9 @@ std::size_t dist4 = sievingPrime * 2 + 1; std::size_t dist5 = sievingPrime * 4 + 3; + ASSERT(wheelIndex >= 32); + ASSERT(wheelIndex <= 39); + switch (wheelIndex) { default: UNREACHABLE; @@ -332,6 +345,9 @@ std::size_t dist2 = sievingPrime * 2 + 1; std::size_t dist4 = sievingPrime * 2 + 2; + ASSERT(wheelIndex >= 40); + ASSERT(wheelIndex <= 47); + switch (wheelIndex) { default: UNREACHABLE; @@ -369,6 +385,9 @@ std::size_t dist2 = sievingPrime * 2 + 2; std::size_t dist6 = sievingPrime * 6 + 5; + ASSERT(wheelIndex >= 48); + ASSERT(wheelIndex <= 55); + switch (wheelIndex) { default: UNREACHABLE; @@ -406,6 +425,9 @@ std::size_t dist2 = sievingPrime * 2 + 0; std::size_t dist6 = sievingPrime * 6 + 0; + ASSERT(wheelIndex >= 56); + ASSERT(wheelIndex <= 63); + switch (wheelIndex) { default: UNREACHABLE; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/primesieve-12.6/src/EratSmall.cpp new/primesieve-12.7/src/EratSmall.cpp --- old/primesieve-12.6/src/EratSmall.cpp 2024-11-18 08:51:32.000000000 +0100 +++ new/primesieve-12.7/src/EratSmall.cpp 2025-03-01 10:11:34.000000000 +0100 @@ -9,7 +9,7 @@ /// multiples uses as few 
instructions as possible since there /// are so many multiples. /// -/// Copyright (C) 2023 Kim Walisch, <kim.wali...@gmail.com> +/// Copyright (C) 2025 Kim Walisch, <kim.wali...@gmail.com> /// /// This file is distributed under the BSD License. See the COPYING /// file in the top level directory. @@ -92,6 +92,7 @@ std::size_t sievingPrime = prime.getSievingPrime(); std::size_t i = prime.getMultipleIndex(); std::size_t wheelIndex = prime.getWheelIndex(); + ASSERT(wheelIndex <= 63); switch (wheelIndex) { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/primesieve-12.6/src/app/help.cpp new/primesieve-12.7/src/app/help.cpp --- old/primesieve-12.6/src/app/help.cpp 2024-11-18 08:51:32.000000000 +0100 +++ new/primesieve-12.7/src/app/help.cpp 2025-03-01 10:11:34.000000000 +0100 @@ -3,7 +3,7 @@ /// @brief help() and version() functions of the primesieve /// console application. /// -/// Copyright (C) 2024 Kim Walisch, <kim.wali...@gmail.com> +/// Copyright (C) 2025 Kim Walisch, <kim.wali...@gmail.com> /// /// This file is distributed under the BSD License. See the COPYING /// file in the top level directory. 
@@ -66,6 +66,6 @@ { std::cout << "primesieve " << primesieve::primesieve_version(); std::cout << ", <https://github.com/kimwalisch/primesieve>" << std::endl; - std::cout << "Copyright (C) 2010 - 2024 Kim Walisch" << std::endl; + std::cout << "Copyright (C) 2010 - 2025 Kim Walisch" << std::endl; std::cout << "BSD 2-Clause License <https://opensource.org/licenses/BSD-2-Clause>" << std::endl; } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/primesieve-12.6/src/app/stressTest.cpp new/primesieve-12.7/src/app/stressTest.cpp --- old/primesieve-12.6/src/app/stressTest.cpp 2024-11-18 08:51:32.000000000 +0100 +++ new/primesieve-12.7/src/app/stressTest.cpp 2025-03-01 10:11:34.000000000 +0100 @@ -6,7 +6,7 @@ /// miscalculation occurs (due to a hardware issue) or the /// timeout (--timeout=SECS option) expires. /// -/// Copyright (C) 2024 Kim Walisch, <kim.wali...@gmail.com> +/// Copyright (C) 2025 Kim Walisch, <kim.wali...@gmail.com> /// /// This file is distributed under the BSD License. See the COPYING /// file in the top level directory. @@ -146,7 +146,7 @@ if (opts.stressTestMode == "CPU") { double sieveSizeKiB = primesieve::get_sieve_size(); - double avgMiB = 2.8 + (sieveSizeKiB / 1024.0); + double avgMiB = 2.6 + (sieveSizeKiB / 1024.0); std::cout << std::fixed << std::setprecision(2) << avgMiB << " MiB = " << std::fixed << std::setprecision(2) << threads * avgMiB << " MiB.\n"; } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/primesieve-12.6/src/arch/arm/sve.cpp new/primesieve-12.7/src/arch/arm/sve.cpp --- old/primesieve-12.6/src/arch/arm/sve.cpp 1970-01-01 01:00:00.000000000 +0100 +++ new/primesieve-12.7/src/arch/arm/sve.cpp 2025-03-01 10:11:34.000000000 +0100 @@ -0,0 +1,101 @@ +/// +/// @file sve.cpp +/// @brief Check if the CPU and OS support the SVE instruction set. +/// Compiling and linking of sve.cpp is tested by the CMake +/// build system using multiarch_sve_arm.cmake. 
+/// +/// In order to generate optimal code, we need to be able to +/// check if the ARM CPU supports the SVE instruction set +/// in a global initializer when the program is loaded. +/// +/// __builtin_cpu_supports() from Clang >= 19.0.0 does not +/// work when running in a global initializer. Usually the +/// workaround for this issue is to call __builtin_cpu_init() +/// before calling __builtin_cpu_supports(). However, +/// __builtin_cpu_init() is currently not supported on +/// ARM CPUs. +/// +/// TODO: Add macOS support once Apple ARM CPUs support the +/// SVE instruction set. +/// +/// Copyright (C) 2025 Kim Walisch, <kim.wali...@gmail.com> +/// +/// This file is distributed under the BSD License. See the COPYING +/// file in the top level directory. +/// + +#if defined(_WIN32) + +#include <windows.h> + +namespace primesieve { + +bool has_arm_sve() +{ + return IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE); +} + +} // namespace + +#elif (defined(__linux__) || \ + defined(__gnu_linux__) || \ + defined(__ANDROID__)) && \ + __has_include(<sys/auxv.h>) + +#include <sys/auxv.h> +#include <errno.h> + +// The Linux kernel header <asm/hwcap.h> is not installed by +// default on some Linux distros. Hence we define HWCAP_SVE +// for ARM64 CPUs to get rid of the <asm/hwcap.h> dependency. +#if defined(__aarch64__) + #define HWCAP_SVE (1 << 22) +#else + #include <asm/hwcap.h> +#endif + +namespace primesieve { + +bool has_arm_sve() +{ + errno = 0; + + // getauxval() is supported by glibc >= 2.16 (since 2012), + // musl libc >= 1.1.0 (2014) and Android's bionic libc (2010). + // We check using CMake (multiarch_sve_arm.cmake) if + // sve.cpp (and getauxval()) compiles and links correctly. + unsigned long hwcaps = getauxval(AT_HWCAP); + + if (errno != 0) + return false; + + // Check if the Linux kernel and the CPU support + // the ARM SVE instruction set. 
+ if (hwcaps & HWCAP_SVE) + return true; + else + return false; +} + +} // namespace + +#else + +namespace primesieve { + +bool has_arm_sve() +{ + // Since __builtin_cpu_init() and __builtin_cpu_supports() are + // currently (2025) not yet supported for ARM64 CPUs by both + // GCC and Clang, we only try them as a fallback option if + // none of the other more reliable methods work. + __builtin_cpu_init(); + if (__builtin_cpu_supports("sve")) + return true; + else + return false; +} + +} // namespace + +#endif diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/primesieve-12.6/src/arch/x86/cpuid.cpp new/primesieve-12.7/src/arch/x86/cpuid.cpp --- old/primesieve-12.6/src/arch/x86/cpuid.cpp 1970-01-01 01:00:00.000000000 +0100 +++ new/primesieve-12.7/src/arch/x86/cpuid.cpp 2025-03-01 10:11:34.000000000 +0100 @@ -0,0 +1,157 @@ +/// +/// @file cpuid.cpp +/// @brief CPUID for x86 and x86-64 CPUs. +/// +/// Copyright (C) 2024 Kim Walisch, <kim.wali...@gmail.com> +/// +/// This file is distributed under the BSD License. See the COPYING +/// file in the top level directory. 
+/// + +#include <stdint.h> + +#if defined(_MSC_VER) + #include <intrin.h> + #include <immintrin.h> +#endif + +// CPUID bits documentation: +// https://en.wikipedia.org/wiki/CPUID + +// %ebx bit flags +#define bit_AVX512F (1 << 16) +#define bit_AVX512BW (1 << 30) + +// %ecx bit flags +#define bit_AVX512VBMI (1 << 1) +#define bit_AVX512VBMI2 (1 << 6) +#define bit_POPCNT (1 << 23) + +// xgetbv bit flags +#define XSTATE_SSE (1 << 1) +#define XSTATE_YMM (1 << 2) +#define XSTATE_ZMM (7 << 5) + +namespace { + +void run_cpuid(int eax, int ecx, int* abcd) +{ +#if defined(_MSC_VER) + __cpuidex(abcd, eax, ecx); +#else + int ebx = 0; + int edx = 0; + + #if defined(__i386__) && \ + defined(__PIC__) + // In case of PIC under 32-bit EBX cannot be clobbered + __asm__ __volatile__("movl %%ebx, %%edi;" + "cpuid;" + "xchgl %%ebx, %%edi;" + : "+a" (eax), + "=D" (ebx), + "+c" (ecx), + "=d" (edx)); + #else + __asm__ __volatile__("cpuid" + : "+a" (eax), + "+b" (ebx), + "+c" (ecx), + "=d" (edx)); + #endif + + abcd[0] = eax; + abcd[1] = ebx; + abcd[2] = ecx; + abcd[3] = edx; +#endif +} + +// Get Value of Extended Control Register +uint64_t get_xcr0() +{ +#if defined(_MSC_VER) + return _xgetbv(0); +#else + uint32_t eax; + uint32_t edx; + + __asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0)); + return eax | (uint64_t(edx) << 32); +#endif +} + +} // namespace + +namespace primesieve { + +bool has_cpuid_popcnt() +{ + int abcd[4]; + run_cpuid(1, 0, abcd); + return (abcd[2] & bit_POPCNT) == bit_POPCNT; +} + +bool has_cpuid_avx512_bw() +{ + int abcd[4]; + + run_cpuid(1, 0, abcd); + + int osxsave_mask = (1 << 27); + + // Ensure OS supports extended processor state management + if ((abcd[2] & osxsave_mask) != osxsave_mask) + return false; + + uint64_t ymm_mask = XSTATE_SSE | XSTATE_YMM; + uint64_t zmm_mask = XSTATE_SSE | XSTATE_YMM | XSTATE_ZMM; + uint64_t xcr0 = get_xcr0(); + + // Check AVX OS support + if ((xcr0 & ymm_mask) != ymm_mask) + return false; + + // Check AVX512 OS support + 
if ((xcr0 & zmm_mask) != zmm_mask) + return false; + + run_cpuid(7, 0, abcd); + + // AND_PreSieveTables_avx512 requires AVX512F, AVX512BW + return ((abcd[1] & bit_AVX512F) == bit_AVX512F && + (abcd[1] & bit_AVX512BW) == bit_AVX512BW); +} + +bool has_cpuid_avx512_vbmi2() +{ + int abcd[4]; + + run_cpuid(1, 0, abcd); + + int osxsave_mask = (1 << 27); + + // Ensure OS supports extended processor state management + if ((abcd[2] & osxsave_mask) != osxsave_mask) + return false; + + uint64_t ymm_mask = XSTATE_SSE | XSTATE_YMM; + uint64_t zmm_mask = XSTATE_SSE | XSTATE_YMM | XSTATE_ZMM; + uint64_t xcr0 = get_xcr0(); + + // Check AVX OS support + if ((xcr0 & ymm_mask) != ymm_mask) + return false; + + // Check AVX512 OS support + if ((xcr0 & zmm_mask) != zmm_mask) + return false; + + run_cpuid(7, 0, abcd); + + // PrimeGenerator::fillNextPrimes_avx512() requires AVX512F, AVX512VBMI & AVX512VBMI2 + return ((abcd[1] & bit_AVX512F) == bit_AVX512F && + (abcd[2] & (bit_AVX512VBMI | bit_AVX512VBMI2)) == (bit_AVX512VBMI | bit_AVX512VBMI2)); +} + +} // namespace diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/primesieve-12.6/src/x86/cpuid.cpp new/primesieve-12.7/src/x86/cpuid.cpp --- old/primesieve-12.6/src/x86/cpuid.cpp 2024-11-18 08:51:32.000000000 +0100 +++ new/primesieve-12.7/src/x86/cpuid.cpp 1970-01-01 01:00:00.000000000 +0100 @@ -1,157 +0,0 @@ -/// -/// @file cpuid.cpp -/// @brief CPUID for x86 and x86-64 CPUs. -/// -/// Copyright (C) 2024 Kim Walisch, <kim.wali...@gmail.com> -/// -/// This file is distributed under the BSD License. See the COPYING -/// file in the top level directory. 
-/// - -#include <stdint.h> - -#if defined(_MSC_VER) - #include <intrin.h> - #include <immintrin.h> -#endif - -// CPUID bits documentation: -// https://en.wikipedia.org/wiki/CPUID - -// %ebx bit flags -#define bit_AVX512F (1 << 16) -#define bit_AVX512BW (1 << 30) - -// %ecx bit flags -#define bit_AVX512VBMI (1 << 1) -#define bit_AVX512VBMI2 (1 << 6) -#define bit_POPCNT (1 << 23) - -// xgetbv bit flags -#define XSTATE_SSE (1 << 1) -#define XSTATE_YMM (1 << 2) -#define XSTATE_ZMM (7 << 5) - -namespace { - -void run_cpuid(int eax, int ecx, int* abcd) -{ -#if defined(_MSC_VER) - __cpuidex(abcd, eax, ecx); -#else - int ebx = 0; - int edx = 0; - - #if defined(__i386__) && \ - defined(__PIC__) - // In case of PIC under 32-bit EBX cannot be clobbered - __asm__ __volatile__("movl %%ebx, %%edi;" - "cpuid;" - "xchgl %%ebx, %%edi;" - : "+a" (eax), - "=D" (ebx), - "+c" (ecx), - "=d" (edx)); - #else - __asm__ __volatile__("cpuid" - : "+a" (eax), - "+b" (ebx), - "+c" (ecx), - "=d" (edx)); - #endif - - abcd[0] = eax; - abcd[1] = ebx; - abcd[2] = ecx; - abcd[3] = edx; -#endif -} - -// Get Value of Extended Control Register -uint64_t get_xcr0() -{ -#if defined(_MSC_VER) - return _xgetbv(0); -#else - uint32_t eax; - uint32_t edx; - - __asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0)); - return eax | (uint64_t(edx) << 32); -#endif -} - -} // namespace - -namespace primesieve { - -bool has_cpuid_popcnt() -{ - int abcd[4]; - run_cpuid(1, 0, abcd); - return (abcd[2] & bit_POPCNT) == bit_POPCNT; -} - -bool has_cpuid_avx512_bw() -{ - int abcd[4]; - - run_cpuid(1, 0, abcd); - - int osxsave_mask = (1 << 27); - - // Ensure OS supports extended processor state management - if ((abcd[2] & osxsave_mask) != osxsave_mask) - return false; - - uint64_t ymm_mask = XSTATE_SSE | XSTATE_YMM; - uint64_t zmm_mask = XSTATE_SSE | XSTATE_YMM | XSTATE_ZMM; - uint64_t xcr0 = get_xcr0(); - - // Check AVX OS support - if ((xcr0 & ymm_mask) != ymm_mask) - return false; - - // Check AVX512 OS support - 
if ((xcr0 & zmm_mask) != zmm_mask) - return false; - - run_cpuid(7, 0, abcd); - - // AND_PreSieveTables_avx512 requires AVX512F, AVX512BW - return ((abcd[1] & bit_AVX512F) == bit_AVX512F && - (abcd[1] & bit_AVX512BW) == bit_AVX512BW); -} - -bool has_cpuid_avx512_vbmi2() -{ - int abcd[4]; - - run_cpuid(1, 0, abcd); - - int osxsave_mask = (1 << 27); - - // Ensure OS supports extended processor state management - if ((abcd[2] & osxsave_mask) != osxsave_mask) - return false; - - uint64_t ymm_mask = XSTATE_SSE | XSTATE_YMM; - uint64_t zmm_mask = XSTATE_SSE | XSTATE_YMM | XSTATE_ZMM; - uint64_t xcr0 = get_xcr0(); - - // Check AVX OS support - if ((xcr0 & ymm_mask) != ymm_mask) - return false; - - // Check AVX512 OS support - if ((xcr0 & zmm_mask) != zmm_mask) - return false; - - run_cpuid(7, 0, abcd); - - // PrimeGenerator::fillNextPrimes_avx512() requires AVX512F, AVX512VBMI & AVX512VBMI2 - return ((abcd[1] & bit_AVX512F) == bit_AVX512F && - (abcd[2] & (bit_AVX512VBMI | bit_AVX512VBMI2)) == (bit_AVX512VBMI | bit_AVX512VBMI2)); -} - -} // namespace