Add run-time CPU feature detection for AArch64 ISA extensions on Windows platforms.
---
 source/CMakeLists.txt       | 71 ++++++++++++++++++++++++-------------
 source/common/aarch64/cpu.h | 57 +++++++++++++++++++++++++++++
 2 files changed, 104 insertions(+), 24 deletions(-)
diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt index 52870452c..4205db40e 100755 --- a/source/CMakeLists.txt +++ b/source/CMakeLists.txt @@ -88,7 +88,7 @@ elseif(ARM64MATCH GREATER "-1") option(AARCH64_WARNINGS_AS_ERRORS "Build with -Werror for AArch64 Intrinsics files" OFF) option(AARCH64_RUNTIME_CPU_DETECT "Enable AArch64 run-time CPU feature detection" ON) - if(NOT CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin") + if(NOT CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin|Windows") set(AARCH64_RUNTIME_CPU_DETECT OFF CACHE BOOL "" FORCE) message(STATUS "Run-time CPU feature detection unsupported on this platform") endif() @@ -306,6 +306,52 @@ if(GCC) endif() endif() + if(ENABLE_SVE OR ENABLE_SVE2) + set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) + string(APPEND CMAKE_REQUIRED_FLAGS " ${AARCH64_SVE_FLAG}") + set(OLD_CMAKE_TRY_COMPILE_TARGET_TYPE ${CMAKE_TRY_COMPILE_TARGET_TYPE}) + set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) + + # Check whether the compiler can compile SVE functions that require + # backup/restore of SVE registers according to AAPCS. + # https://github.com/llvm/llvm-project/issues/80009. + set(SVE_COMPILATION_TEST " +#include <arm_sve.h> +void other(); +svfloat32_t func(svfloat32_t a) { + other(); + return a; +} +int main() { return 0; }") + + check_c_source_compiles("${SVE_COMPILATION_TEST}" SVE_COMPILATION_C_TEST_COMPILED) + check_cxx_source_compiles("${SVE_COMPILATION_TEST}" SVE_COMPILATION_CXX_TEST_COMPILED) + + # Check if arm_neon_sve_bridge.h is available. 
+ set(SVE_HEADER_TEST " +#ifndef __ARM_NEON_SVE_BRIDGE +#error 1 +#endif +#include <arm_sve.h> +#include <arm_neon_sve_bridge.h> +int main() { return 0; }") + check_c_source_compiles("${SVE_HEADER_TEST}" SVE_HEADER_C_TEST_COMPILED) + check_cxx_source_compiles("${SVE_HEADER_TEST}" SVE_HEADER_CXX_TEST_COMPILED) + + set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS}) + set(CMAKE_TRY_COMPILE_TARGET_TYPE ${OLD_CMAKE_TRY_COMPILE_TARGET_TYPE}) + if (SVE_COMPILATION_C_TEST_COMPILED AND SVE_COMPILATION_CXX_TEST_COMPILED) + if (SVE_HEADER_C_TEST_COMPILED AND SVE_HEADER_CXX_TEST_COMPILED) + add_definitions(-DHAVE_SVE_BRIDGE=1) + set(HAVE_SVE_BRIDGE 1) + endif() + else() + set(ENABLE_SVE OFF CACHE BOOL "" FORCE) + set(ENABLE_SVE2 OFF CACHE BOOL "" FORCE) + message(STATUS "Disabling SVE and SVE2") + endif() + endif() + # Impose constraint that disabling one extension disables all 'higher order' ones. if(NOT ENABLE_NEON) message(STATUS "Disabling Neon") @@ -359,29 +405,6 @@ if(GCC) if(CC_HAS_FLAX_VEC_CONV_NONE) set(ARM_ARGS ${ARM_ARGS} -flax-vector-conversions=none) endif() - if(CPU_HAS_SVE) - set(SVE_HEADER_TEST " -#ifndef __ARM_NEON_SVE_BRIDGE -#error 1 -#endif -#include <arm_sve.h> -#include <arm_neon_sve_bridge.h> -int main() { return 0; }") - set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) - # CMAKE_REQUIRED_FLAGS requires a space-delimited string, whereas - # ARM_ARGS is defined and used elsewhere as a ;-list. - # Add `-march=...+sve` so the test functions correctly with Clang. 
- foreach(ARM_ARG ${ARM_ARGS} ${AARCH64_SVE_FLAG}) - string(APPEND CMAKE_REQUIRED_FLAGS " ${ARM_ARG}") - endforeach() - check_c_source_compiles("${SVE_HEADER_TEST}" SVE_HEADER_C_TEST_COMPILED) - check_cxx_source_compiles("${SVE_HEADER_TEST}" SVE_HEADER_CXX_TEST_COMPILED) - set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS}) - if(SVE_HEADER_C_TEST_COMPILED AND SVE_HEADER_CXX_TEST_COMPILED) - add_definitions(-DHAVE_SVE_BRIDGE=1) - set(HAVE_SVE_BRIDGE 1) - endif() - endif() endif() if(ENABLE_PIC) list(APPEND ARM_ARGS -DPIC) diff --git a/source/common/aarch64/cpu.h b/source/common/aarch64/cpu.h index 857ba980a..c61b86359 100644 --- a/source/common/aarch64/cpu.h +++ b/source/common/aarch64/cpu.h @@ -62,6 +62,63 @@ static inline int aarch64_get_cpu_flags() return flags; } +#elif defined(_WIN32) + +#include <windows.h> + +static inline int aarch64_get_cpu_flags() +{ + int flags = 0; +// IsProcessorFeaturePresent() parameter documentation: +// https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent#parameters +#if HAVE_NEON + flags |= X265_CPU_NEON; +#endif // HAVE_NEON +#if HAVE_NEON_DOTPROD +// Support for PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE was added in Windows SDK +// 20348, supported by Windows 11 and Windows Server 2022. +#if defined(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) + if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)) + { + flags |= X265_CPU_NEON_DOTPROD; + } +#endif // defined(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) +#endif // HAVE_NEON_DOTPROD +#if HAVE_NEON_I8MM +// Support for PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE was added in Windows SDK +// 26100. +#if defined(PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE) + // There's no PF_* flag yet that indicates whether Neon I8MM is available + // or not. But if SVE_I8MM is available, that also implies that Neon I8MM + // is available. 
+ if (IsProcessorFeaturePresent(PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE)) + { + flags |= X265_CPU_NEON_I8MM; + } +#endif // defined(PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE) +#endif // HAVE_NEON_I8MM +#if HAVE_SVE +// Support for PF_ARM_SVE_INSTRUCTIONS_AVAILABLE was added in Windows SDK 26100. +#if defined(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE) + if (IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE)) + { + flags |= X265_CPU_SVE; + } +#endif // defined(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE) +#endif // HAVE_SVE +#if HAVE_SVE2 +// Support for PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE was added in Windows SDK +// 26100. +#if defined(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE) + if (IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE)) + { + flags |= X265_CPU_SVE2; + } +#endif // defined(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE) +#endif // HAVE_SVE2 + return flags; +} + #elif defined(__linux__) #include <sys/auxv.h> -- 2.42.1
>From 6b81ac8521abcab83777d87ae2ac3d79c914ddf0 Mon Sep 17 00:00:00 2001 Message-ID: <6b81ac8521abcab83777d87ae2ac3d79c914ddf0.1730906531.git.hari.lim...@arm.com> In-Reply-To: <cover.1729809914.git.hari.lim...@arm.com> From: Hari Limaye <hari.lim...@arm.com> Date: Wed, 23 Oct 2024 16:54:59 +0100 Subject: [PATCH v2 4/6] AArch64: Add Windows run-time CPU feature detection Add run-time CPU feature detection for AArch64 ISA extensions on Windows platforms. --- source/CMakeLists.txt | 71 ++++++++++++++++++++++++------------- source/common/aarch64/cpu.h | 57 +++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+), 24 deletions(-) diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt index 52870452c..4205db40e 100755 --- a/source/CMakeLists.txt +++ b/source/CMakeLists.txt @@ -88,7 +88,7 @@ elseif(ARM64MATCH GREATER "-1") option(AARCH64_WARNINGS_AS_ERRORS "Build with -Werror for AArch64 Intrinsics files" OFF) option(AARCH64_RUNTIME_CPU_DETECT "Enable AArch64 run-time CPU feature detection" ON) - if(NOT CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin") + if(NOT CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin|Windows") set(AARCH64_RUNTIME_CPU_DETECT OFF CACHE BOOL "" FORCE) message(STATUS "Run-time CPU feature detection unsupported on this platform") endif() @@ -306,6 +306,52 @@ if(GCC) endif() endif() + if(ENABLE_SVE OR ENABLE_SVE2) + set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) + string(APPEND CMAKE_REQUIRED_FLAGS " ${AARCH64_SVE_FLAG}") + set(OLD_CMAKE_TRY_COMPILE_TARGET_TYPE ${CMAKE_TRY_COMPILE_TARGET_TYPE}) + set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) + + # Check whether the compiler can compile SVE functions that require + # backup/restore of SVE registers according to AAPCS. + # https://github.com/llvm/llvm-project/issues/80009. 
+ set(SVE_COMPILATION_TEST " +#include <arm_sve.h> +void other(); +svfloat32_t func(svfloat32_t a) { + other(); + return a; +} +int main() { return 0; }") + + check_c_source_compiles("${SVE_COMPILATION_TEST}" SVE_COMPILATION_C_TEST_COMPILED) + check_cxx_source_compiles("${SVE_COMPILATION_TEST}" SVE_COMPILATION_CXX_TEST_COMPILED) + + # Check if arm_neon_sve_bridge.h is available. + set(SVE_HEADER_TEST " +#ifndef __ARM_NEON_SVE_BRIDGE +#error 1 +#endif +#include <arm_sve.h> +#include <arm_neon_sve_bridge.h> +int main() { return 0; }") + check_c_source_compiles("${SVE_HEADER_TEST}" SVE_HEADER_C_TEST_COMPILED) + check_cxx_source_compiles("${SVE_HEADER_TEST}" SVE_HEADER_CXX_TEST_COMPILED) + + set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS}) + set(CMAKE_TRY_COMPILE_TARGET_TYPE ${OLD_CMAKE_TRY_COMPILE_TARGET_TYPE}) + if (SVE_COMPILATION_C_TEST_COMPILED AND SVE_COMPILATION_CXX_TEST_COMPILED) + if (SVE_HEADER_C_TEST_COMPILED AND SVE_HEADER_CXX_TEST_COMPILED) + add_definitions(-DHAVE_SVE_BRIDGE=1) + set(HAVE_SVE_BRIDGE 1) + endif() + else() + set(ENABLE_SVE OFF CACHE BOOL "" FORCE) + set(ENABLE_SVE2 OFF CACHE BOOL "" FORCE) + message(STATUS "Disabling SVE and SVE2") + endif() + endif() + # Impose constraint that disabling one extension disables all 'higher order' ones. if(NOT ENABLE_NEON) message(STATUS "Disabling Neon") @@ -359,29 +405,6 @@ if(GCC) if(CC_HAS_FLAX_VEC_CONV_NONE) set(ARM_ARGS ${ARM_ARGS} -flax-vector-conversions=none) endif() - if(CPU_HAS_SVE) - set(SVE_HEADER_TEST " -#ifndef __ARM_NEON_SVE_BRIDGE -#error 1 -#endif -#include <arm_sve.h> -#include <arm_neon_sve_bridge.h> -int main() { return 0; }") - set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) - # CMAKE_REQUIRED_FLAGS requires a space-delimited string, whereas - # ARM_ARGS is defined and used elsewhere as a ;-list. - # Add `-march=...+sve` so the test functions correctly with Clang. 
- foreach(ARM_ARG ${ARM_ARGS} ${AARCH64_SVE_FLAG}) - string(APPEND CMAKE_REQUIRED_FLAGS " ${ARM_ARG}") - endforeach() - check_c_source_compiles("${SVE_HEADER_TEST}" SVE_HEADER_C_TEST_COMPILED) - check_cxx_source_compiles("${SVE_HEADER_TEST}" SVE_HEADER_CXX_TEST_COMPILED) - set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS}) - if(SVE_HEADER_C_TEST_COMPILED AND SVE_HEADER_CXX_TEST_COMPILED) - add_definitions(-DHAVE_SVE_BRIDGE=1) - set(HAVE_SVE_BRIDGE 1) - endif() - endif() endif() if(ENABLE_PIC) list(APPEND ARM_ARGS -DPIC) diff --git a/source/common/aarch64/cpu.h b/source/common/aarch64/cpu.h index 857ba980a..c61b86359 100644 --- a/source/common/aarch64/cpu.h +++ b/source/common/aarch64/cpu.h @@ -62,6 +62,63 @@ static inline int aarch64_get_cpu_flags() return flags; } +#elif defined(_WIN32) + +#include <windows.h> + +static inline int aarch64_get_cpu_flags() +{ + int flags = 0; +// IsProcessorFeaturePresent() parameter documentation: +// https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent#parameters +#if HAVE_NEON + flags |= X265_CPU_NEON; +#endif // HAVE_NEON +#if HAVE_NEON_DOTPROD +// Support for PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE was added in Windows SDK +// 20348, supported by Windows 11 and Windows Server 2022. +#if defined(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) + if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)) + { + flags |= X265_CPU_NEON_DOTPROD; + } +#endif // defined(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) +#endif // HAVE_NEON_DOTPROD +#if HAVE_NEON_I8MM +// Support for PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE was added in Windows SDK +// 26100. +#if defined(PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE) + // There's no PF_* flag yet that indicates whether Neon I8MM is available + // or not. But if SVE_I8MM is available, that also implies that Neon I8MM + // is available. 
+ if (IsProcessorFeaturePresent(PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE)) + { + flags |= X265_CPU_NEON_I8MM; + } +#endif // defined(PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE) +#endif // HAVE_NEON_I8MM +#if HAVE_SVE +// Support for PF_ARM_SVE_INSTRUCTIONS_AVAILABLE was added in Windows SDK 26100. +#if defined(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE) + if (IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE)) + { + flags |= X265_CPU_SVE; + } +#endif // defined(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE) +#endif // HAVE_SVE +#if HAVE_SVE2 +// Support for PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE was added in Windows SDK +// 26100. +#if defined(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE) + if (IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE)) + { + flags |= X265_CPU_SVE2; + } +#endif // defined(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE) +#endif // HAVE_SVE2 + return flags; +} + #elif defined(__linux__) #include <sys/auxv.h> -- 2.42.1
_______________________________________________
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel