Hi, Thank you for putting up this patch.
In “source/common/cpu.cpp”, the test for Neon I8MM is checking the wrong flag: HWCAP2_SVEI8MM reports the SVE Int8 matrix-multiply extension, whereas the Neon (AdvSIMD) I8MM extension is reported by HWCAP2_I8MM. The line: + flags |= (hwcaps2 & HWCAP2_SVEI8MM ? X265_CPU_NEON_I8MM : 0); should be: + flags |= (hwcaps2 & HWCAP2_I8MM ? X265_CPU_NEON_I8MM : 0); Many thanks, Hari From: x265-devel <x265-devel-boun...@videolan.org> on behalf of Dash Santosh <dash.sathyanaraya...@multicorewareinc.com> Date: Thursday, 26 September 2024 at 07:13 To: x265-devel@videolan.org <x265-devel@videolan.org> Subject: [x265] [PATCH] AArch64: Runtime CPU feature detection From e9614d170f93f3ad4f01e95abfed0a260f218bd5 Mon Sep 17 00:00:00 2001 From: Min Chen <chenm...@163.com<mailto:chenm...@163.com>> Date: Sat, 14 Sep 2024 14:25:28 -0700 Subject: [PATCH] AArch64: Runtime CPU feature detection --- source/CMakeLists.txt | 20 ++++++++++++-------- source/common/CMakeLists.txt | 5 +++++ source/common/cpu.cpp | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 8 deletions(-) diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt index 37b83f959..32a99206f 100755 --- a/source/CMakeLists.txt +++ b/source/CMakeLists.txt @@ -306,7 +306,8 @@ if(GCC) if(CPU_HAS_NEON_DOTPROD) # Neon DotProd is mandatory from Armv8.4. 
message(STATUS "Found Neon DotProd") - set(ARM_ARGS -O3 -march=armv8.2-a+dotprod) + set(ARM64_ARCH_ARGS -march=armv8.2-a+dotprod) + set(ARM_ARGS -O3) add_definitions(-DHAVE_NEON_DOTPROD=1) endif() if(CPU_HAS_NEON_I8MM) @@ -316,7 +317,8 @@ if(GCC) if(NOT CPU_HAS_NEON_DOTPROD) message(FATAL_ERROR "Unsupported AArch64 feature combination (Neon I8MM without Neon DotProd)") endif() - set(ARM_ARGS -O3 -march=armv8.2-a+dotprod+i8mm) + set(ARM64_ARCH_ARGS -march=armv8.2-a+dotprod+i8mm) + set(ARM_ARGS -O3) add_definitions(-DHAVE_NEON_I8MM=1) endif() if(CPU_HAS_SVE) @@ -325,13 +327,15 @@ if(GCC) if(NOT CPU_HAS_NEON_I8MM) message(FATAL_ERROR "Unsupported AArch64 feature combination (SVE without Neon I8MM)") endif() - set(ARM_ARGS -O3 -march=armv8.2-a+dotprod+i8mm+sve) + set(ARM64_ARCH_ARGS -march=armv8.2-a+dotprod+i8mm+sve) + set(ARM_ARGS -O3) add_definitions(-DHAVE_SVE=1) endif() if(CPU_HAS_SVE2) message(STATUS "Found SVE2") # SVE2 is only available from Armv9.0, and armv9-a implies +dotprod - set(ARM_ARGS -O3 -march=armv9-a+i8mm+sve2) + set(ARM64_ARCH_ARGS -march=armv9-a+i8mm+sve2) + set(ARM_ARGS -O3) add_definitions(-DHAVE_SVE2=1) endif() set(ARM_ARGS ${ARM_ARGS} -fPIC) @@ -692,7 +696,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY) add_custom_command( OUTPUT ${ASM}.${SUFFIX} COMMAND ${CMAKE_CXX_COMPILER} - ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX} + ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX} DEPENDS ${ASM_SRC}) endforeach() if(CPU_HAS_SVE2) @@ -703,7 +707,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY) add_custom_command( OUTPUT ${ASM}.${SUFFIX} COMMAND ${CMAKE_CXX_COMPILER} - ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX} + ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX} DEPENDS ${ASM_SRC}) endforeach() endif() @@ -715,7 +719,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY) add_custom_command( OUTPUT ${ASM}.${SUFFIX} COMMAND 
${CMAKE_CXX_COMPILER} - ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX} + ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX} DEPENDS ${ASM_SRC}) endforeach() endif() @@ -727,7 +731,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY) add_custom_command( OUTPUT ${ASM}.${SUFFIX} COMMAND ${CMAKE_CXX_COMPILER} - ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX} + ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX} DEPENDS ${ASM_SRC}) endforeach() endif() diff --git a/source/common/CMakeLists.txt b/source/common/CMakeLists.txt index dc4a74107..33025cada 100644 --- a/source/common/CMakeLists.txt +++ b/source/common/CMakeLists.txt @@ -123,29 +123,34 @@ if(ENABLE_ASSEMBLY AND (ARM64 OR CROSS_COMPILE_ARM64)) set(ARM_ASMS_NEON_DOTPROD "${A_SRCS_NEON_DOTPROD}" CACHE INTERNAL "Arm Assembly Sources that use the Neon DotProd extension") foreach(SRC ${C_SRCS_NEON}) set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC}) + set_source_files_properties( ${SRC} PROPERTIES COMPILE_FLAGS ${ARM64_ARCH_ARGS} ) endforeach() if(CPU_HAS_NEON_I8MM) foreach(SRC ${C_SRCS_NEON_I8MM}) set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC}) + set_source_files_properties( aarch64/${SRC} PROPERTIES COMPILE_FLAGS ${ARM64_ARCH_ARGS} ) endforeach() endif() if(CPU_HAS_NEON_DOTPROD) foreach(SRC ${C_SRCS_NEON_DOTPROD}) set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC}) + set_source_files_properties( aarch64/${SRC} PROPERTIES COMPILE_FLAGS ${ARM64_ARCH_ARGS} ) endforeach() endif() if(CPU_HAS_SVE AND HAVE_SVE_BRIDGE) foreach(SRC ${C_SRCS_SVE}) set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC}) + set_source_files_properties( aarch64/${SRC} PROPERTIES COMPILE_FLAGS ${ARM64_ARCH_ARGS} ) endforeach() endif() if(CPU_HAS_SVE2 AND HAVE_SVE_BRIDGE) foreach(SRC ${C_SRCS_SVE2}) set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC}) + set_source_files_properties( aarch64/${SRC} PROPERTIES COMPILE_FLAGS ${ARM64_ARCH_ARGS} ) 
endforeach() endif() diff --git a/source/common/cpu.cpp b/source/common/cpu.cpp index 61cdaadfb..24c60ff0e 100644 --- a/source/common/cpu.cpp +++ b/source/common/cpu.cpp @@ -391,6 +391,8 @@ uint32_t cpu_detect(bool benableavx512) #elif X265_ARCH_ARM64 +// TODO: Support ARM on Windows +#if _MSC_VER uint32_t cpu_detect(bool benableavx512) { int flags = 0; @@ -413,6 +415,37 @@ uint32_t cpu_detect(bool benableavx512) return flags; } +#else // Linux+Aarch64 + +#include <asm/hwcap.h> +#include <sys/auxv.h> + +uint32_t cpu_detect(bool benableavx512) +{ + unsigned long hwcaps = getauxval(AT_HWCAP); + unsigned long hwcaps2 = getauxval(AT_HWCAP2); + + int flags = 0; + + #if HAVE_NEON + flags |= X265_CPU_NEON; // All of ARM64 has NEON + #endif + #if HAVE_NEON_DOTPROD + flags |= (hwcaps & HWCAP_ASIMDDP ? X265_CPU_NEON_DOTPROD : 0); + #endif + #if HAVE_NEON_I8MM + flags |= (hwcaps2 & HWCAP2_SVEI8MM ? X265_CPU_NEON_I8MM : 0); + #endif + #if HAVE_SVE + flags |= (hwcaps & HWCAP_SVE ? X265_CPU_SVE : 0); + #endif + #if HAVE_SVE2 + flags |= (hwcaps2 & HWCAP2_SVE2 ? X265_CPU_SVE2 : 0); + #endif + + return flags; +} +#endif // end of Linux+AArch64 #elif X265_ARCH_POWER8 -- 2.43.0.windows.1 -- [Image removed by sender.]<https://multicorewareinc.com/> [Image removed by sender.] <https://www.linkedin.com/company/multicoreware-inc/> [Image removed by sender.] <https://twitter.com/MulticoreWare> [Image removed by sender.] <https://www.facebook.com/multicoreware> [Image removed by sender.] <https://www.youtube.com/channel/UCXZ1A1MzS5JwBqwBkNfsBBw?sub_confirmation=1> [Image removed by sender.] <https://www.instagram.com/multicoreware.inc/> Dash Santosh Research Engineer, Video Engineering Mobile: +91 78679 43737 IndiQube Echo Point, Avinashi Road Coimbatore - 641 014 [Image removed by sender.]
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel