From c2038d6d6825354a5137db9cdedabb2067d5b6d2 Mon Sep 17 00:00:00 2001 From: Harshitha Suresh <harshi...@multicorewareinc.com> Date: Wed, 9 Oct 2024 11:58:03 +0530 Subject: [PATCH] Revert "AArch64: Runtime CPU feature detection"
This commit is being reverted since I8MM ARM optimization feature detection flag is not yet supported by Windows. --- .../msys/make-x86_64-w64-mingw32-Makefiles.sh | 8 -- build/msys/toolchain-x86_64-w64-mingw32.cmake | 6 -- source/CMakeLists.txt | 21 ++--- source/common/CMakeLists.txt | 5 -- source/common/cpu.cpp | 85 +------------------ 5 files changed, 9 insertions(+), 116 deletions(-) delete mode 100644 build/msys/make-x86_64-w64-mingw32-Makefiles.sh delete mode 100644 build/msys/toolchain-x86_64-w64-mingw32.cmake diff --git a/build/msys/make-x86_64-w64-mingw32-Makefiles.sh b/build/msys/make-x86_64-w64-mingw32-Makefiles.sh deleted file mode 100644 index d98eced87..000000000 --- a/build/msys/make-x86_64-w64-mingw32-Makefiles.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/sh - -# This will generate a cross-compile environment, compiling an x86_64 -# Win64 target from a 32bit MinGW32 host environment. If your MinGW -# install is 64bit, you can use the native compiler batch file: -# make-Makefiles.sh - -cmake -G "MSYS Makefiles" -DCMAKE_TOOLCHAIN_FILE=toolchain-x86_64-w64-mingw32.cmake ../../source && cmake-gui ../../source diff --git a/build/msys/toolchain-x86_64-w64-mingw32.cmake b/build/msys/toolchain-x86_64-w64-mingw32.cmake deleted file mode 100644 index a3f768b7e..000000000 --- a/build/msys/toolchain-x86_64-w64-mingw32.cmake +++ /dev/null @@ -1,6 +0,0 @@ -SET(CMAKE_SYSTEM_NAME Windows) -SET(CMAKE_C_COMPILER x86_64-w64-mingw32-gcc) -SET(CMAKE_CXX_COMPILER x86_64-w64-mingw32-g++) -SET(CMAKE_RC_COMPILER x86_64-w64-mingw32-windres) -SET(CMAKE_RANLIB x86_64-w64-mingw32-ranlib) -SET(CMAKE_ASM_YASM_COMPILER yasm) diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt index cd19050c3..8a3128bb7 100755 --- a/source/CMakeLists.txt +++ b/source/CMakeLists.txt @@ -303,12 +303,10 @@ if(GCC) endif() endif() - set(ARM64_ARCH_ARGS "-O3") if(CPU_HAS_NEON_DOTPROD) # Neon DotProd is mandatory from Armv8.4. 
message(STATUS "Found Neon DotProd") - set(ARM64_ARCH_ARGS -march=armv8.2-a+dotprod) - set(ARM_ARGS -O3) + set(ARM_ARGS -O3 -march=armv8.2-a+dotprod) add_definitions(-DHAVE_NEON_DOTPROD=1) endif() if(CPU_HAS_NEON_I8MM) @@ -318,8 +316,7 @@ if(GCC) if(NOT CPU_HAS_NEON_DOTPROD) message(FATAL_ERROR "Unsupported AArch64 feature combination (Neon I8MM without Neon DotProd)") endif() - set(ARM64_ARCH_ARGS -march=armv8.2-a+dotprod+i8mm) - set(ARM_ARGS -O3) + set(ARM_ARGS -O3 -march=armv8.2-a+dotprod+i8mm) add_definitions(-DHAVE_NEON_I8MM=1) endif() if(CPU_HAS_SVE) @@ -328,15 +325,13 @@ if(GCC) if(NOT CPU_HAS_NEON_I8MM) message(FATAL_ERROR "Unsupported AArch64 feature combination (SVE without Neon I8MM)") endif() - set(ARM64_ARCH_ARGS -march=armv8.2-a+dotprod+i8mm+sve) - set(ARM_ARGS -O3) + set(ARM_ARGS -O3 -march=armv8.2-a+dotprod+i8mm+sve) add_definitions(-DHAVE_SVE=1) endif() if(CPU_HAS_SVE2) message(STATUS "Found SVE2") # SVE2 is only available from Armv9.0, and armv9-a implies +dotprod - set(ARM64_ARCH_ARGS -march=armv9-a+i8mm+sve2) - set(ARM_ARGS -O3) + set(ARM_ARGS -O3 -march=armv8.2-a+dotprod+i8mm+sve) add_definitions(-DHAVE_SVE2=1) endif() set(ARM_ARGS ${ARM_ARGS} -fPIC) @@ -701,7 +696,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY) add_custom_command( OUTPUT ${ASM}.${SUFFIX} COMMAND ${CMAKE_CXX_COMPILER} - ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX} + ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX} DEPENDS ${ASM_SRC}) endforeach() if(CPU_HAS_SVE2) @@ -712,7 +707,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY) add_custom_command( OUTPUT ${ASM}.${SUFFIX} COMMAND ${CMAKE_CXX_COMPILER} - ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX} + ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX} DEPENDS ${ASM_SRC}) endforeach() endif() @@ -724,7 +719,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY) add_custom_command( OUTPUT ${ASM}.${SUFFIX} COMMAND 
${CMAKE_CXX_COMPILER} - ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX} + ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX} DEPENDS ${ASM_SRC}) endforeach() endif() @@ -736,7 +731,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY) add_custom_command( OUTPUT ${ASM}.${SUFFIX} COMMAND ${CMAKE_CXX_COMPILER} - ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX} + ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX} DEPENDS ${ASM_SRC}) endforeach() endif() diff --git a/source/common/CMakeLists.txt b/source/common/CMakeLists.txt index 33025cada..dc4a74107 100644 --- a/source/common/CMakeLists.txt +++ b/source/common/CMakeLists.txt @@ -123,34 +123,29 @@ if(ENABLE_ASSEMBLY AND (ARM64 OR CROSS_COMPILE_ARM64)) set(ARM_ASMS_NEON_DOTPROD "${A_SRCS_NEON_DOTPROD}" CACHE INTERNAL "Arm Assembly Sources that use the Neon DotProd extension") foreach(SRC ${C_SRCS_NEON}) set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC}) - set_source_files_properties( ${SRC} PROPERTIES COMPILE_FLAGS ${ARM64_ARCH_ARGS} ) endforeach() if(CPU_HAS_NEON_I8MM) foreach(SRC ${C_SRCS_NEON_I8MM}) set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC}) - set_source_files_properties( aarch64/${SRC} PROPERTIES COMPILE_FLAGS ${ARM64_ARCH_ARGS} ) endforeach() endif() if(CPU_HAS_NEON_DOTPROD) foreach(SRC ${C_SRCS_NEON_DOTPROD}) set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC}) - set_source_files_properties( aarch64/${SRC} PROPERTIES COMPILE_FLAGS ${ARM64_ARCH_ARGS} ) endforeach() endif() if(CPU_HAS_SVE AND HAVE_SVE_BRIDGE) foreach(SRC ${C_SRCS_SVE}) set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC}) - set_source_files_properties( aarch64/${SRC} PROPERTIES COMPILE_FLAGS ${ARM64_ARCH_ARGS} ) endforeach() endif() if(CPU_HAS_SVE2 AND HAVE_SVE_BRIDGE) foreach(SRC ${C_SRCS_SVE2}) set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC}) - set_source_files_properties( aarch64/${SRC} PROPERTIES COMPILE_FLAGS ${ARM64_ARCH_ARGS} ) 
endforeach() endif() diff --git a/source/common/cpu.cpp b/source/common/cpu.cpp index c1fa928e4..d18aeb8d2 100644 --- a/source/common/cpu.cpp +++ b/source/common/cpu.cpp @@ -391,8 +391,7 @@ uint32_t cpu_detect(bool benableavx512) #elif X265_ARCH_ARM64 -#if defined(_MSC_VER) || defined(__APPLE__) -uint32_t cpu_detect(bool /*benableavx512*/) +uint32_t cpu_detect(bool benableavx512) { int flags = 0; @@ -417,88 +416,6 @@ uint32_t cpu_detect(bool /*benableavx512*/) return flags; } -// TODO: Remove isOryonCPU() once Windows defines PF_ flag for I8MM on supported ARM64 devices -#elif defined(__MINGW64__) // Windows+Aarch64 - -#include <windows.h> -#include <processthreadsapi.h> - -bool isOryonCPU() -{ - - char processorName[128]; - DWORD bufferSize = 128; - - LONG result = RegGetValue(HKEY_LOCAL_MACHINE, "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", "ProcessorNameString", RRF_RT_ANY, NULL, (PVOID)&processorName, &bufferSize); - if (strstr(processorName, "Oryon") != NULL) - { - return true; - } - else - { - return false; - } -} -uint32_t cpu_detect(bool /*benableavx512*/) -{ - - int flags = 0; - -#ifdef ENABLE_ASSEMBLY - #if HAVE_NEON - flags |= X265_CPU_NEON; // All of ARM64 has NEON - #endif - #if HAVE_NEON_DOTPROD && defined(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) - flags |= IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) ? X265_CPU_NEON_DOTPROD : 0; - #endif - #if HAVE_NEON_I8MM - flags |= isOryonCPU() ? X265_CPU_NEON_I8MM : 0; - #endif - #if HAVE_SVE && defined(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE) - flags |= IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE) ? X265_CPU_SVE : 0; - #endif - #if HAVE_SVE2 && defined(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE) - flags |= IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE) ? 
X265_CPU_SVE2 : 0; - #endif -#endif - - return flags; -} // end of Windows+Aarch64 - -#else // Linux+Aarch64 - -#include <asm/hwcap.h> -#include <sys/auxv.h> - -uint32_t cpu_detect(bool /*benableavx512*/) -{ - unsigned long hwcaps = getauxval(AT_HWCAP); - unsigned long hwcaps2 = getauxval(AT_HWCAP2); - - int flags = 0; - -#ifdef ENABLE_ASSEMBLY - #if HAVE_NEON - flags |= X265_CPU_NEON; // All of ARM64 has NEON - #endif - #if HAVE_NEON_DOTPROD - flags |= (hwcaps & HWCAP_ASIMDDP ? X265_CPU_NEON_DOTPROD : 0); - #endif - #if HAVE_NEON_I8MM - flags |= (hwcaps2 & HWCAP2_I8MM ? X265_CPU_NEON_I8MM : 0); - #endif - #if HAVE_SVE - flags |= (hwcaps & HWCAP_SVE ? X265_CPU_SVE : 0); - #endif - #if HAVE_SVE2 - flags |= (hwcaps2 & HWCAP2_SVE2 ? X265_CPU_SVE2 : 0); - #endif -#endif - - return flags; -} -#endif // end of Linux+AArch64 - #elif X265_ARCH_POWER8 uint32_t cpu_detect(bool benableavx512) -- 2.36.0.windows.1
0001-Revert-AArch64-Runtime-CPU-feature-detection.patch
Description: Binary data
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel