commit:     1f2eceb5dfcce9899649870caac59d62055e7b82
Author:     stefson <herrtimson <AT> yahoo <DOT> de>
AuthorDate: Tue Mar  7 11:01:52 2023 +0000
Commit:     Sam James <sam <AT> gentoo <DOT> org>
CommitDate: Tue Mar  7 11:44:19 2023 +0000
URL:        https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=1f2eceb5

dev-cpp/highway: fix compile without neon optimization on armv7

Closes: https://bugs.gentoo.org/869077
Signed-off-by: Steffen Kuhn <nielson2 <AT> yandex.com>
Closes: https://github.com/gentoo/gentoo/pull/29964
Signed-off-by: Sam James <sam <AT> gentoo.org>

 ...ile-for-armv7-targets-with-vfp4-and-lower.patch | 123 +++++++++++++++++++++
 dev-cpp/highway/highway-1.0.1-r1.ebuild            |   6 +-
 dev-cpp/highway/highway-1.0.3.ebuild               |   4 +
 3 files changed, 132 insertions(+), 1 deletion(-)

diff --git 
a/dev-cpp/highway/files/0001-fix-compile-for-armv7-targets-with-vfp4-and-lower.patch
 
b/dev-cpp/highway/files/0001-fix-compile-for-armv7-targets-with-vfp4-and-lower.patch
new file mode 100644
index 000000000000..ebf448cfbb24
--- /dev/null
+++ 
b/dev-cpp/highway/files/0001-fix-compile-for-armv7-targets-with-vfp4-and-lower.patch
@@ -0,0 +1,123 @@
+https://github.com/google/highway/commit/dc63f813c465f3bf95cb5b98f01aeed28b81173c
+https://github.com/google/highway/pull/1143
+
+https://github.com/google/highway/issues/834
+https://github.com/google/highway/issues/1032
+
+https://bugs.gentoo.org/869077
+
+From dc63f813c465f3bf95cb5b98f01aeed28b81173c Mon Sep 17 00:00:00 2001
+From: Julien Olivain <[email protected]>
+Date: Mon, 20 Feb 2023 23:22:28 +0100
+Subject: [PATCH] Fix compilation for armv7 targets with vfp < v4 and gcc >= 8
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+When using a armv7 gcc >= 8 toolchain (like [1]) with Highway
+configured with -DHWY_CMAKE_ARM7=OFF and HWY_ENABLE_CONTRIB=ON,
+compilation fails with error:
+
+    In file included from /build/highway-1.0.3/hwy/ops/arm_neon-inl.h:33,
+                     from /build/highway-1.0.3/hwy/highway.h:358,
+                     from 
/build/highway-1.0.3/hwy/contrib/sort/shared-inl.h:104,
+                     from 
/build/highway-1.0.3/hwy/contrib/sort/traits128-inl.h:27,
+                     from 
/build/highway-1.0.3/hwy/contrib/sort/vqsort_128d.cc:23,
+                     from /build/highway-1.0.3/hwy/foreach_target.h:81,
+                     from 
/build/highway-1.0.3/hwy/contrib/sort/vqsort_128d.cc:20:
+    
/toolchain/lib/gcc/arm-buildroot-linux-gnueabihf/12.2.0/include/arm_neon.h: In 
function ‘void hwy::N_NEON::StoreU(Vec128<long long unsigned int, 2>, 
Full128<long long unsigned int>, uint64_t*)’:
+    
/toolchain/lib/gcc/arm-buildroot-linux-gnueabihf/12.2.0/include/arm_neon.h:11052:1:
 error: inlining failed in call to ‘always_inline’ ‘void vst1q_u64(uint64_t*, 
uint64x2_t)’: target specific option mismatch
+    11052 | vst1q_u64 (uint64_t * __a, uint64x2_t __b)
+          | ^~~~~~~~~
+    /build/highway-1.0.3/hwy/ops/arm_neon-inl.h:2786:12: note: called from here
+     2786 |   vst1q_u64(unaligned, v.raw);
+          |   ~~~~~~~~~^~~~~~~~~~~~~~~~~~
+
+The same errors happen when configured with HWY_ENABLE_EXAMPLES=ON,
+or from client libraries like libjxl (at other places).
+
+The issue is that Highway Arm NEON ops have a dependency on the
+Advanced SIMD (Neon) v2 and the VFPv4 floating-point instructions.
+The SIMD (Neon) v1 and VFPv3 instructions are not supported.
+
+There was several attempts to fix variants of this issues.
+See #834 and #1032.
+
+HWY_NEON target is selected only if __ARM_NEON is defined. See:
+https://github.com/google/highway/blob/1.0.3/hwy/detect_targets.h#L251
+
+This test is not sufficient since __ARM_NEON will be predefined in
+any cases when Neon is enabled (neon-vfpv3, neon-vfpv4).
+
+The issue is that HWY_CMAKE_ARM7=ON implies VFPv4 / NEON SIMD v2.
+When setting HWY_CMAKE_ARM7=OFF, "neon-vfpv4" will not be forced,
+but the code is still using intrinsics assuming VFPv4. Gcc will fail
+with error because code cannot be generated for the selected
+architecture.
+
+This issue can be avoided by adding "-DHWY_DISABLED_TARGETS=HWY_NEON" in
+CXXFLAGS. The problem with this solution is that every client program will
+also need to do the same. This goes against the very purpose of
+"hwy/detect_targets.h".
+
+Technically, Armv7-a processors with VFPv4 can be detected using some
+ACLE (Arm C Language Extensions [2]) predefined macros:
+
+Basically, we want Highway to define HWY_NEON only when the target
+supports SIMDv2/VFPv4 or higher. An older target with vfpv3 only
+(e.g. Cortex-A8, A9, ...) would NOT define HWY_NEON, and therefore
+would fallback on HWY_SCALAR implementation.
+
+However, not all compiler completely support ACLE. There is also
+several versions too. So we cannot easily rely on macros like
+"__ARM_VFPV4__" (which clang predefine, but not gcc).
+
+The alternative solution proposed in this patch, is to declare the
+HWY_NEON target architecture as broken, when we detect the target is
+Armv7-A, but mandatory features for vfpv4 (namely half-float, FMA)
+are missing. Half-floats are tested using the macro __ARM_NEON_FP,
+and the FMA with the macro __ARM_FEATURE_FMA. See ACLE [2]. The
+intent of declaring the target as broken, rather than selecting
+HWY_NEON only if vfpv4 features are detected is to remain a bit
+conservative, since the detection is slithly inaccurate.
+
+For a given compiler/cflags, predefined macros for Arm/ACLE can be
+reviewed with commands like:
+
+    arm-linux-gnueabihf-gcc -mcpu=cortex-a9 -mfpu=neon-vfpv3 -Wp,-dM -E -c - < 
/dev/null | grep -Fi arm | sort
+    arm-linux-gnueabihf-gcc -mcpu=cortex-a7 -mfpu=neon-vfpv4 -Wp,-dM -E -c - < 
/dev/null | grep -Fi arm | sort
+    clang -target armv7a -mcpu=cortex-a9 -mfpu=neon-vfpv3 -mfloat-abi=hard 
-Wp,-dM -E -c - < /dev/null | grep -Fi arm | sort
+    clang -target armv7a -mcpu=cortex-a7 -mfpu=neon-vfpv4 -mfloat-abi=hard 
-Wp,-dM -E -c - < /dev/null | grep -Fi arm | sort
+
+The different values of __ARM_NEON_FP can be seen, depending which
+"-mfpu" is passed. Same for __ARM_FEATURE_FMA.
+
+[1] 
https://toolchains.bootlin.com/downloads/releases/toolchains/armv7-eabihf/tarballs/armv7-eabihf--glibc--bleeding-edge-2022.08-1.tar.bz2
+[2] https://github.com/ARM-software/acle/
+
+Signed-off-by: Julien Olivain <[email protected]>
+---
+ hwy/detect_targets.h | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+diff --git a/hwy/detect_targets.h b/hwy/detect_targets.h
+index 2beca95b..40ae7fe7 100644
+--- a/hwy/detect_targets.h
++++ b/hwy/detect_targets.h
+@@ -154,6 +154,16 @@
+      (defined(__BYTE_ORDER) && __BYTE_ORDER == __BIG_ENDIAN))
+ #define HWY_BROKEN_TARGETS (HWY_NEON)
+ 
++// armv7-a without a detected vfpv4 is not supported
++// (for example Cortex-A8, Cortex-A9)
++// vfpv4 always have neon half-float _and_ FMA.
++#elif HWY_ARCH_ARM_V7 && \
++    (__ARM_ARCH_PROFILE == 'A') && \
++    !defined(__ARM_VFPV4__) && \
++        !((__ARM_NEON_FP & 0x2 /* half-float */) && \
++          (__ARM_FEATURE_FMA == 1))
++#define HWY_BROKEN_TARGETS (HWY_NEON)
++
+ // SVE[2] require recent clang or gcc versions.
+ #elif (HWY_COMPILER_CLANG && HWY_COMPILER_CLANG < 1100) || \
+     (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1000)

diff --git a/dev-cpp/highway/highway-1.0.1-r1.ebuild 
b/dev-cpp/highway/highway-1.0.1-r1.ebuild
index e4a2c4b87d4b..b0a7900ce124 100644
--- a/dev-cpp/highway/highway-1.0.1-r1.ebuild
+++ b/dev-cpp/highway/highway-1.0.1-r1.ebuild
@@ -1,4 +1,4 @@
-# Copyright 2021-2022 Gentoo Authors
+# Copyright 2021-2023 Gentoo Authors
 # Distributed under the terms of the GNU General Public License v2
 
 EAPI=8
@@ -24,6 +24,10 @@ DEPEND="test? ( dev-cpp/gtest[${MULTILIB_USEDEP}] )"
 
 RESTRICT="!test? ( test )"
 
+PATCHES=(
+       
"${FILESDIR}"/0001-fix-compile-for-armv7-targets-with-vfp4-and-lower.patch
+)
+
 multilib_src_configure() {
        local mycmakeargs=(
                -DHWY_CMAKE_ARM7=$(usex cpu_flags_arm_neon)

diff --git a/dev-cpp/highway/highway-1.0.3.ebuild 
b/dev-cpp/highway/highway-1.0.3.ebuild
index af752cf34a06..c4d4ed034ee9 100644
--- a/dev-cpp/highway/highway-1.0.3.ebuild
+++ b/dev-cpp/highway/highway-1.0.3.ebuild
@@ -24,6 +24,10 @@ DEPEND="test? ( dev-cpp/gtest[${MULTILIB_USEDEP}] )"
 
 RESTRICT="!test? ( test )"
 
+PATCHES=(
+       
"${FILESDIR}"/0001-fix-compile-for-armv7-targets-with-vfp4-and-lower.patch
+)
+
 multilib_src_configure() {
        local mycmakeargs=(
                -DHWY_CMAKE_ARM7=$(usex cpu_flags_arm_neon)

Reply via email to