Re: [PATCH 6/7][ARM] Add ACLE intrinsics vqrdmlah and vqrdmlsh
On Mon, Dec 7, 2015 at 4:12 PM, Matthew Wahab wrote: > Ping. Updated patch attached. > Matthew > > > On 26/11/15 16:04, Matthew Wahab wrote: >> >> Hello, >> >> This patch adds the ACLE intrinsics for the instructions introduced in >> ARMv8.1. It adds the vqrdmlah and vqrdmlsh forms of the intrinsics to >> the arm_neon.h header, together with the ARM builtins used to implement >> them. The intrinsics are available when -march=armv8.1-a is enabled >> together with appropriate fpu options. >> >> Tested the series for arm-none-eabi with cross-compiled check-gcc on an >> ARMv8.1 emulator. Also tested arm-none-linux-gnueabihf with native >> bootstrap and make check. >> >> Ok for trunk? >> Matthew >> >> gcc/ >> 2015-11-26 Matthew Wahab >> >> * config/arm/arm_neon.h (vqrdmlah_s16, vqrdmlah_s32): New. >> (vqrdmlahq_s16, vqrdmlahq_s32): New. >> (vqrdmlsh_s16, vqrdmlsh_s32): New. >> (vqrdmlshq_s16, vqrdmlshq_s32): New. >> * config/arm/arm_neon_builtins.def: Add "vqrdmlah" and "vqrdmlsh". >> > OK. Ramana
Re: [PATCH 6/7][ARM] Add ACLE intrinsics vqrdmlah and vqrdmlsh
Ping. Updated patch attached. Matthew On 26/11/15 16:04, Matthew Wahab wrote: Hello, This patch adds the ACLE intrinsics for the instructions introduced in ARMv8.1. It adds the vqrdmlah and vqrdmlsh forms of the intrinsics to the arm_neon.h header, together with the ARM builtins used to implement them. The intrinsics are available when -march=armv8.1-a is enabled together with appropriate fpu options. Tested the series for arm-none-eabi with cross-compiled check-gcc on an ARMv8.1 emulator. Also tested arm-none-linux-gnueabihf with native bootstrap and make check. Ok for trunk? Matthew gcc/ 2015-11-26 Matthew Wahab * config/arm/arm_neon.h (vqrdmlah_s16, vqrdmlah_s32): New. (vqrdmlahq_s16, vqrdmlahq_s32): New. (vqrdmlsh_s16, vqrdmlsh_s32): New. (vqrdmlshq_s16, vqrdmlshq_s32): New. * config/arm/arm_neon_builtins.def: Add "vqrdmlah" and "vqrdmlsh". >From 1844027592d818e0de53a3da904ae6bfe1aef534 Mon Sep 17 00:00:00 2001 From: Matthew Wahab Date: Tue, 1 Sep 2015 16:21:44 +0100 Subject: [PATCH 6/7] [ARM] Add neon intrinsics vqrdmlah, vqrdmlsh. 
Change-Id: Ic40ff4d477f36ec01714c68e3b83b66208c7958b --- gcc/config/arm/arm_neon.h| 50 gcc/config/arm/arm_neon_builtins.def | 2 ++ 2 files changed, 52 insertions(+) diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index 0a33d21..b617f80 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -1158,6 +1158,56 @@ vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b) return (int32x4_t)__builtin_neon_vqrdmulhv4si (__a, __b); } +#ifdef __ARM_FEATURE_QRDMX +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmlah_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return (int16x4_t)__builtin_neon_vqrdmlahv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmlah_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return (int32x2_t)__builtin_neon_vqrdmlahv2si (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmlahq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) +{ + return (int16x8_t)__builtin_neon_vqrdmlahv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmlahq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) +{ + return (int32x4_t)__builtin_neon_vqrdmlahv4si (__a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmlsh_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return (int16x4_t)__builtin_neon_vqrdmlshv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmlsh_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return (int32x2_t)__builtin_neon_vqrdmlshv2si (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmlshq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) +{ + return (int16x8_t)__builtin_neon_vqrdmlshv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t 
__attribute__ ((__always_inline__)) +vqrdmlshq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) +{ + return (int32x4_t)__builtin_neon_vqrdmlshv4si (__a, __b, __c); +} +#endif + __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vmull_s8 (int8x8_t __a, int8x8_t __b) { diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index 0b719df..8d5c0ca 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -45,6 +45,8 @@ VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) VAR4 (BINOP, vqrdmulh, v4hi, v2si, v8hi, v4si) VAR2 (TERNOP, vqdmlal, v4hi, v2si) VAR2 (TERNOP, vqdmlsl, v4hi, v2si) +VAR4 (TERNOP, vqrdmlah, v4hi, v2si, v8hi, v4si) +VAR4 (TERNOP, vqrdmlsh, v4hi, v2si, v8hi, v4si) VAR3 (BINOP, vmullp, v8qi, v4hi, v2si) VAR3 (BINOP, vmulls, v8qi, v4hi, v2si) VAR3 (BINOP, vmullu, v8qi, v4hi, v2si) -- 2.1.4
[PATCH 6/7][ARM] Add ACLE intrinsics vqrdmlah and vqrdmlsh
Hello, This patch adds the ACLE intrinsics for the instructions introduced in ARMv8.1. It adds the vqrdmlah and vqrdmlsh forms of the intrinsics to the arm_neon.h header, together with the ARM builtins used to implement them. The intrinsics are available when -march=armv8.1-a is enabled together with appropriate fpu options. Tested the series for arm-none-eabi with cross-compiled check-gcc on an ARMv8.1 emulator. Also tested arm-none-linux-gnueabihf with native bootstrap and make check. Ok for trunk? Matthew gcc/ 2015-11-26 Matthew Wahab * config/arm/arm_neon.h (vqrdmlah_s16, vqrdmlah_s32): New. (vqrdmlahq_s16, vqrdmlahq_s32): New. (vqrdmlsh_s16, vqrdmlsh_s32): New. (vqrdmlshq_s16, vqrdmlshq_s32): New. * config/arm/arm_neon_builtins.def: Add "vqrdmlah" and "vqrdmlsh". >From 93e9db5bf06172f18f4e89e9533c66d8a0c4f2ca Mon Sep 17 00:00:00 2001 From: Matthew Wahab Date: Tue, 1 Sep 2015 16:21:44 +0100 Subject: [PATCH 6/7] [ARM] Add neon intrinsics vqrdmlah, vqrdmlsh. Change-Id: Ic40ff4d477f36ec01714c68e3b83b66208c7958b --- gcc/config/arm/arm_neon.h| 50 gcc/config/arm/arm_neon_builtins.def | 2 ++ 2 files changed, 52 insertions(+) diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index 0a33d21..b617f80 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -1158,6 +1158,56 @@ vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b) return (int32x4_t)__builtin_neon_vqrdmulhv4si (__a, __b); } +#ifdef __ARM_FEATURE_QRDMX +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmlah_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return (int16x4_t)__builtin_neon_vqrdmlahv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmlah_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return (int32x2_t)__builtin_neon_vqrdmlahv2si (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmlahq_s16 (int16x8_t __a, int16x8_t 
__b, int16x8_t __c) +{ + return (int16x8_t)__builtin_neon_vqrdmlahv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmlahq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) +{ + return (int32x4_t)__builtin_neon_vqrdmlahv4si (__a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmlsh_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return (int16x4_t)__builtin_neon_vqrdmlshv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmlsh_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return (int32x2_t)__builtin_neon_vqrdmlshv2si (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmlshq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) +{ + return (int16x8_t)__builtin_neon_vqrdmlshv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmlshq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) +{ + return (int32x4_t)__builtin_neon_vqrdmlshv4si (__a, __b, __c); +} +#endif + __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vmull_s8 (int8x8_t __a, int8x8_t __b) { diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index 0b719df..8d5c0ca 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -45,6 +45,8 @@ VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) VAR4 (BINOP, vqrdmulh, v4hi, v2si, v8hi, v4si) VAR2 (TERNOP, vqdmlal, v4hi, v2si) VAR2 (TERNOP, vqdmlsl, v4hi, v2si) +VAR4 (TERNOP, vqrdmlah, v4hi, v2si, v8hi, v4si) +VAR4 (TERNOP, vqrdmlsh, v4hi, v2si, v8hi, v4si) VAR3 (BINOP, vmullp, v8qi, v4hi, v2si) VAR3 (BINOP, vmulls, v8qi, v4hi, v2si) VAR3 (BINOP, vmullu, v8qi, v4hi, v2si) -- 2.1.4