vlc | branch: master | Janne Grunau <janne-...@jannau.net> | Mon Feb 19 00:47:53 2018 +0100| [85b89f35bdd5799436ea51086c7a38f1f4b3f451] | committer: Martin Storsjö
arm: make the assembler functions compatible with non-ELF/gas platforms. Allow assembling ARM NEON functions for iOS and ARM Windows. > http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=85b89f35bdd5799436ea51086c7a38f1f4b3f451 --- modules/arm_neon/amplify.S | 8 ++++-- modules/arm_neon/asm.S | 39 ++++++++++++++++++++++++++ modules/arm_neon/deinterleave_chroma.S | 10 ++++--- modules/arm_neon/i420_rgb.S | 10 ++++--- modules/arm_neon/i420_rv16.S | 10 ++++--- modules/arm_neon/i420_yuyv.S | 14 ++++----- modules/arm_neon/i422_yuyv.S | 14 ++++----- modules/arm_neon/nv12_rgb.S | 10 ++++--- modules/arm_neon/nv21_rgb.S | 10 ++++--- modules/arm_neon/simple_channel_mixer.S | 38 +++++++++---------------- modules/arm_neon/yuyv_i422.S | 14 ++++----- modules/video_filter/deinterlace/merge_arm.S | 22 +++++++-------- modules/video_filter/deinterlace/merge_arm64.S | 10 +++---- 13 files changed, 122 insertions(+), 87 deletions(-) diff --git a/modules/arm_neon/amplify.S b/modules/arm_neon/amplify.S index 5938118378..9e655afe16 100644 --- a/modules/arm_neon/amplify.S +++ b/modules/arm_neon/amplify.S @@ -18,18 +18,20 @@ @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA. 
@****************************************************************************/ +#include "asm.S" + .syntax unified .arm +#if HAVE_AS_FPU_DIRECTIVE .fpu neon +#endif .text #define DST r0 #define SRC r1 #define SIZE r2 .align 2 - .global amplify_float_arm_neon - .type amplify_float_arm_neon, %function -amplify_float_arm_neon: +function amplify_float_arm_neon cmp SIZE, #0 bxeq lr #ifdef __ARM_PCS diff --git a/modules/arm_neon/asm.S b/modules/arm_neon/asm.S new file mode 100644 index 0000000000..728391ea37 --- /dev/null +++ b/modules/arm_neon/asm.S @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2018 Janne Grunau <janne-li...@jannau.net> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifdef __APPLE__ +# define EXTERN_ASM _ +#else +# define EXTERN_ASM +#endif + +#if defined(__APPLE__) || defined(_WIN32) +# define HAVE_AS_ARCH_DIRECTIVE 0 +# define HAVE_AS_FPU_DIRECTIVE 0 +#else +# define HAVE_AS_ARCH_DIRECTIVE 1 +# define HAVE_AS_FPU_DIRECTIVE 1 +#endif + +.macro function name + .globl EXTERN_ASM\name +#ifdef __ELF__ + .type EXTERN_ASM\name, %function +#endif +EXTERN_ASM\name: +.endm diff --git a/modules/arm_neon/deinterleave_chroma.S b/modules/arm_neon/deinterleave_chroma.S index 019d647ed6..9cd01c7aed 100644 --- a/modules/arm_neon/deinterleave_chroma.S +++ b/modules/arm_neon/deinterleave_chroma.S @@ -19,8 +19,12 @@ @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA. @****************************************************************************/ +#include "asm.S" + .syntax unified - .fpu neon +#if HAVE_AS_FPU_DIRECTIVE + .fpu neon +#endif .text #define UV r0 @@ -35,9 +39,7 @@ #define OPAD lr .align 2 - .global deinterleave_chroma_neon - .type deinterleave_chroma_neon, %function -deinterleave_chroma_neon: +function deinterleave_chroma_neon push {r4-r6,lr} ldmia r0, {U, V, OPITCH} ldmia r1, {UV, IPITCH} diff --git a/modules/arm_neon/i420_rgb.S b/modules/arm_neon/i420_rgb.S index a512b5f0b2..54fb38746e 100644 --- a/modules/arm_neon/i420_rgb.S +++ b/modules/arm_neon/i420_rgb.S @@ -19,8 +19,12 @@ @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA. 
@****************************************************************************/ +#include "asm.S" + .syntax unified - .fpu neon +#if HAVE_AS_FPU_DIRECTIVE + .fpu neon +#endif .text /* ARM */ @@ -80,9 +84,7 @@ coefficients: .short -18432 .align 2 - .global i420_rgb_neon - .type i420_rgb_neon, %function -i420_rgb_neon: +function i420_rgb_neon push {r4-r8,r10-r11,lr} vpush {q4-q7} diff --git a/modules/arm_neon/i420_rv16.S b/modules/arm_neon/i420_rv16.S index cd6d2696c5..15d1e7b7bd 100644 --- a/modules/arm_neon/i420_rv16.S +++ b/modules/arm_neon/i420_rv16.S @@ -19,8 +19,12 @@ @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA. @****************************************************************************/ +#include "asm.S" + .syntax unified - .fpu neon +#if HAVE_AS_FPU_DIRECTIVE + .fpu neon +#endif .text /* ARM */ @@ -83,9 +87,7 @@ coefficients: .short -18432 .align 2 - .global i420_rv16_neon - .type i420_rv16_neon, %function -i420_rv16_neon: +function i420_rv16_neon push {r4-r8,r10-r11,lr} vpush {q4-q7} diff --git a/modules/arm_neon/i420_yuyv.S b/modules/arm_neon/i420_yuyv.S index 0dd04de6d7..29668e438e 100644 --- a/modules/arm_neon/i420_yuyv.S +++ b/modules/arm_neon/i420_yuyv.S @@ -18,8 +18,12 @@ @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA. 
@****************************************************************************/ +#include "asm.S" + .syntax unified - .fpu neon +#if HAVE_AS_FPU_DIRECTIVE + .fpu neon +#endif .text #define O1 r0 @@ -37,9 +41,7 @@ #define OPITCH lr .align 2 - .global i420_yuyv_neon - .type i420_yuyv_neon, %function -i420_yuyv_neon: +function i420_yuyv_neon push {r4-r8,r10-r11,lr} ldmia r0, {O1, OPITCH} ldmia r1, {Y1, U, V, YPITCH} @@ -76,9 +78,7 @@ i420_yuyv_neon: add V, V, YPAD, lsr #1 b 1b - .global i420_uyvy_neon - .type i420_uyvy_neon, %function -i420_uyvy_neon: +function i420_uyvy_neon push {r4-r8,r10-r11,lr} ldmia r0, {O1, OPITCH} ldmia r1, {Y1, U, V, YPITCH} diff --git a/modules/arm_neon/i422_yuyv.S b/modules/arm_neon/i422_yuyv.S index 0960267a8f..9119839ea5 100644 --- a/modules/arm_neon/i422_yuyv.S +++ b/modules/arm_neon/i422_yuyv.S @@ -18,8 +18,12 @@ @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA. @****************************************************************************/ +#include "asm.S" + .syntax unified - .fpu neon +#if HAVE_AS_FPU_DIRECTIVE + .fpu neon +#endif .text #define O r0 @@ -33,9 +37,7 @@ #define YPAD lr .align 2 - .global i422_yuyv_neon - .type i422_yuyv_neon, %function -i422_yuyv_neon: +function i422_yuyv_neon push {r4-r6,lr} ldmia r1, {Y, U, V, YPAD} ldmia r0, {O, OPAD} @@ -66,9 +68,7 @@ i422_yuyv_neon: add O, O, OPAD b 1b - .global i422_uyvy_neon - .type i422_uyvy_neon, %function -i422_uyvy_neon: +function i422_uyvy_neon push {r4-r6,lr} ldmia r1, {Y, U, V, YPAD} ldmia r0, {O, OPAD} diff --git a/modules/arm_neon/nv12_rgb.S b/modules/arm_neon/nv12_rgb.S index f514c4328e..1bb924fc2b 100644 --- a/modules/arm_neon/nv12_rgb.S +++ b/modules/arm_neon/nv12_rgb.S @@ -19,8 +19,12 @@ @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA. 
@****************************************************************************/ +#include "asm.S" + .syntax unified - .fpu neon +#if HAVE_AS_FPU_DIRECTIVE + .fpu neon +#endif .text /* ARM */ @@ -76,9 +80,7 @@ coefficients: .short -18432 .align 2 - .global nv12_rgb_neon - .type nv12_rgb_neon, %function -nv12_rgb_neon: +function nv12_rgb_neon push {r4-r8,r10-r11,lr} vpush {q4-q7} diff --git a/modules/arm_neon/nv21_rgb.S b/modules/arm_neon/nv21_rgb.S index 599112ed1e..f775b5a6ac 100644 --- a/modules/arm_neon/nv21_rgb.S +++ b/modules/arm_neon/nv21_rgb.S @@ -19,8 +19,12 @@ @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA. @****************************************************************************/ +#include "asm.S" + .syntax unified - .fpu neon +#if HAVE_AS_FPU_DIRECTIVE + .fpu neon +#endif .text /* ARM */ @@ -76,9 +80,7 @@ coefficients: .short -18432 .align 2 - .global nv21_rgb_neon - .type nv21_rgb_neon, %function -nv21_rgb_neon: +function nv21_rgb_neon push {r4-r8,r10-r11,lr} vpush {q4-q7} diff --git a/modules/arm_neon/simple_channel_mixer.S b/modules/arm_neon/simple_channel_mixer.S index a94ae8539f..cf9b1b531e 100644 --- a/modules/arm_neon/simple_channel_mixer.S +++ b/modules/arm_neon/simple_channel_mixer.S @@ -19,7 +19,11 @@ @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA. 
@****************************************************************************/ - .fpu neon +#include "asm.S" + +#if HAVE_AS_FPU_DIRECTIVE + .fpu neon +#endif .text .align 2 @@ -34,9 +38,7 @@ coeff_7to2: .float 0.5 .float 0.25 .float 0.25 - .global convert_7_x_to_2_0_neon_asm - .type convert_7_x_to_2_0_neon_asm, %function -convert_7_x_to_2_0_neon_asm: +function convert_7_x_to_2_0_neon_asm push {r4,lr} adr COEFF, coeff_7to2 @@ -70,9 +72,7 @@ coeff_5to2: .float 0.5 .float 0.33 .float 0.33 - .global convert_5_x_to_2_0_neon_asm - .type convert_5_x_to_2_0_neon_asm, %function -convert_5_x_to_2_0_neon_asm: +function convert_5_x_to_2_0_neon_asm push {r4,lr} adr COEFF, coeff_5to2 @@ -100,9 +100,7 @@ convert_5_x_to_2_0_neon_asm: coeff_4to2: .float 0.5 .float 0.5 - .global convert_4_0_to_2_0_neon_asm - .type convert_4_0_to_2_0_neon_asm, %function -convert_4_0_to_2_0_neon_asm: +function convert_4_0_to_2_0_neon_asm push {r4,lr} adr COEFF, coeff_4to2 @@ -124,9 +122,7 @@ convert_4_0_to_2_0_neon_asm: coeff_3to2: .float 0.5 .float 0.5 - .global convert_3_x_to_2_0_neon_asm - .type convert_3_x_to_2_0_neon_asm, %function -convert_3_x_to_2_0_neon_asm: +function convert_3_x_to_2_0_neon_asm push {r4,lr} adr COEFF, coeff_3to2 @@ -154,9 +150,7 @@ coeff_7to1: .float 0.25 .float 0.125 .float 0.125 - .global convert_7_x_to_1_0_neon_asm - .type convert_7_x_to_1_0_neon_asm, %function -convert_7_x_to_1_0_neon_asm: +function convert_7_x_to_1_0_neon_asm push {r4,lr} adr COEFF, coeff_7to1 @@ -188,9 +182,7 @@ coeff_5to1: .float 0.25 .float 0.16666667 .float 0.16666667 - .global convert_5_x_to_1_0_neon_asm - .type convert_5_x_to_1_0_neon_asm, %function -convert_5_x_to_1_0_neon_asm: +function convert_5_x_to_1_0_neon_asm push {r4,lr} adr COEFF, coeff_5to1 @@ -219,9 +211,7 @@ coeff_7to4: .float 0.5 .float 0.16666667 .float 0.16666667 - .global convert_7_x_to_4_0_neon_asm - .type convert_7_x_to_4_0_neon_asm, %function -convert_7_x_to_4_0_neon_asm: +function convert_7_x_to_4_0_neon_asm push {r4,lr} adr 
COEFF, coeff_7to4 @@ -252,9 +242,7 @@ convert_7_x_to_4_0_neon_asm: coeff_5to4: .float 0.5 .float 0.5 - .global convert_5_x_to_4_0_neon_asm - .type convert_5_x_to_4_0_neon_asm, %function -convert_5_x_to_4_0_neon_asm: +function convert_5_x_to_4_0_neon_asm push {r4,lr} adr COEFF, coeff_5to4 diff --git a/modules/arm_neon/yuyv_i422.S b/modules/arm_neon/yuyv_i422.S index c3774f5d6a..637effe9bc 100644 --- a/modules/arm_neon/yuyv_i422.S +++ b/modules/arm_neon/yuyv_i422.S @@ -18,8 +18,12 @@ @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA. @****************************************************************************/ +#include "asm.S" + .syntax unified - .fpu neon +#if HAVE_AS_FPU_DIRECTIVE + .fpu neon +#endif .text #define I r0 @@ -33,9 +37,7 @@ #define YPAD lr .align 2 - .global yuyv_i422_neon - .type yuyv_i422_neon, %function -yuyv_i422_neon: +function yuyv_i422_neon push {r4-r6,lr} ldmia r0, {Y, U, V, YPAD} ldmia r1, {I, IPAD} @@ -64,9 +66,7 @@ yuyv_i422_neon: add V, V, YPAD, lsr #1 b 1b - .global uyvy_i422_neon - .type uyvy_i422_neon, %function -uyvy_i422_neon: +function uyvy_i422_neon push {r4-r6,lr} ldmia r0, {Y, U, V, YPAD} ldmia r1, {I, IPAD} diff --git a/modules/video_filter/deinterlace/merge_arm.S b/modules/video_filter/deinterlace/merge_arm.S index dd779029ae..d3f32c5ca6 100644 --- a/modules/video_filter/deinterlace/merge_arm.S +++ b/modules/video_filter/deinterlace/merge_arm.S @@ -18,10 +18,16 @@ @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA. @****************************************************************************/ +#include "../../arm_neon/asm.S" + .syntax unified .arm +#if HAVE_AS_ARCH_DIRECTIVE .arch armv6 +#endif +#if HAVE_AS_FPU_DIRECTIVE .fpu neon +#endif .text #define DEST r0 @@ -30,10 +36,8 @@ #define SIZE r3 .align 2 - .global merge8_arm_neon - .type merge8_arm_neon, %function @ NOTE: Offset and pitch must be multiple of 16-bytes in VLC. 
-merge8_arm_neon: +function merge8_arm_neon cmp SIZE, #64 blo 2f 1: @@ -72,9 +76,7 @@ merge8_arm_neon: bx lr .align 2 - .global merge16_arm_neon - .type merge16_arm_neon, %function -merge16_arm_neon: +function merge16_arm_neon cmp SIZE, #64 blo 2f 1: @@ -113,9 +115,7 @@ merge16_arm_neon: bx lr .align 2 - .global merge8_armv6 - .type merge8_armv6, %function -merge8_armv6: +function merge8_armv6 push {r4-r9,lr} 1: pld [SRC1, #64] @@ -135,9 +135,7 @@ merge8_armv6: b 1b .align 2 - .global merge16_armv6 - .type merge16_armv6, %function -merge16_armv6: +function merge16_armv6 push {r4-r9,lr} 1: pld [SRC1, #64] diff --git a/modules/video_filter/deinterlace/merge_arm64.S b/modules/video_filter/deinterlace/merge_arm64.S index db19e54caf..7b70678891 100644 --- a/modules/video_filter/deinterlace/merge_arm64.S +++ b/modules/video_filter/deinterlace/merge_arm64.S @@ -19,6 +19,8 @@ // Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA. //****************************************************************************/ +#include "../../arm_neon/asm.S" + .text #define DEST x0 @@ -27,10 +29,8 @@ #define SIZE x3 .align 2 - .global merge8_arm64_neon - .type merge8_arm64_neon, %function // NOTE: Offset and pitch must be multiple of 16-bytes in VLC. -merge8_arm64_neon: +function merge8_arm64_neon ands x5, SIZE, #~63 b.eq 2f mov x10, #64 @@ -66,9 +66,7 @@ merge8_arm64_neon: ret .align 2 - .global merge16_arm64_neon - .type merge16_arm64_neon, %function -merge16_arm64_neon: +function merge16_arm64_neon ands x5, SIZE, #~63 b.eq 2f 1: _______________________________________________ vlc-commits mailing list vlc-commits@videolan.org https://mailman.videolan.org/listinfo/vlc-commits