vlc | branch: master | Janne Grunau <janne-...@jannau.net> | Mon Feb 19 00:47:53 2018 +0100| [85b89f35bdd5799436ea51086c7a38f1f4b3f451] | committer: Martin Storsjö

arm: make the assembler functions compatible with non ELF/gas platforms

Allow assembling the ARM NEON functions for iOS and for Windows on ARM.

> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=85b89f35bdd5799436ea51086c7a38f1f4b3f451
---

 modules/arm_neon/amplify.S                     |  8 ++++--
 modules/arm_neon/asm.S                         | 39 ++++++++++++++++++++++++++
 modules/arm_neon/deinterleave_chroma.S         | 10 ++++---
 modules/arm_neon/i420_rgb.S                    | 10 ++++---
 modules/arm_neon/i420_rv16.S                   | 10 ++++---
 modules/arm_neon/i420_yuyv.S                   | 14 ++++-----
 modules/arm_neon/i422_yuyv.S                   | 14 ++++-----
 modules/arm_neon/nv12_rgb.S                    | 10 ++++---
 modules/arm_neon/nv21_rgb.S                    | 10 ++++---
 modules/arm_neon/simple_channel_mixer.S        | 38 +++++++++----------------
 modules/arm_neon/yuyv_i422.S                   | 14 ++++-----
 modules/video_filter/deinterlace/merge_arm.S   | 22 +++++++--------
 modules/video_filter/deinterlace/merge_arm64.S | 10 +++----
 13 files changed, 122 insertions(+), 87 deletions(-)

diff --git a/modules/arm_neon/amplify.S b/modules/arm_neon/amplify.S
index 5938118378..9e655afe16 100644
--- a/modules/arm_neon/amplify.S
+++ b/modules/arm_neon/amplify.S
@@ -18,18 +18,20 @@
  @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  @****************************************************************************/
 
+#include "asm.S"
+
        .syntax unified
        .arm
+#if HAVE_AS_FPU_DIRECTIVE
        .fpu    neon
+#endif
        .text
 
 #define        DST     r0
 #define        SRC     r1
 #define        SIZE    r2
        .align 2
-       .global amplify_float_arm_neon
-       .type   amplify_float_arm_neon, %function
-amplify_float_arm_neon:
+function amplify_float_arm_neon
        cmp             SIZE,   #0
        bxeq            lr
 #ifdef __ARM_PCS
diff --git a/modules/arm_neon/asm.S b/modules/arm_neon/asm.S
new file mode 100644
index 0000000000..728391ea37
--- /dev/null
+++ b/modules/arm_neon/asm.S
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Janne Grunau <janne-li...@jannau.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifdef __APPLE__  /* Apple targets: exported labels get a leading underscore */
+#   define EXTERN_ASM _
+#else  /* all other targets: labels are emitted without a prefix */
+#   define EXTERN_ASM
+#endif
+
+#if defined(__APPLE__) || defined(_WIN32)  /* includers test these before emitting .arch / .fpu */
+#   define HAVE_AS_ARCH_DIRECTIVE 0
+#   define HAVE_AS_FPU_DIRECTIVE  0
+#else  /* every other target keeps both directives enabled */
+#   define HAVE_AS_ARCH_DIRECTIVE 1
+#   define HAVE_AS_FPU_DIRECTIVE  1
+#endif
+
+.macro  function name  /* open an exported function; the body follows the macro call */
+       .globl  EXTERN_ASM\name  /* export the symbol, with the EXTERN_ASM prefix if any */
+#ifdef __ELF__  /* the .type annotation is only emitted for ELF objects */
+       .type   EXTERN_ASM\name, %function
+#endif
+EXTERN_ASM\name:  /* entry label, spelled exactly like the exported symbol */
+.endm
diff --git a/modules/arm_neon/deinterleave_chroma.S b/modules/arm_neon/deinterleave_chroma.S
index 019d647ed6..9cd01c7aed 100644
--- a/modules/arm_neon/deinterleave_chroma.S
+++ b/modules/arm_neon/deinterleave_chroma.S
@@ -19,8 +19,12 @@
  @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  @****************************************************************************/
 
+#include "asm.S"
+
        .syntax unified
-       .fpu neon
+#if HAVE_AS_FPU_DIRECTIVE
+       .fpu    neon
+#endif
        .text
 
 #define UV     r0
@@ -35,9 +39,7 @@
 #define OPAD   lr
 
        .align 2
-       .global deinterleave_chroma_neon
-       .type   deinterleave_chroma_neon, %function
-deinterleave_chroma_neon:
+function deinterleave_chroma_neon
        push            {r4-r6,lr}
        ldmia           r0,     {U, V, OPITCH}
        ldmia           r1,     {UV, IPITCH}
diff --git a/modules/arm_neon/i420_rgb.S b/modules/arm_neon/i420_rgb.S
index a512b5f0b2..54fb38746e 100644
--- a/modules/arm_neon/i420_rgb.S
+++ b/modules/arm_neon/i420_rgb.S
@@ -19,8 +19,12 @@
  @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  @****************************************************************************/
 
+#include "asm.S"
+
        .syntax unified
-       .fpu neon
+#if HAVE_AS_FPU_DIRECTIVE
+       .fpu    neon
+#endif
        .text
 
 /* ARM */
@@ -80,9 +84,7 @@ coefficients:
     .short  -18432
 
        .align 2
-       .global i420_rgb_neon
-       .type   i420_rgb_neon, %function
-i420_rgb_neon:
+function i420_rgb_neon
        push            {r4-r8,r10-r11,lr}
        vpush           {q4-q7}
 
diff --git a/modules/arm_neon/i420_rv16.S b/modules/arm_neon/i420_rv16.S
index cd6d2696c5..15d1e7b7bd 100644
--- a/modules/arm_neon/i420_rv16.S
+++ b/modules/arm_neon/i420_rv16.S
@@ -19,8 +19,12 @@
  @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  @****************************************************************************/
 
+#include "asm.S"
+
        .syntax unified
-       .fpu neon
+#if HAVE_AS_FPU_DIRECTIVE
+       .fpu    neon
+#endif
        .text
 
 /* ARM */
@@ -83,9 +87,7 @@ coefficients:
     .short  -18432
 
        .align 2
-       .global i420_rv16_neon
-       .type   i420_rv16_neon, %function
-i420_rv16_neon:
+function i420_rv16_neon
        push            {r4-r8,r10-r11,lr}
        vpush           {q4-q7}
 
diff --git a/modules/arm_neon/i420_yuyv.S b/modules/arm_neon/i420_yuyv.S
index 0dd04de6d7..29668e438e 100644
--- a/modules/arm_neon/i420_yuyv.S
+++ b/modules/arm_neon/i420_yuyv.S
@@ -18,8 +18,12 @@
  @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  @****************************************************************************/
 
+#include "asm.S"
+
        .syntax unified
-       .fpu neon
+#if HAVE_AS_FPU_DIRECTIVE
+       .fpu    neon
+#endif
        .text
 
 #define O1     r0
@@ -37,9 +41,7 @@
 #define OPITCH lr
 
        .align 2
-       .global i420_yuyv_neon
-       .type   i420_yuyv_neon, %function
-i420_yuyv_neon:
+function i420_yuyv_neon
        push            {r4-r8,r10-r11,lr}
        ldmia           r0,     {O1, OPITCH}
        ldmia           r1,     {Y1, U, V, YPITCH}
@@ -76,9 +78,7 @@ i420_yuyv_neon:
        add             V,      V,      YPAD,   lsr #1
        b               1b
 
-       .global i420_uyvy_neon
-       .type   i420_uyvy_neon, %function
-i420_uyvy_neon:
+function i420_uyvy_neon
        push            {r4-r8,r10-r11,lr}
        ldmia           r0,     {O1, OPITCH}
        ldmia           r1,     {Y1, U, V, YPITCH}
diff --git a/modules/arm_neon/i422_yuyv.S b/modules/arm_neon/i422_yuyv.S
index 0960267a8f..9119839ea5 100644
--- a/modules/arm_neon/i422_yuyv.S
+++ b/modules/arm_neon/i422_yuyv.S
@@ -18,8 +18,12 @@
  @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  @****************************************************************************/
 
+#include "asm.S"
+
        .syntax unified
-       .fpu neon
+#if HAVE_AS_FPU_DIRECTIVE
+       .fpu    neon
+#endif
        .text
 
 #define O      r0
@@ -33,9 +37,7 @@
 #define YPAD   lr
 
        .align 2
-       .global i422_yuyv_neon
-       .type   i422_yuyv_neon, %function
-i422_yuyv_neon:
+function i422_yuyv_neon
        push            {r4-r6,lr}
        ldmia           r1,     {Y, U, V, YPAD}
        ldmia           r0,     {O, OPAD}
@@ -66,9 +68,7 @@ i422_yuyv_neon:
        add             O,      O,      OPAD
        b               1b
 
-       .global i422_uyvy_neon
-       .type   i422_uyvy_neon, %function
-i422_uyvy_neon:
+function i422_uyvy_neon
        push            {r4-r6,lr}
        ldmia           r1,     {Y, U, V, YPAD}
        ldmia           r0,     {O, OPAD}
diff --git a/modules/arm_neon/nv12_rgb.S b/modules/arm_neon/nv12_rgb.S
index f514c4328e..1bb924fc2b 100644
--- a/modules/arm_neon/nv12_rgb.S
+++ b/modules/arm_neon/nv12_rgb.S
@@ -19,8 +19,12 @@
  @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  @****************************************************************************/
 
+#include "asm.S"
+
        .syntax unified
-       .fpu neon
+#if HAVE_AS_FPU_DIRECTIVE
+       .fpu    neon
+#endif
        .text
 
 /* ARM */
@@ -76,9 +80,7 @@ coefficients:
     .short  -18432
 
        .align 2
-       .global nv12_rgb_neon
-       .type   nv12_rgb_neon, %function
-nv12_rgb_neon:
+function nv12_rgb_neon
        push            {r4-r8,r10-r11,lr}
        vpush           {q4-q7}
 
diff --git a/modules/arm_neon/nv21_rgb.S b/modules/arm_neon/nv21_rgb.S
index 599112ed1e..f775b5a6ac 100644
--- a/modules/arm_neon/nv21_rgb.S
+++ b/modules/arm_neon/nv21_rgb.S
@@ -19,8 +19,12 @@
  @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  @****************************************************************************/
 
+#include "asm.S"
+
        .syntax unified
-       .fpu neon
+#if HAVE_AS_FPU_DIRECTIVE
+       .fpu    neon
+#endif
        .text
 
 /* ARM */
@@ -76,9 +80,7 @@ coefficients:
     .short  -18432
 
        .align 2
-       .global nv21_rgb_neon
-       .type   nv21_rgb_neon, %function
-nv21_rgb_neon:
+function nv21_rgb_neon
        push            {r4-r8,r10-r11,lr}
        vpush           {q4-q7}
 
diff --git a/modules/arm_neon/simple_channel_mixer.S b/modules/arm_neon/simple_channel_mixer.S
index a94ae8539f..cf9b1b531e 100644
--- a/modules/arm_neon/simple_channel_mixer.S
+++ b/modules/arm_neon/simple_channel_mixer.S
@@ -19,7 +19,11 @@
  @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  @****************************************************************************/
 
-       .fpu neon
+#include "asm.S"
+
+#if HAVE_AS_FPU_DIRECTIVE
+       .fpu    neon
+#endif
        .text
        .align 2
 
@@ -34,9 +38,7 @@ coeff_7to2:
        .float 0.5
        .float 0.25
        .float 0.25
-       .global convert_7_x_to_2_0_neon_asm
-       .type   convert_7_x_to_2_0_neon_asm, %function
-convert_7_x_to_2_0_neon_asm:
+function convert_7_x_to_2_0_neon_asm
        push {r4,lr}
 
        adr COEFF, coeff_7to2
@@ -70,9 +72,7 @@ coeff_5to2:
        .float 0.5
        .float 0.33
        .float 0.33
-       .global convert_5_x_to_2_0_neon_asm
-       .type   convert_5_x_to_2_0_neon_asm, %function
-convert_5_x_to_2_0_neon_asm:
+function convert_5_x_to_2_0_neon_asm
        push {r4,lr}
 
        adr COEFF, coeff_5to2
@@ -100,9 +100,7 @@ convert_5_x_to_2_0_neon_asm:
 coeff_4to2:
        .float 0.5
        .float 0.5
-       .global convert_4_0_to_2_0_neon_asm
-       .type   convert_4_0_to_2_0_neon_asm, %function
-convert_4_0_to_2_0_neon_asm:
+function convert_4_0_to_2_0_neon_asm
        push {r4,lr}
 
        adr COEFF, coeff_4to2
@@ -124,9 +122,7 @@ convert_4_0_to_2_0_neon_asm:
 coeff_3to2:
        .float 0.5
        .float 0.5
-       .global convert_3_x_to_2_0_neon_asm
-       .type   convert_3_x_to_2_0_neon_asm, %function
-convert_3_x_to_2_0_neon_asm:
+function convert_3_x_to_2_0_neon_asm
        push {r4,lr}
 
        adr COEFF, coeff_3to2
@@ -154,9 +150,7 @@ coeff_7to1:
        .float 0.25
        .float 0.125
        .float 0.125
-       .global convert_7_x_to_1_0_neon_asm
-       .type   convert_7_x_to_1_0_neon_asm, %function
-convert_7_x_to_1_0_neon_asm:
+function convert_7_x_to_1_0_neon_asm
        push {r4,lr}
 
        adr COEFF, coeff_7to1
@@ -188,9 +182,7 @@ coeff_5to1:
        .float 0.25
        .float 0.16666667
        .float 0.16666667
-       .global convert_5_x_to_1_0_neon_asm
-       .type   convert_5_x_to_1_0_neon_asm, %function
-convert_5_x_to_1_0_neon_asm:
+function convert_5_x_to_1_0_neon_asm
        push {r4,lr}
 
        adr COEFF, coeff_5to1
@@ -219,9 +211,7 @@ coeff_7to4:
        .float 0.5
        .float 0.16666667
        .float 0.16666667
-       .global convert_7_x_to_4_0_neon_asm
-       .type   convert_7_x_to_4_0_neon_asm, %function
-convert_7_x_to_4_0_neon_asm:
+function convert_7_x_to_4_0_neon_asm
        push {r4,lr}
 
        adr COEFF, coeff_7to4
@@ -252,9 +242,7 @@ convert_7_x_to_4_0_neon_asm:
 coeff_5to4:
        .float 0.5
        .float 0.5
-       .global convert_5_x_to_4_0_neon_asm
-       .type   convert_5_x_to_4_0_neon_asm, %function
-convert_5_x_to_4_0_neon_asm:
+function convert_5_x_to_4_0_neon_asm
        push {r4,lr}
 
        adr COEFF, coeff_5to4
diff --git a/modules/arm_neon/yuyv_i422.S b/modules/arm_neon/yuyv_i422.S
index c3774f5d6a..637effe9bc 100644
--- a/modules/arm_neon/yuyv_i422.S
+++ b/modules/arm_neon/yuyv_i422.S
@@ -18,8 +18,12 @@
  @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  @****************************************************************************/
 
+#include "asm.S"
+
        .syntax unified
-       .fpu neon
+#if HAVE_AS_FPU_DIRECTIVE
+       .fpu    neon
+#endif
        .text
 
 #define I      r0
@@ -33,9 +37,7 @@
 #define YPAD   lr
 
        .align 2
-       .global yuyv_i422_neon
-       .type   yuyv_i422_neon, %function
-yuyv_i422_neon:
+function yuyv_i422_neon
        push            {r4-r6,lr}
        ldmia           r0,     {Y, U, V, YPAD}
        ldmia           r1,     {I, IPAD}
@@ -64,9 +66,7 @@ yuyv_i422_neon:
        add             V,      V,      YPAD,   lsr #1
        b               1b
 
-       .global uyvy_i422_neon
-       .type   uyvy_i422_neon, %function
-uyvy_i422_neon:
+function uyvy_i422_neon
        push            {r4-r6,lr}
        ldmia           r0,     {Y, U, V, YPAD}
        ldmia           r1,     {I, IPAD}
diff --git a/modules/video_filter/deinterlace/merge_arm.S b/modules/video_filter/deinterlace/merge_arm.S
index dd779029ae..d3f32c5ca6 100644
--- a/modules/video_filter/deinterlace/merge_arm.S
+++ b/modules/video_filter/deinterlace/merge_arm.S
@@ -18,10 +18,16 @@
  @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  @****************************************************************************/
 
+#include "../../arm_neon/asm.S"  /* modules/arm_neon/asm.S: function macro and HAVE_AS_* switches */
+
        .syntax unified
        .arm
+#if HAVE_AS_ARCH_DIRECTIVE
        .arch   armv6
+#endif
+#if HAVE_AS_FPU_DIRECTIVE
        .fpu    neon
+#endif
        .text
 
 #define        DEST    r0
@@ -30,10 +36,8 @@
 #define        SIZE    r3
 
        .align 2
-       .global merge8_arm_neon
-       .type   merge8_arm_neon, %function
        @ NOTE: Offset and pitch must be multiple of 16-bytes in VLC.
-merge8_arm_neon:
+function merge8_arm_neon
        cmp             SIZE,   #64
        blo             2f
 1:
@@ -72,9 +76,7 @@ merge8_arm_neon:
        bx              lr
 
        .align 2
-       .global merge16_arm_neon
-       .type   merge16_arm_neon, %function
-merge16_arm_neon:
+function merge16_arm_neon
        cmp             SIZE,   #64
        blo             2f
 1:
@@ -113,9 +115,7 @@ merge16_arm_neon:
        bx              lr
 
        .align 2
-       .global merge8_armv6
-       .type   merge8_armv6, %function
-merge8_armv6:
+function merge8_armv6
        push            {r4-r9,lr}
 1:
        pld             [SRC1, #64]
@@ -135,9 +135,7 @@ merge8_armv6:
        b               1b
 
        .align 2
-       .global merge16_armv6
-       .type   merge16_armv6, %function
-merge16_armv6:
+function merge16_armv6
        push            {r4-r9,lr}
 1:
        pld             [SRC1, #64]
diff --git a/modules/video_filter/deinterlace/merge_arm64.S b/modules/video_filter/deinterlace/merge_arm64.S
index db19e54caf..7b70678891 100644
--- a/modules/video_filter/deinterlace/merge_arm64.S
+++ b/modules/video_filter/deinterlace/merge_arm64.S
@@ -19,6 +19,8 @@
  // Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  //****************************************************************************/
 
+#include "../../arm_neon/asm.S"
+
        .text
 
 #define        DEST    x0
@@ -27,10 +29,8 @@
 #define        SIZE    x3
 
        .align 2
-       .global merge8_arm64_neon
-       .type   merge8_arm64_neon, %function
        // NOTE: Offset and pitch must be multiple of 16-bytes in VLC.
-merge8_arm64_neon:
+function merge8_arm64_neon
        ands            x5, SIZE, #~63
        b.eq            2f
        mov             x10, #64
@@ -66,9 +66,7 @@ merge8_arm64_neon:
        ret
 
        .align 2
-       .global merge16_arm64_neon
-       .type   merge16_arm64_neon, %function
-merge16_arm64_neon:
+function merge16_arm64_neon
        ands            x5, SIZE, #~63
        b.eq            2f
 1:

_______________________________________________
vlc-commits mailing list
vlc-commits@videolan.org
https://mailman.videolan.org/listinfo/vlc-commits

Reply via email to