On Mon, 4 Jan 2016, Janne Grunau wrote:

On 2016-01-01 22:42:46 +0200, Martin Storsjö wrote:
Use two separate functions, depending on whether VFP/NEON is available.

This is set to require armv5te - it uses blx, which is only available
since armv5t, but we don't have a separate configure item for that.
(It also uses ldrd, which requires armv5te, but this could be avoided
if necessary.)
---
Using HAVE_ARMV5TE_EXTERNAL, using have_neon/have_vfp with DCE to
avoid undefined references, added a dummy parameter for 8 byte alignment,
renamed the check function to _vfp/_novfp and rewrote those clobber
tests to avoid NEON functions to make them work on plain VFPv2 as well.
Moved out the fallback definition of define_new_emms, making it generic
(and reusable for aarch64).
---
 tests/checkasm/arm/Makefile   |    1 +
 tests/checkasm/arm/checkasm.S |  143 +++++++++++++++++++++++++++++++++++++++++
 tests/checkasm/checkasm.c     |   11 ++++
 tests/checkasm/checkasm.h     |   13 ++++
 4 files changed, 168 insertions(+)
 create mode 100644 tests/checkasm/arm/Makefile
 create mode 100644 tests/checkasm/arm/checkasm.S

diff --git a/tests/checkasm/arm/Makefile b/tests/checkasm/arm/Makefile
new file mode 100644
index 0000000..55f2383
--- /dev/null
+++ b/tests/checkasm/arm/Makefile
@@ -0,0 +1 @@
+CHECKASMOBJS-$(HAVE_ARMV5TE_EXTERNAL) += arm/checkasm.o
diff --git a/tests/checkasm/arm/checkasm.S b/tests/checkasm/arm/checkasm.S
new file mode 100644
index 0000000..478d59f
--- /dev/null
+++ b/tests/checkasm/arm/checkasm.S
@@ -0,0 +1,143 @@
+/****************************************************************************
+ * Assembly testing and benchmarking tool
+ * Copyright (c) 2015 Martin Storsjo
+ * Copyright (c) 2015 Janne Grunau
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *****************************************************************************/
+
+#include "libavutil/arm/asm.S"
+
+const register_init
+    .quad 0x21f86d66c8ca00ce
+    .quad 0x75b6ba21077c48ad
+    .quad 0xed56bb2dcb3c7736
+    .quad 0x8bda43d3fd1a7e06
+    .quad 0xb64a9c9e5d318408
+    .quad 0xdf9a54b303f1d3a3
+    .quad 0x4a75479abd64e097
+    .quad 0x249214109d5d1c88
+endconst
+
+const error_message
+    .asciz "failed to preserve register"
+endconst
+
+@ max number of args used by any asm function.
+#define MAX_ARGS 15
+
+#define ARG_STACK 4*(MAX_ARGS - 2)
+
+.macro clobbercheck variant
+.equ pushed, 4*9
+function checkasm_checked_call_\variant, export=1
+    push        {r4-r11, lr}
+.ifc \variant, vfp
+    vpush       {q4-q7}
+.equ pushed, pushed + 16*4
+.endif
+
+    movrel      r12, register_init
+.ifc \variant, vfp
+    vldm        r12, {q4-q7}

{d8-d15}; gas seems to translate it, but who knows how other assemblers
handle it.

Sure

+.endif
+    ldm         r12, {r4-r11}
+
+    sub         sp,  sp,  #ARG_STACK
+.equ pos, 0
+.rept MAX_ARGS-2
+    ldr         r12, [sp, #ARG_STACK + pushed + 8 + pos]
+    str         r12, [sp, #pos]
+.equ pos, pos + 4
+.endr
+
+    mov         r12, r0
+    mov         r0,  r2
+    mov         r1,  r3
+    ldrd        r2,  r3,  [sp, #ARG_STACK + pushed]
+    blx         r12
+    add         sp,  sp,  #ARG_STACK
+
+    push        {r0, r1}
+    movrel      r12, register_init
+    mov         r3,  #0
+.ifc \variant, vfp
+    vldm        r12, {q0-q3}
+.macro check_reg_vfp reg1, reg2
+    vmov        r0,  \reg1
+    vmov        r1,  \reg2
+    eor         r0,  r0,  r1
+    orr         r3,  r3,  r0
+.endm
+
+    check_reg_vfp s0,  s16
+    check_reg_vfp s1,  s17
+    check_reg_vfp s2,  s18
+    check_reg_vfp s3,  s19
+    check_reg_vfp s4,  s20
+    check_reg_vfp s5,  s21
+    check_reg_vfp s6,  s22
+    check_reg_vfp s7,  s23
+    check_reg_vfp s8,  s24
+    check_reg_vfp s9,  s25
+    check_reg_vfp s10, s26
+    check_reg_vfp s11, s27
+    check_reg_vfp s12, s28
+    check_reg_vfp s13, s29
+    check_reg_vfp s14, s30
+    check_reg_vfp s15, s31

Not really nice. I didn't realize that veor would be missing for the vfp
variant. The load could be integrated into the check_reg_vfp macro.
Since there is then only a single numeric parameter, one could use .irp.
Also, there should be enough free regs to compare 64 bits at once, like:

@ Compare one VFP d-register against the expected 64-bit pattern that r12
@ points at, OR-ing any differing bits into r3 (r3 stays 0 if it matches).
@ r12 is post-indexed by \inc bytes, so a negative \inc on the last
@ invocation can rewind it. Clobbers r0, r1, r2 and lr.
.macro check_reg_vfp, dreg, inc=8
 ldrd r0, r1, [r12], #\inc
 vmov r2, lr, \dreg
 eor  r0, r2
 eor  r1, lr
 orr  r3, r0
 orr  r3, r1
.endm

.irp n,8,9,...
 check_reg_vfp d\n
.endr
 check_reg_vfp d15, -56

Not that much nicer. Decide for yourself whether you prefer this or want to
keep the patch as it is. Both are ok for me.

Hmm, that would indeed be nicer - not even being able to use some sort of repeat for the register list now is annoying.

+.purgem check_reg_vfp
+.endif
+
+.macro check_reg reg1, reg2=
+    ldrd        r0,  r1,  [r12], #8
+    eor         r0,  r0, \reg1
+    orr         r3,  r3, r0
+.ifnb \reg2
+    eor         r1,  r1, \reg2
+    orr         r3,  r3, r1
+.endif
+.endm
+    check_reg   r4,  r5
+    check_reg   r6,  r7
+@ r9 is a volatile register in the ios ABI
+#ifdef __APPLE__
+    check_reg   r8
+#else
+    check_reg   r8,  r9
+#endif
+    check_reg   r10, r11
+.purgem check_reg
+
+    cmp         r3,  #0
+    beq         0f
+
+    movrel      r0, error_message
+    blx         X(checkasm_fail_func)
+0:
+    pop         {r0, r1}
+.ifc \variant, vfp
+    vpop        {q4-q7}

same as for vpush

Ok

+.endif
+    pop         {r4-r11, pc}
+endfunc
+.endm
+
+#if HAVE_VFP || HAVE_NEON

I guess this needs to be _EXTERNAL too or we would have to add a .fpu
directive for the vfp variant

Hmm, no? We already have .fpu neon or .fpu vfp (in libavutil/arm/asm.S) if HAVE_NEON or HAVE_VFP are set - isn't that enough?

// Martin
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to