Use two separate functions, depending on whether VFP/NEON is available.
This is set to require armv5te - it uses blx, which is only available
since armv5t, but we don't have a separate configure item for that.
(It also uses ldrd, which requires armv5te, but this could be avoided
if necessary.)
---
Now using HAVE_ARMV5TE_EXTERNAL, and using have_neon/have_vfp so that dead
code elimination (DCE) avoids undefined references. Added a dummy parameter
for 8-byte alignment, renamed the check functions to _vfp/_novfp, and rewrote
the clobber tests to avoid NEON instructions so that they work on plain VFPv2
as well. Moved out the fallback definition of declare_new_emms, making it
generic (and reusable for aarch64).
---
tests/checkasm/arm/Makefile | 1 +
tests/checkasm/arm/checkasm.S | 143 +++++++++++++++++++++++++++++++++++++++++
tests/checkasm/checkasm.c | 11 ++++
tests/checkasm/checkasm.h | 13 ++++
4 files changed, 168 insertions(+)
create mode 100644 tests/checkasm/arm/Makefile
create mode 100644 tests/checkasm/arm/checkasm.S
diff --git a/tests/checkasm/arm/Makefile b/tests/checkasm/arm/Makefile
new file mode 100644
index 0000000..55f2383
--- /dev/null
+++ b/tests/checkasm/arm/Makefile
@@ -0,0 +1 @@
+CHECKASMOBJS-$(HAVE_ARMV5TE_EXTERNAL) += arm/checkasm.o
diff --git a/tests/checkasm/arm/checkasm.S b/tests/checkasm/arm/checkasm.S
new file mode 100644
index 0000000..478d59f
--- /dev/null
+++ b/tests/checkasm/arm/checkasm.S
@@ -0,0 +1,143 @@
+/****************************************************************************
+ * Assembly testing and benchmarking tool
+ * Copyright (c) 2015 Martin Storsjo
+ * Copyright (c) 2015 Janne Grunau
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
+ *****************************************************************************/
+
+#include "libavutil/arm/asm.S"
+
+const register_init
+ .quad 0x21f86d66c8ca00ce
+ .quad 0x75b6ba21077c48ad
+ .quad 0xed56bb2dcb3c7736
+ .quad 0x8bda43d3fd1a7e06
+ .quad 0xb64a9c9e5d318408
+ .quad 0xdf9a54b303f1d3a3
+ .quad 0x4a75479abd64e097
+ .quad 0x249214109d5d1c88
+endconst
+
+const error_message
+ .asciz "failed to preserve register"
+endconst
+
+@ max number of args used by any asm function.
+#define MAX_ARGS 15
+
+#define ARG_STACK 4*(MAX_ARGS - 2)
+
+.macro clobbercheck variant
+.equ pushed, 4*9
+function checkasm_checked_call_\variant, export=1
+ push {r4-r11, lr}
+.ifc \variant, vfp
+ vpush {q4-q7}
+.equ pushed, pushed + 16*4
+.endif
+
+ movrel r12, register_init
+.ifc \variant, vfp
+ vldm r12, {q4-q7}
+.endif
+ ldm r12, {r4-r11}
+
+ sub sp, sp, #ARG_STACK
+.equ pos, 0
+.rept MAX_ARGS-2
+ ldr r12, [sp, #ARG_STACK + pushed + 8 + pos]
+ str r12, [sp, #pos]
+.equ pos, pos + 4
+.endr
+
+ mov r12, r0
+ mov r0, r2
+ mov r1, r3
+ ldrd r2, r3, [sp, #ARG_STACK + pushed]
+ blx r12
+ add sp, sp, #ARG_STACK
+
+ push {r0, r1}
+ movrel r12, register_init
+ mov r3, #0
+.ifc \variant, vfp
+ vldm r12, {q0-q3}
+.macro check_reg_vfp reg1, reg2
+ vmov r0, \reg1
+ vmov r1, \reg2
+ eor r0, r0, r1
+ orr r3, r3, r0
+.endm
+
+ check_reg_vfp s0, s16
+ check_reg_vfp s1, s17
+ check_reg_vfp s2, s18
+ check_reg_vfp s3, s19
+ check_reg_vfp s4, s20
+ check_reg_vfp s5, s21
+ check_reg_vfp s6, s22
+ check_reg_vfp s7, s23
+ check_reg_vfp s8, s24
+ check_reg_vfp s9, s25
+ check_reg_vfp s10, s26
+ check_reg_vfp s11, s27
+ check_reg_vfp s12, s28
+ check_reg_vfp s13, s29
+ check_reg_vfp s14, s30
+ check_reg_vfp s15, s31
+.purgem check_reg_vfp
+.endif
+
+.macro check_reg reg1, reg2=
+ ldrd r0, r1, [r12], #8
+ eor r0, r0, \reg1
+ orr r3, r3, r0
+.ifnb \reg2
+ eor r1, r1, \reg2
+ orr r3, r3, r1
+.endif
+.endm
+ check_reg r4, r5
+ check_reg r6, r7
+@ r9 is a volatile register in the ios ABI
+#ifdef __APPLE__
+ check_reg r8
+#else
+ check_reg r8, r9
+#endif
+ check_reg r10, r11
+.purgem check_reg
+
+ cmp r3, #0
+ beq 0f
+
+ movrel r0, error_message
+ blx X(checkasm_fail_func)
+0:
+ pop {r0, r1}
+.ifc \variant, vfp
+ vpop {q4-q7}
+.endif
+ pop {r4-r11, pc}
+endfunc
+.endm
+
+#if HAVE_VFP || HAVE_NEON
+clobbercheck vfp
+#endif
+clobbercheck novfp
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index d6f8ffc..c75e431 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -53,6 +53,12 @@
#define isatty(fd) 1
#endif
+#if ARCH_ARM && HAVE_ARMV5TE_EXTERNAL
+#include "libavutil/arm/cpu.h"
+
+void (*checkasm_checked_call)(void *func, int dummy, ...) = checkasm_checked_call_novfp;
+#endif
+
/* List of tests to invoke */
static const struct {
const char *name;
@@ -463,6 +469,11 @@ int main(int argc, char *argv[])
{
int i, seed, ret = 0;
+#if ARCH_ARM && HAVE_ARMV5TE_EXTERNAL
+ if (have_vfp(av_get_cpu_flags()) || have_neon(av_get_cpu_flags()))
+ checkasm_checked_call = checkasm_checked_call_vfp;
+#endif
+
if (!tests[0].func || !cpus[0].flag) {
fprintf(stderr, "checkasm: no tests to perform\n");
return 0;
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index 0bc66b9..2486cb5 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -116,6 +116,15 @@ void checkasm_stack_clobber(uint64_t clobber, ...);
(void *)checkasm_checked_call;
#define call_new(...) checked_call(func_new, __VA_ARGS__)
#endif
+#elif ARCH_ARM && HAVE_ARMV5TE_EXTERNAL
+/* Use a dummy argument, to offset the real parameters by 2, not only 1.
+ * This makes sure that potential 8-byte-alignment of parameters is kept the same
+ * even when the extra parameters have been removed. */
+void checkasm_checked_call_vfp(void *func, int dummy, ...);
+void checkasm_checked_call_novfp(void *func, int dummy, ...);
+extern void (*checkasm_checked_call)(void *func, int dummy, ...);
+#define declare_new(ret, ...) ret (*checked_call)(void *, int dummy, __VA_ARGS__) = (void *)checkasm_checked_call;
+#define call_new(...) checked_call(func_new, 0, __VA_ARGS__)
#else
#define declare_new(ret, ...)
#define declare_new_emms(cpu_flags, ret, ...)
@@ -123,6 +132,10 @@ void checkasm_stack_clobber(uint64_t clobber, ...);
#define call_new(...) ((func_type *)func_new)(__VA_ARGS__)
#endif
+#ifndef declare_new_emms
+#define declare_new_emms(cpu_flags, ret, ...) declare_new(ret, __VA_ARGS__)
+#endif
+
/* Benchmark the function */
#ifdef AV_READ_TIME
#define bench_new(...)\
--
1.7.10.4
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel