On Mon, 4 Jan 2016, Janne Grunau wrote:
On 2016-01-01 22:42:46 +0200, Martin Storsjö wrote:Use two separate functions, depending on whether VFP/NEON is available.This is set to require armv5te - it uses blx, which is only available since armv5t, but we don't have a separate configure item for that. (It also uses ldrd, which requires armv5te, but this could be avoided if necessary.) --- Using HAVE_ARMV5TE_EXTERNAL, using have_neon/have_vfp with DCE to avoid undefined references, added a dummy parameter for 8 byte alignment, renamed the check function to _vfp/_novfp and rewrote those clobber tests to avoid NEON functions to make them work on plain VFPv2 as well. Moved out the fallback definition of define_new_emms, making it generic (and reusable for aarch64). --- tests/checkasm/arm/Makefile | 1 + tests/checkasm/arm/checkasm.S | 143 +++++++++++++++++++++++++++++++++++++++++ tests/checkasm/checkasm.c | 11 ++++ tests/checkasm/checkasm.h | 13 ++++ 4 files changed, 168 insertions(+) create mode 100644 tests/checkasm/arm/Makefile create mode 100644 tests/checkasm/arm/checkasm.S diff --git a/tests/checkasm/arm/Makefile b/tests/checkasm/arm/Makefile new file mode 100644 index 0000000..55f2383 --- /dev/null +++ b/tests/checkasm/arm/Makefile @@ -0,0 +1 @@ +CHECKASMOBJS-$(HAVE_ARMV5TE_EXTERNAL) += arm/checkasm.o diff --git a/tests/checkasm/arm/checkasm.S b/tests/checkasm/arm/checkasm.S new file mode 100644 index 0000000..478d59f --- /dev/null +++ b/tests/checkasm/arm/checkasm.S @@ -0,0 +1,143 @@ +/**************************************************************************** + * Assembly testing and benchmarking tool + * Copyright (c) 2015 Martin Storsjo + * Copyright (c) 2015 Janne Grunau + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + *****************************************************************************/ + +#include "libavutil/arm/asm.S" + +const register_init + .quad 0x21f86d66c8ca00ce + .quad 0x75b6ba21077c48ad + .quad 0xed56bb2dcb3c7736 + .quad 0x8bda43d3fd1a7e06 + .quad 0xb64a9c9e5d318408 + .quad 0xdf9a54b303f1d3a3 + .quad 0x4a75479abd64e097 + .quad 0x249214109d5d1c88 +endconst + +const error_message + .asciz "failed to preserve register" +endconst + +@ max number of args used by any asm function. +#define MAX_ARGS 15 + +#define ARG_STACK 4*(MAX_ARGS - 2) + +.macro clobbercheck variant +.equ pushed, 4*9 +function checkasm_checked_call_\variant, export=1 + push {r4-r11, lr} +.ifc \variant, vfp + vpush {q4-q7} +.equ pushed, pushed + 16*4 +.endif + + movrel r12, register_init +.ifc \variant, vfp + vldm r12, {q4-q7}
{d8-d15}; gas seems to translate it, but who knows how other assemblers handle it.
Sure
+.endif + ldm r12, {r4-r11} + + sub sp, sp, #ARG_STACK +.equ pos, 0 +.rept MAX_ARGS-2 + ldr r12, [sp, #ARG_STACK + pushed + 8 + pos] + str r12, [sp, #pos] +.equ pos, pos + 4 +.endr + + mov r12, r0 + mov r0, r2 + mov r1, r3 + ldrd r2, r3, [sp, #ARG_STACK + pushed] + blx r12 + add sp, sp, #ARG_STACK + + push {r0, r1} + movrel r12, register_init + mov r3, #0 +.ifc \variant, vfp + vldm r12, {q0-q3} +.macro check_reg_vfp reg1, reg2 + vmov r0, \reg1 + vmov r1, \reg2 + eor r0, r0, r1 + orr r3, r3, r0 +.endm + + check_reg_vfp s0, s16 + check_reg_vfp s1, s17 + check_reg_vfp s2, s18 + check_reg_vfp s3, s19 + check_reg_vfp s4, s20 + check_reg_vfp s5, s21 + check_reg_vfp s6, s22 + check_reg_vfp s7, s23 + check_reg_vfp s8, s24 + check_reg_vfp s9, s25 + check_reg_vfp s10, s26 + check_reg_vfp s11, s27 + check_reg_vfp s12, s28 + check_reg_vfp s13, s29 + check_reg_vfp s14, s30 + check_reg_vfp s15, s31
Not really nice. I didn't realize that veor would be missing for the vfp variant. The load could be integrated into the check_reg_vfp macro. Since there is then only a single numeric parameter, one could use .irp. Also, there should be enough free regs to compare 64 bits at once, like: .macro check_reg_vfp, dreg, inc=8 ldrd r0, r1, [r12], #inc vmov r2, lr, \dreg eor r0, r2 eor r1, lr orr r3, r0 orr r3, r1 .endm .irp n,8,9,... check_reg_vfp d\n .endr check_reg_vfp d15, -56 Not that much nicer. Decide yourself whether you prefer this or want to keep the patch as it is; both are OK for me.
Hmm, that would indeed be nicer - it is annoying that the current version can't even use some sort of repeat for the register list.
+.purgem check_reg_vfp +.endif + +.macro check_reg reg1, reg2= + ldrd r0, r1, [r12], #8 + eor r0, r0, \reg1 + orr r3, r3, r0 +.ifnb \reg2 + eor r1, r1, \reg2 + orr r3, r3, r1 +.endif +.endm + check_reg r4, r5 + check_reg r6, r7 +@ r9 is a volatile register in the ios ABI +#ifdef __APPLE__ + check_reg r8 +#else + check_reg r8, r9 +#endif + check_reg r10, r11 +.purgem check_reg + + cmp r3, #0 + beq 0f + + movrel r0, error_message + blx X(checkasm_fail_func) +0: + pop {r0, r1} +.ifc \variant, vfp + vpop {q4-q7}
Same as for vpush.
Ok
+.endif + pop {r4-r11, pc} +endfunc +.endm + +#if HAVE_VFP || HAVE_NEON
I guess this needs to be _EXTERNAL too, or we would have to add a .fpu directive for the vfp variant.
Hmm, no? We already have .fpu neon or .fpu vfp (in libavutil/arm/asm.S) if HAVE_NEON or HAVE_VFP are set - isn't that enough?
// Martin
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel
