On 2016-01-01 22:42:46 +0200, Martin Storsjö wrote:
> Use two separate functions, depending on whether VFP/NEON is available.
>
> This is set to require armv5te - it uses blx, which is only available
> since armv5t, but we don't have a separate configure item for that.
> (It also uses ldrd, which requires armv5te, but this could be avoided
> if necessary.)
> ---
> Using HAVE_ARMV5TE_EXTERNAL, using have_neon/have_vfp with DCE to
> avoid undefined references, added a dummy parameter for 8 byte alignment,
> renamed the check function to _vfp/_novfp and rewrote those clobber
> tests to avoid NEON functions to make them work on plain VFPv2 as well.
> Moved out the fallback definition of define_new_emms, making it generic
> (and reusable for aarch64).
> ---
> tests/checkasm/arm/Makefile | 1 +
> tests/checkasm/arm/checkasm.S | 143
> +++++++++++++++++++++++++++++++++++++++++
> tests/checkasm/checkasm.c | 11 ++++
> tests/checkasm/checkasm.h | 13 ++++
> 4 files changed, 168 insertions(+)
> create mode 100644 tests/checkasm/arm/Makefile
> create mode 100644 tests/checkasm/arm/checkasm.S
>
> diff --git a/tests/checkasm/arm/Makefile b/tests/checkasm/arm/Makefile
> new file mode 100644
> index 0000000..55f2383
> --- /dev/null
> +++ b/tests/checkasm/arm/Makefile
> @@ -0,0 +1 @@
> +CHECKASMOBJS-$(HAVE_ARMV5TE_EXTERNAL) += arm/checkasm.o
> diff --git a/tests/checkasm/arm/checkasm.S b/tests/checkasm/arm/checkasm.S
> new file mode 100644
> index 0000000..478d59f
> --- /dev/null
> +++ b/tests/checkasm/arm/checkasm.S
> @@ -0,0 +1,143 @@
> +/****************************************************************************
> + * Assembly testing and benchmarking tool
> + * Copyright (c) 2015 Martin Storsjo
> + * Copyright (c) 2015 Janne Grunau
> + *
> + * This file is part of Libav.
> + *
> + * Libav is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * Libav is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
> +
> *****************************************************************************/
> +
> +#include "libavutil/arm/asm.S"
> +
> +const register_init
> + .quad 0x21f86d66c8ca00ce
> + .quad 0x75b6ba21077c48ad
> + .quad 0xed56bb2dcb3c7736
> + .quad 0x8bda43d3fd1a7e06
> + .quad 0xb64a9c9e5d318408
> + .quad 0xdf9a54b303f1d3a3
> + .quad 0x4a75479abd64e097
> + .quad 0x249214109d5d1c88
> +endconst
> +
> +const error_message
> + .asciz "failed to preserve register"
> +endconst
> +
> +@ max number of args used by any asm function.
> +#define MAX_ARGS 15
> +
> +#define ARG_STACK 4*(MAX_ARGS - 2)
> +
> +.macro clobbercheck variant
> +.equ pushed, 4*9
> +function checkasm_checked_call_\variant, export=1
> + push {r4-r11, lr}
> +.ifc \variant, vfp
> + vpush {q4-q7}
> +.equ pushed, pushed + 16*4
> +.endif
> +
> + movrel r12, register_init
> +.ifc \variant, vfp
> + vldm r12, {q4-q7}
Better written as {d8-d15}: gas seems to translate the q-register form, but
who knows how other assemblers handle it.
> +.endif
> + ldm r12, {r4-r11}
> +
> + sub sp, sp, #ARG_STACK
> +.equ pos, 0
> +.rept MAX_ARGS-2
> + ldr r12, [sp, #ARG_STACK + pushed + 8 + pos]
> + str r12, [sp, #pos]
> +.equ pos, pos + 4
> +.endr
> +
> + mov r12, r0
> + mov r0, r2
> + mov r1, r3
> + ldrd r2, r3, [sp, #ARG_STACK + pushed]
> + blx r12
> + add sp, sp, #ARG_STACK
> +
> + push {r0, r1}
> + movrel r12, register_init
> + mov r3, #0
> +.ifc \variant, vfp
> + vldm r12, {q0-q3}
> +.macro check_reg_vfp reg1, reg2
> + vmov r0, \reg1
> + vmov r1, \reg2
> + eor r0, r0, r1
> + orr r3, r3, r0
> +.endm
> +
> + check_reg_vfp s0, s16
> + check_reg_vfp s1, s17
> + check_reg_vfp s2, s18
> + check_reg_vfp s3, s19
> + check_reg_vfp s4, s20
> + check_reg_vfp s5, s21
> + check_reg_vfp s6, s22
> + check_reg_vfp s7, s23
> + check_reg_vfp s8, s24
> + check_reg_vfp s9, s25
> + check_reg_vfp s10, s26
> + check_reg_vfp s11, s27
> + check_reg_vfp s12, s28
> + check_reg_vfp s13, s29
> + check_reg_vfp s14, s30
> + check_reg_vfp s15, s31
Not really nice. I didn't realize that veor would be missing for the vfp
variant. The load could be integrated into the check_reg_vfp macro.
Since there is then only a single numeric parameter, one could use .irp.
Also, there should be enough free regs to compare 64 bits at once, like:
.macro check_reg_vfp, dreg, inc=8
ldrd r0, r1, [r12], #inc
vmov r2, lr, \dreg
eor r0, r2
eor r1, lr
orr r3, r0
orr r3, r1
.endm
.irp n,8,9,...
check_reg_vfp d\n
.endr
check_reg_vfp d15, -56
Not that much nicer. Decide for yourself whether you prefer this or want to
keep the patch as it is. Both are ok for me.
> +.purgem check_reg_vfp
> +.endif
> +
> +.macro check_reg reg1, reg2=
> + ldrd r0, r1, [r12], #8
> + eor r0, r0, \reg1
> + orr r3, r3, r0
> +.ifnb \reg2
> + eor r1, r1, \reg2
> + orr r3, r3, r1
> +.endif
> +.endm
> + check_reg r4, r5
> + check_reg r6, r7
> +@ r9 is a volatile register in the ios ABI
> +#ifdef __APPLE__
> + check_reg r8
> +#else
> + check_reg r8, r9
> +#endif
> + check_reg r10, r11
> +.purgem check_reg
> +
> + cmp r3, #0
> + beq 0f
> +
> + movrel r0, error_message
> + blx X(checkasm_fail_func)
> +0:
> + pop {r0, r1}
> +.ifc \variant, vfp
> + vpop {q4-q7}
same as for vpush
> +.endif
> + pop {r4-r11, pc}
> +endfunc
> +.endm
> +
> +#if HAVE_VFP || HAVE_NEON
I guess this needs to be _EXTERNAL too or we would have to add a .fpu
directive for the vfp variant
> +clobbercheck vfp
> +#endif
> +clobbercheck novfp
> diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
> index d6f8ffc..c75e431 100644
> --- a/tests/checkasm/checkasm.c
> +++ b/tests/checkasm/checkasm.c
> @@ -53,6 +53,12 @@
> #define isatty(fd) 1
> #endif
>
> +#if ARCH_ARM && HAVE_ARMV5TE_EXTERNAL
> +#include "libavutil/arm/cpu.h"
> +
> +void (*checkasm_checked_call)(void *func, int dummy, ...) =
> checkasm_checked_call_novfp;
> +#endif
> +
> /* List of tests to invoke */
> static const struct {
> const char *name;
> @@ -463,6 +469,11 @@ int main(int argc, char *argv[])
> {
> int i, seed, ret = 0;
>
> +#if ARCH_ARM && HAVE_ARMV5TE_EXTERNAL
> + if (have_vfp(av_get_cpu_flags()) || have_neon(av_get_cpu_flags()))
> + checkasm_checked_call = checkasm_checked_call_vfp;
> +#endif
> +
> if (!tests[0].func || !cpus[0].flag) {
> fprintf(stderr, "checkasm: no tests to perform\n");
> return 0;
> diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
> index 0bc66b9..2486cb5 100644
> --- a/tests/checkasm/checkasm.h
> +++ b/tests/checkasm/checkasm.h
> @@ -116,6 +116,15 @@ void checkasm_stack_clobber(uint64_t clobber, ...);
> (void *)checkasm_checked_call;
> #define call_new(...) checked_call(func_new, __VA_ARGS__)
> #endif
> +#elif ARCH_ARM && HAVE_ARMV5TE_EXTERNAL
> +/* Use a dummy argument, to offset the real parameters by 2, not only 1.
> + * This makes sure that potential 8-byte-alignment of parameters is kept the
> same
> + * even when the extra parameters have been removed. */
> +void checkasm_checked_call_vfp(void *func, int dummy, ...);
> +void checkasm_checked_call_novfp(void *func, int dummy, ...);
> +extern void (*checkasm_checked_call)(void *func, int dummy, ...);
> +#define declare_new(ret, ...) ret (*checked_call)(void *, int dummy,
> __VA_ARGS__) = (void *)checkasm_checked_call;
> +#define call_new(...) checked_call(func_new, 0, __VA_ARGS__)
> #else
> #define declare_new(ret, ...)
> #define declare_new_emms(cpu_flags, ret, ...)
> @@ -123,6 +132,10 @@ void checkasm_stack_clobber(uint64_t clobber, ...);
> #define call_new(...) ((func_type *)func_new)(__VA_ARGS__)
> #endif
>
> +#ifndef declare_new_emms
> +#define declare_new_emms(cpu_flags, ret, ...) declare_new(ret, __VA_ARGS__)
> +#endif
> +
> /* Benchmark the function */
> #ifdef AV_READ_TIME
> #define bench_new(...)\
otherwise ok
Janne
_______________________________________________
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel