On Thu, 31 Dec 2015, Janne Grunau wrote:

On 2015-12-31 12:46:39 +0200, Martin Storsjö wrote:
Use two separate functions, depending on whether neon is available.

should be neon or vfp

Ah, yes, so if either NEON or VFPv3 is available (or just check for VFPv3?).

This is set to require armv5te - it uses blx, which is only available
since armv5t, but we don't have a separate configure item for that.
(It also uses ldrd, which requires armv5te, but this could be avoided
if necessary.)

This breaks running checkasm on armv4 with runtime cpu detection. It's
not very useful since it only checks if two consecutive calls of the c
variant return equal/similar results.

Hmm, indeed. Shouldn't this be fixed by using HAVE_ARMV5TE_EXTERNAL instead?

---
Compared to x264's checkasm, this is slightly simpler since it only
is expected to return void, i.e. no issues with handling of 64 bit return
values vs function pointer type casting. (The assembly wrapper still tries
to maintain the return value intact though, even though it's currently
not used.) It also is simplified since it doesn't use separate parameter
for indicating failures (avoiding one extra register to backup, and
reducing the number of parameters to shift by from 2 to 1).

I think that is actually a disadvantage because it breaks type which are
naturally aligned to 8-byte. Those are assigned register pairs starting
with an even register or 8-byte aligned stack address. If we shift only
one register, that rule is broken so we need a second dummy register.

Hmm, ok, will try to fix.

Other than
that, there's only been some minor touch-ups, using more of libav's
asm.S and using #ifdef __APPLE__ instead of #if SYS_DARWIN.

If all calls would include the cpuflags like declare_new_emms,
one wouldn't need the neon vs noneon function selection at startup.

declare_new_emms doesn't include cpuflags, it specifies for which cpu
flags a emms is required. the actual (masked) cpuflags are queried by
av_get_cpu_flags(). That would work two but the startup check is more
correct since the neon/vfp checked_call variant should be always used if
the cpu supports it.

Ah, thanks for clarifying it.

---
 tests/checkasm/arm/Makefile   |    1 +
 tests/checkasm/arm/checkasm.S |  132 +++++++++++++++++++++++++++++++++++++++++
 tests/checkasm/checkasm.c     |    9 +++
 tests/checkasm/checkasm.h     |    8 +++
 4 files changed, 150 insertions(+)
 create mode 100644 tests/checkasm/arm/Makefile
 create mode 100644 tests/checkasm/arm/checkasm.S

diff --git a/tests/checkasm/arm/Makefile b/tests/checkasm/arm/Makefile
new file mode 100644
index 0000000..f73da18
--- /dev/null
+++ b/tests/checkasm/arm/Makefile
@@ -0,0 +1 @@
+CHECKASMOBJS-$(HAVE_ARMV5TE) += arm/checkasm.o
diff --git a/tests/checkasm/arm/checkasm.S b/tests/checkasm/arm/checkasm.S
new file mode 100644
index 0000000..d1740bc
--- /dev/null
+++ b/tests/checkasm/arm/checkasm.S
@@ -0,0 +1,132 @@
+/****************************************************************************
+ * Assembly testing and benchmarking tool
+ * Copyright (c) 2015 Martin Storsjo
+ * Copyright (c) 2015 Janne Grunau
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *****************************************************************************/
+
+#include "libavutil/arm/asm.S"
+
+const register_init
+    .quad 0x21f86d66c8ca00ce
+    .quad 0x75b6ba21077c48ad
+    .quad 0xed56bb2dcb3c7736
+    .quad 0x8bda43d3fd1a7e06
+    .quad 0xb64a9c9e5d318408
+    .quad 0xdf9a54b303f1d3a3
+    .quad 0x4a75479abd64e097
+    .quad 0x249214109d5d1c88
+endconst
+
+const error_message
+    .asciz "failed to preserve register"
+endconst
+
+@ max number of args used by any asm function.
+#define MAX_ARGS 15
+
+#define ARG_STACK 4*(MAX_ARGS - 3)
+
+.macro clobbercheck variant
+.equ pushed, 4*9
+function checkasm_checked_call_\variant, export=1
+    push        {r4-r11, lr}
+.ifc \variant, neon
+    vpush       {q4-q7}
+.equ pushed, pushed + 16*4
+.endif
+
+    movrel      r12, register_init
+.ifc \variant, neon
+    vldm        r12, {q4-q7}
+.endif
+    ldm         r12, {r4-r11}
+
+    sub         sp,  sp,  #ARG_STACK
+.equ pos, 0
+.rept MAX_ARGS-3
+    ldr         r12, [sp, #ARG_STACK + pushed + 4 + pos]
+    str         r12, [sp, #pos]
+.equ pos, pos + 4
+.endr
+
+    mov         r12, r0
+    mov         r0,  r1
+    mov         r1,  r2
+    mov         r2,  r3
+    ldr         r3,  [sp, #ARG_STACK + pushed]
+    blx         r12
+    add         sp,  sp,  #ARG_STACK
+
+    push        {r0, r1}
+    movrel      r12, register_init
+.ifc \variant, neon
+    vldm        r12, {q0-q3}
+    veor        q0,  q0,  q4
+    veor        q1,  q1,  q5
+    veor        q2,  q2,  q6
+    veor        q3,  q3,  q7
+    vorr        q0,  q0,  q1
+    vorr        q0,  q0,  q2
+    vorr        q0,  q0,  q3
+    vorr        d0,  d0,  d1
+    vrev64.32   d1,  d0
+    vorr        d0,  d0,  d1
+    vmov.32     r3,  d0[0]
+.else
+    mov         r3,  #0
+.endif
+
+.macro check_reg reg1, reg2=
+    ldrd        r0,  r1,  [r12], #8
+    eor         r0,  r0, \reg1
+    orr         r3,  r3, r0
+.ifnb \reg2
+    eor         r1,  r1, \reg2
+    orr         r3,  r3, r1
+.endif
+.endm
+    check_reg   r4,  r5
+    check_reg   r6,  r7
+@ r9 is a volatile register in the ios ABI
+#ifdef __APPLE__
+    check_reg   r8
+#else
+    check_reg   r8,  r9
+#endif
+    check_reg   r10, r11
+.purgem check_reg
+
+    cmp         r3,  #0
+    beq         0f
+
+    movrel      r0, error_message
+    blx         X(checkasm_fail_func)
+0:
+    pop         {r0, r1}
+.ifc \variant, neon
+    vpop        {q4-q7}
+.endif
+    pop         {r4-r11, pc}
+endfunc
+.endm
+
+#if HAVE_NEON
+clobbercheck neon
+#endif
+clobbercheck noneon

except for the alignment issues for types with 8-byte alignment ok

diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index d6f8ffc..2c0bbbe 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -53,6 +53,10 @@
 #define isatty(fd) 1
 #endif

+#if HAVE_ARMV5TE
+void (*checkasm_checked_call)(void *func, ...) = checkasm_checked_call_noneon;
+#endif
+
 /* List of tests to invoke */
 static const struct {
     const char *name;
@@ -463,6 +467,11 @@ int main(int argc, char *argv[])
 {
     int i, seed, ret = 0;

+#if HAVE_ARMV5TE && HAVE_NEON
+    if (av_get_cpu_flags() & AV_CPU_FLAG_NEON)
+        checkasm_checked_call = checkasm_checked_call_neon;
+#endif

I think it would be cleaner to have this in a single ARCH_ARM block here

Hmm, can you elaborate a little? We need to have at least an ifdef check for HAVE_NEON (or rather VFPV3) to avoid undefined references - is there some existing ARCH_ARM block you suggest I should merge it into?

// Martin
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to