On Fri, Jul 3, 2015 at 1:19 AM, Janne Grunau <[email protected]> wrote:
> Thanks for the work. Checkasm style asm function level unit tests are
> helpful when writing asm optimizations (at least when the tests is
> already written by someone else ;)).
>
> I started working on build system integration. It is somewhat tricky so
> giving pointers without doing it myself is hard.

The latest version of that patch already had working build system
integration, but IIRC it was rejected, so I pretty much just
dropped the project since there wasn't any interest in it.

Attaching a version from 2014-11-09. I might have some additional
changes locally on top of that, but I'm not at home right now so I
can't check it at this moment.

/Henrik
From 6efacab3472889ba6acb88d60d7ca7413512edb8 Mon Sep 17 00:00:00 2001
From: Henrik Gramner <[email protected]>
Date: Sun, 9 Nov 2014 15:43:40 +0100
Subject: [PATCH] Checkasm: assembly testing and benchmarking tool

It provides the following features:
* verify correctness by comparing output to the C version.
* detect failure to save and restore clobbered callee-saved registers.
* detect 32-bit parameters being used as if they were 64-bit in x86-64
  (the upper halves are not guaranteed to be zero - but in practice they
  very often are, which makes those bugs hard to spot otherwise).
* easy benchmarking.

Compile by running 'make checkasm'.
Execute by running 'tests/checkasm/checkasm'.

Optional arguments are '--bench' to run benchmarks for all functions,
'--bench=<pattern>' to run benchmarks for all functions that start with
<pattern>, and '<integer>' to seed the PRNG for reproducible results.

Contains unit tests for most h264pred functions to get started,
more tests can easily be added afterwards.

Loosely based on code from x264. Currently only supports x86 and x86-64.

Note that functions with floating point parameters or return values are
not yet supported. Some preprocessor hacks would probably be required
to add support for that without relying on compiler-specific features.
---
 .gitignore                      |   1 +
 tests/Makefile                  |   2 +
 tests/checkasm/Makefile         |  27 +++
 tests/checkasm/checkasm.c       | 462 ++++++++++++++++++++++++++++++++++++++++
 tests/checkasm/checkasm.h       | 123 +++++++++++
 tests/checkasm/h264pred.c       | 253 ++++++++++++++++++++++
 tests/checkasm/x86/Makefile     |   6 +
 tests/checkasm/x86/checkasm.asm | 194 +++++++++++++++++
 8 files changed, 1068 insertions(+)
 create mode 100644 tests/checkasm/Makefile
 create mode 100644 tests/checkasm/checkasm.c
 create mode 100644 tests/checkasm/checkasm.h
 create mode 100644 tests/checkasm/h264pred.c
 create mode 100644 tests/checkasm/x86/Makefile
 create mode 100644 tests/checkasm/x86/checkasm.asm

diff --git a/.gitignore b/.gitignore
index 19bb757..af3bcd0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -46,6 +46,7 @@
 /libavutil/avconfig.h
 /tests/audiogen
 /tests/base64
+/tests/checkasm/checkasm
 /tests/data/
 /tests/pixfmts.mak
 /tests/rotozoom
diff --git a/tests/Makefile b/tests/Makefile
index a2ee79b..7335e59 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -173,5 +173,7 @@ testclean:
 
 -include $(wildcard tests/*.d)
 
+include $(SRC_PATH)/tests/checkasm/Makefile
+
 .PHONY: fate* lcov lcov-reset
 .INTERMEDIATE: coverage.info
diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
new file mode 100644
index 0000000..6273bad
--- /dev/null
+++ b/tests/checkasm/Makefile
@@ -0,0 +1,27 @@
+CHECKASMOBJS-$(CONFIG_H264PRED) += h264pred.o
+
+-include $(SRC_PATH)/tests/checkasm/$(ARCH)/Makefile
+
+CHECKASMOBJS += $(CHECKASMOBJS-yes) checkasm.o
+CHECKASMOBJS := $(CHECKASMOBJS:%=tests/checkasm/%)
+
+-include $(CHECKASMOBJS:.o=.d)
+# Link into the build tree next to the objects; $(SRC_PATH) may be a separate source tree.
+CHECKASM := tests/checkasm/checkasm$(EXESUF)
+
+# We rely on function pointers intentionally declared without specified argument types.
+tests/checkasm/%.o: CFLAGS := $(subst -Wstrict-prototypes,,$(CFLAGS))
+
+checkasm: $(CHECKASM)
+
+$(CHECKASM): $(CHECKASMOBJS) $(FF_DEP_LIBS)
+	$(LD) $(LDFLAGS) $(LDEXEFLAGS) $(LD_O) $(CHECKASMOBJS) $(FF_EXTRALIBS)
+
+clean:: checkasmclean
+
+checkasmclean:
+	$(RM) $(CLEANSUFFIXES:%=tests/checkasm/%)
+	-$(RM) $(CLEANSUFFIXES:%=tests/checkasm/$(ARCH)/%)
+	$(RM) $(CHECKASM)
+
+.PHONY: checkasm checkasmclean
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
new file mode 100644
index 0000000..f9a1c71
--- /dev/null
+++ b/tests/checkasm/checkasm.c
@@ -0,0 +1,462 @@
+/*
+ * Assembly testing and benchmarking tool
+ * Copyright (c) 2014 Henrik Gramner
+ * Copyright (c) 2008 Loren Merritt
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Libav; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <assert.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "checkasm.h"
+#include "libavutil/common.h"
+#include "libavutil/random_seed.h"
+
+#if ARCH_X86
+#include "libavutil/x86/cpu.h"
+#endif
+
+#if HAVE_SETCONSOLETEXTATTRIBUTE
+#include <windows.h>
+#define COLOR_RED    FOREGROUND_RED
+#define COLOR_GREEN  FOREGROUND_GREEN
+#define COLOR_YELLOW (FOREGROUND_RED|FOREGROUND_GREEN)
+#else
+#define COLOR_RED    1
+#define COLOR_GREEN  2
+#define COLOR_YELLOW 3
+#endif
+
+#if HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#if !HAVE_ISATTY
+#define isatty(fd) 1
+#endif
+
+/* List of tests to invoke */
+static void (* const tests[])(void) = {
+#if CONFIG_H264PRED
+    checkasm_check_h264pred,
+#endif
+    NULL
+};
+
+/* List of cpu flags to check */
+static const struct {
+    const char *name;
+    const char *suffix;
+    int flag;
+} cpus[] = {
+#if ARCH_X86
+    { "MMX",      "mmx",      AV_CPU_FLAG_MMX|AV_CPU_FLAG_CMOV },
+    { "MMXEXT",   "mmxext",   AV_CPU_FLAG_MMXEXT },
+    { "3DNOW",    "3dnow",    AV_CPU_FLAG_3DNOW },
+    { "3DNOWEXT", "3dnowext", AV_CPU_FLAG_3DNOWEXT },
+    { "SSE",      "sse",      AV_CPU_FLAG_SSE },
+    { "SSE2",     "sse2",     AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW },
+    { "SSE3",     "sse3",     AV_CPU_FLAG_SSE3|AV_CPU_FLAG_SSE3SLOW },
+    { "SSSE3",    "ssse3",    AV_CPU_FLAG_SSSE3|AV_CPU_FLAG_ATOM },
+    { "SSE4.1",   "sse4",     AV_CPU_FLAG_SSE4 },
+    { "SSE4.2",   "sse42",    AV_CPU_FLAG_SSE42 },
+    { "AVX",      "avx",      AV_CPU_FLAG_AVX },
+    { "XOP",      "xop",      AV_CPU_FLAG_XOP },
+    { "FMA3",     "fma3",     AV_CPU_FLAG_FMA3 },
+    { "FMA4",     "fma4",     AV_CPU_FLAG_FMA4 },
+    { "AVX2",     "avx2",     AV_CPU_FLAG_AVX2 },
+#endif
+    { NULL }
+};
+
+typedef struct CheckasmFuncVersion {
+    struct CheckasmFuncVersion *next; /* next registered version of the same function, or NULL */
+    intptr_t (*func)();               /* implementation registered through checkasm_check_func() */
+    int ok;                           /* set to 1 on registration, cleared by checkasm_fail_func() */
+    int cpu;                          /* value of cpu_flag when this version was registered */
+    int iterations;                   /* benchmark samples accumulated by checkasm_update_bench() */
+    uint64_t cycles;                  /* sum of the timings of those samples */
+} CheckasmFuncVersion;
+
+typedef struct CheckasmFunc {
+    struct CheckasmFunc *left;        /* subtree of names that cmp_func_names() orders before this one */
+    struct CheckasmFunc *right;       /* subtree of names that cmp_func_names() orders after this one */
+    CheckasmFuncVersion versions;     /* first version, stored inline; later ones are chained via next */
+    char name[1];                     /* NUL-terminated name; get_func() over-allocates the node to fit it */
+} CheckasmFunc;
+
+static CheckasmFunc *funcs;
+static CheckasmFunc *current_func;
+static CheckasmFuncVersion *current_func_ver;
+static const char *bench_pattern;
+static int bench_pattern_len;
+static int num_checked;
+static int num_failed;
+static int cpu_flag;
+static int nop_time;
+static const char *cpu_flag_name;
+
+AVLFG checkasm_lfg;
+
+/* Map a set of cpu flags to the suffix of the last cpus[] entry it intersects, or "c" if none */
+static const char *cpu_suffix(int cpu)
+{
+    int idx;
+
+    for (idx = FF_ARRAY_ELEMS(cpus) - 1; idx >= 0; idx--)
+        if (cpus[idx].flag & cpu)
+            return cpus[idx].suffix;
+    return "c";
+}
+
+static int cmp_nop(const void *a, const void *b) /* qsort() comparator: ascending uint16_t order */
+{
+    return *(const uint16_t*)a - *(const uint16_t*)b; /* both operands promote to int before subtracting */
+}
+
+/* Estimate the cost of a back-to-back read_time() pair, in tenths of a cycle */
+static int measure_nop_time(void)
+{
+    uint16_t samples[10000];
+    const int count = FF_ARRAY_ELEMS(samples);
+    int n, total = 0;
+    for (n = 0; n < count; n++) {
+        uint32_t start = read_time();
+        samples[n] = read_time() - start;
+    }
+
+    /* Sum only the middle half of the sorted samples so that outliers are ignored */
+    qsort(samples, count, sizeof(*samples), cmp_nop);
+    for (n = count / 4; n < 3 * count / 4; n++)
+        total += samples[n];
+    return total / (count / 20); /* 5000-sample sum divided by 500: ten times the mean */
+}
+
+/* Print benchmark results (in-order walk of the function tree, one line per benchmarked version) */
+static void print_benchs(CheckasmFunc *f)
+{
+    if (f) {
+        print_benchs(f->left);
+
+        /* Only print functions with at least one assembly version */
+        if (f->versions.cpu || f->versions.next) {
+            CheckasmFuncVersion *v = &f->versions;
+            do {
+                if (v->iterations) {
+                    int decicycles = (10*v->cycles/v->iterations - nop_time) / 4; /* each sample spans 4 calls (see bench_new) */
+                    printf("%s_%s: %d.%d\n", f->name, cpu_suffix(v->cpu), decicycles/10, decicycles%10);
+                }
+            } while ((v = v->next));
+        }
+
+        print_benchs(f->right);
+    }
+}
+
+/* ASCIIbetical sort except preserving natural order for numbers */
+static int cmp_func_names(const char *a, const char *b)
+{
+    int ascii_diff, digit_diff;
+
+    for (; !(ascii_diff = *a - *b) && *a; a++, b++);    /* skip the common prefix, remember the first difference */
+    for (; av_isdigit(*a) && av_isdigit(*b); a++, b++); /* then skip digits as long as both names have one */
+    /* A name with a digit left over holds the longer number and sorts later; otherwise the first difference decides */
+    return (digit_diff = av_isdigit(*a) - av_isdigit(*b)) ? digit_diff : ascii_diff;
+}
+
+/* Get a node with the specified name, creating it if it doesn't exist */
+static CheckasmFunc *get_func(const char *name, int length) /* length excludes the terminating NUL */
+{
+    CheckasmFunc *f, **f_ptr = &funcs;
+
+    /* Search the binary tree (ordered by cmp_func_names) for a matching node */
+    while ((f = *f_ptr)) {
+        int cmp = cmp_func_names(name, f->name);
+        if (!cmp)
+            return f;
+
+        f_ptr = (cmp < 0) ? &f->left : &f->right;
+    }
+
+    /* Allocate and insert a new node into the tree; f_ptr is the empty link where the search ended */
+    f = *f_ptr = calloc(1, sizeof(CheckasmFunc) + length); /* name[1] already provides room for the NUL */
+    assert(f);
+    memcpy(f->name, name, length+1);
+
+    return f;
+}
+
+/* Recursively free a function tree, including each node's chained extra versions */
+static void destroy_func_tree(CheckasmFunc *f)
+{
+    CheckasmFuncVersion *ver, *following;
+
+    if (!f)
+        return;
+    for (ver = f->versions.next; ver; ver = following) { /* the first version lives inside the node itself */
+        following = ver->next;
+        free(ver);
+    }
+    destroy_func_tree(f->left);
+    destroy_func_tree(f->right);
+    free(f);
+}
+
+/* Print colored text to stderr if the terminal supports it */
+static void color_printf(int color, const char *fmt, ...)
+{
+    static int use_color = -1; /* -1 = not probed yet; decided once, on the first call */
+    va_list arg;
+
+#if HAVE_SETCONSOLETEXTATTRIBUTE
+    static HANDLE con;
+    static WORD org_attributes;
+
+    if (use_color < 0) {
+        CONSOLE_SCREEN_BUFFER_INFO con_info;
+        con = GetStdHandle(STD_ERROR_HANDLE);
+        if (con && con != INVALID_HANDLE_VALUE && GetConsoleScreenBufferInfo(con, &con_info)) {
+            org_attributes = con_info.wAttributes;
+            use_color = 1;
+        } else
+            use_color = 0;
+    }
+    if (use_color)
+        SetConsoleTextAttribute(con, (org_attributes & 0xfff0) | (color & 0x0f)); /* swap only the low 4 attribute bits */
+#else
+    if (use_color < 0) {
+        const char *term = getenv("TERM");
+        use_color = term && strcmp(term, "dumb") && isatty(2); /* fd 2 = stderr */
+    }
+    if (use_color)
+        fprintf(stderr, "\x1b[%d;3%dm", (color & 0x08) >> 3, color & 0x07); /* bit 3 -> first parameter, low 3 bits -> 30-37 */
+#endif
+
+    va_start(arg, fmt);
+    vfprintf(stderr, fmt, arg);
+    va_end(arg);
+
+    if (use_color) {
+#if HAVE_SETCONSOLETEXTATTRIBUTE
+        SetConsoleTextAttribute(con, org_attributes); /* restore the attributes captured on the first call */
+#else
+        fputs("\x1b[0m", stderr); /* reset sequence */
+#endif
+    }
+}
+
+/* Perform tests and benchmarks for the specified cpu flag if supported by the host */
+static void check_cpu_flag(const char *name, int flag)
+{
+    int old_cpu_flag = cpu_flag;
+
+    flag |= cpu_flag; /* cumulative: keep every flag enabled by earlier calls */
+    av_set_cpu_flags_mask(flag);
+    cpu_flag = av_get_cpu_flags();
+    /* NOTE(review): presumably av_get_cpu_flags() now returns flag limited to what the host supports - confirm */
+    if (!flag || cpu_flag != old_cpu_flag) { /* first (flagless) pass, or the effective flags changed */
+        int i;
+
+        cpu_flag_name = name; /* printed lazily by checkasm_report() */
+        for (i = 0; tests[i]; i++)
+            tests[i]();
+    }
+}
+
+/* Usage: checkasm [--bench[=<pattern>]] [<seed>]; returns 1 if there is nothing to test or a test failed */
+int main(int argc, char *argv[])
+{
+    unsigned int seed;
+    int i, ret = 0;
+
+    if (!tests[0] || !cpus[0].flag) {
+        fputs("checkasm: no tests to perform\n", stderr);
+        return 1;
+    }
+
+    if (argc > 1 && !strncmp(argv[1], "--bench", 7)) {
+#if !ARCH_X86
+        fputs("checkasm: --bench is not supported on your system\n", stderr);
+        return 1;
+#endif
+#if !HAVE_INLINE_ASM
+        fputs("checkasm: inline assembly is required to use --bench\n", stderr);
+        return 1;
+#endif
+        if (argv[1][7] == '=') {
+            bench_pattern = argv[1]+8;
+            bench_pattern_len = strlen(bench_pattern);
+        } else
+            bench_pattern = "";
+        /* Consume the option so that an optional seed is always argv[1] */
+        argc--;
+        argv++;
+    }
+    /* The seed is printed with %u, so parse it as unsigned too: atoi() cannot take back values above INT_MAX */
+    seed = (argc > 1) ? strtoul(argv[1], NULL, 10) : av_get_random_seed();
+    fprintf(stderr, "checkasm: using random seed %u\n", seed);
+    av_lfg_init(&checkasm_lfg, seed);
+    /* Run the tests without any cpu flag first, then once more for each flag in cpus[] */
+    check_cpu_flag(NULL, 0);
+    for (i = 0; cpus[i].flag; i++)
+        check_cpu_flag(cpus[i].name, cpus[i].flag);
+
+    if (num_failed) {
+        fprintf(stderr, "checkasm: %d of %d tests have failed\n", num_failed, num_checked);
+        ret = 1;
+    } else {
+        fprintf(stderr, "checkasm: all %d tests passed\n", num_checked);
+        if (bench_pattern) {
+            nop_time = measure_nop_time();
+            printf("nop: %d.%d\n", nop_time/10, nop_time%10);
+            print_benchs(funcs);
+        }
+    }
+
+    destroy_func_tree(funcs);
+    return ret;
+}
+
+/* Decide whether or not the specified function needs to be tested and
+ * allocate/initialize data structures if needed. Returns a pointer to a
+ * reference function if the function should be tested, otherwise NULL */
+intptr_t (*checkasm_check_func(intptr_t (*func)(), const char *name, ...))()
+{
+    char name_buf[256];
+    intptr_t (*ref)() = func; /* a function with no earlier version is its own reference */
+    CheckasmFuncVersion *v;
+    int name_length;
+    va_list arg;
+
+    va_start(arg, name);
+    name_length = vsnprintf(name_buf, sizeof(name_buf), name, arg); /* name is a printf-style format */
+    va_end(arg);
+    assert(name_length > 0 && name_length < sizeof(name_buf)); /* reject empty or truncated names */
+
+    current_func = get_func(name_buf, name_length);
+    v = &current_func->versions;
+
+    if (v->func) { /* not the first version registered under this name */
+        CheckasmFuncVersion *prev;
+        do {
+            /* Only test functions that haven't already been tested */
+            if (v->func == func)
+                return NULL;
+
+            if (v->ok)
+                ref = v->func; /* the last version that has not failed becomes the reference */
+
+            prev = v;
+        } while ((v = v->next));
+
+        v = prev->next = calloc(1, sizeof(CheckasmFuncVersion)); /* append a new version to the list */
+        assert(v);
+    }
+
+    v->func = func;
+    v->ok = 1;
+    v->cpu = cpu_flag;
+    current_func_ver = v;
+
+    if (cpu_flag) /* versions registered during the flagless pass are not counted as tests */
+        num_checked++;
+
+    return ref;
+}
+
+/* Benchmark only while no test has failed and the current name starts with the --bench pattern */
+int checkasm_bench_func(void)
+{
+    return bench_pattern && !num_failed && strncmp(bench_pattern, current_func->name, bench_pattern_len) == 0;
+}
+
+/* Record a failure of the current function version and print the formatted message */
+void checkasm_fail_func(const char *msg, ...)
+{
+    va_list args;
+
+    /* Versions registered without a cpu flag are never reported, and each version fails at most once */
+    if (!current_func_ver->cpu || !current_func_ver->ok)
+        return;
+    fprintf(stderr, "   %s_%s: ", current_func->name, cpu_suffix(current_func_ver->cpu));
+    va_start(args, msg);
+    vfprintf(stderr, msg, args);
+    va_end(args);
+    fputc('\n', stderr);
+    current_func_ver->ok = 0;
+    num_failed++;
+}
+
+/* Accumulate benchmark samples and their total timing for the current function version */
+void checkasm_update_bench(int iterations, uint64_t cycles)
+{
+    current_func_ver->cycles     += cycles;
+    current_func_ver->iterations += iterations;
+}
+
+/* Print the outcome of all tests performed since the last time this function was called */
+void checkasm_report(const char *name, ...)
+{
+    static int prev_checked, prev_failed, max_length;
+
+    if (num_checked > prev_checked) { /* something was tested since the previous report */
+        if (cpu_flag_name) {
+            /* Print the CPU flag name only if it's actually used */
+            color_printf(COLOR_YELLOW, "%s:\n", cpu_flag_name);
+            cpu_flag_name = NULL;
+        }
+
+        if (name && *name) {
+            int pad_length = max_length;
+            va_list arg;
+
+            fputs(" - ", stderr);
+            va_start(arg, name);
+            pad_length -= vfprintf(stderr, name, arg);
+            va_end(arg);
+            fprintf(stderr, "%*c", FFMAX(pad_length, 0) + 2, '['); /* pad up to max_length, then " [" */
+        } else /* empty label: fall back to the name of the current function */
+            fprintf(stderr, " - %-*s [", max_length, current_func->name);
+
+        if (num_failed == prev_failed)
+            color_printf(COLOR_GREEN, "OK");
+        else
+            color_printf(COLOR_RED, "FAILED");
+        fputs("]\n", stderr);
+
+        prev_checked = num_checked;
+        prev_failed  = num_failed;
+    } else if (!cpu_flag) { /* flagless pass: nothing is counted, only the widest label is recorded */
+        int length;
+
+        /* Calculate the amount of padding required to make the output vertically aligned */
+        if (name && *name) {
+            va_list arg;
+            va_start(arg, name);
+            length = vsnprintf(NULL, 0, name, arg);
+            va_end(arg);
+        } else
+            length = strlen(current_func->name);
+
+        if (length > max_length)
+            max_length = length;
+    }
+}
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
new file mode 100644
index 0000000..9c5216a
--- /dev/null
+++ b/tests/checkasm/checkasm.h
@@ -0,0 +1,123 @@
+/*
+ * Assembly testing and benchmarking tool
+ * Copyright (c) 2014 Henrik Gramner
+ * Copyright (c) 2008 Loren Merritt
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Libav; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef CHECKASM_H
+#define CHECKASM_H
+
+#include <stdint.h>
+
+#include "config.h"
+#include "libavutil/avstring.h"
+#include "libavutil/cpu.h"
+#include "libavutil/lfg.h"
+
+void checkasm_check_h264pred(void);
+
+intptr_t (*checkasm_check_func(intptr_t (*func)(), const char *name, ...))() av_printf_format(2, 3);
+int checkasm_bench_func(void);
+void checkasm_fail_func(const char *msg, ...) av_printf_format(1, 2);
+void checkasm_update_bench(int iterations, uint64_t cycles);
+void checkasm_report(const char *name, ...) av_printf_format(1, 2);
+
+extern AVLFG checkasm_lfg;
+#define rnd() av_lfg_get(&checkasm_lfg)
+
+static av_unused intptr_t (*func_ref)();
+static av_unused intptr_t (*func_new)();
+
+#define BENCH_RUNS 1000  /* Trade-off between accuracy and speed */
+
+/* Sets func_new (and func_ref, as returned by checkasm_check_func); nonzero if func needs to be tested */
+#define check_func(func, ...) ((func_new = (intptr_t (*)())(func)) &&\
+                              (func_ref = checkasm_check_func(func_new, __VA_ARGS__)))
+
+/* Indicate that the current test has failed */
+#define fail() checkasm_fail_func("%s:%d", av_basename(__FILE__), __LINE__)
+
+/* Print the test outcome */
+#define report(...) checkasm_report("" __VA_ARGS__)
+
+/* Call the reference function */
+#define call_ref(...) func_ref(__VA_ARGS__)
+
+#if ARCH_X86 && HAVE_YASM
+/* Verifies that clobbered callee-saved registers are properly saved and restored */
+intptr_t checkasm_checked_call(intptr_t (*func)(), ...);
+#endif
+
+/* Call the function */
+#if ARCH_X86_64 && HAVE_YASM
+/* Evil hack: detect incorrect assumptions that 32-bit ints are zero-extended to 64-bit.
+ * This is done by clobbering the stack with junk around the stack pointer and calling the
+ * assembly function through checkasm_checked_call with added dummy arguments which forces all
+ * real arguments to be passed on the stack and not in registers. For 32-bit arguments the
+ * upper half of the 64-bit register locations on the stack will now contain junk which will
+ * cause misbehaving functions to either produce incorrect output or segfault. Note that
+ * even though this works extremely well in practice, it's technically not guaranteed
+ * and false negatives are theoretically possible, but there can never be any false positives.
+ */
+int checkasm_stack_clobber(uint64_t clobber, ...);
+#define CLOB (UINT64_C(0xdeadbeefdeadbeef))
+#define call_new(...) (checkasm_stack_clobber(CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,\
+                                              CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB) ? 0 :\
+                      checkasm_checked_call(func_new, 0, 0, 0, 0, 0, __VA_ARGS__))
+#elif ARCH_X86_32 && HAVE_YASM
+#define call_new(...) checkasm_checked_call(func_new, __VA_ARGS__)
+#else
+#define call_new(...) func_new(__VA_ARGS__)
+#endif
+
+/* Benchmark the function: BENCH_RUNS timings of 4 calls each, skipping the first run and runs above 4x the running mean */
+#define bench_new(...)\
+    do {\
+        if (checkasm_bench_func()) {\
+            intptr_t (*tfunc)() = func_new;\
+            uint64_t tsum = 0;\
+            int tcount = 0;\
+            int ti;\
+            for (ti = 0; ti < BENCH_RUNS; ti++) {\
+                uint32_t t = read_time();\
+                tfunc(__VA_ARGS__);\
+                tfunc(__VA_ARGS__);\
+                tfunc(__VA_ARGS__);\
+                tfunc(__VA_ARGS__);\
+                t = read_time() - t;\
+                if ((uint64_t)t*tcount <= tsum*4 && ti > 0) {\
+                    tsum += t;\
+                    tcount++;\
+                }\
+            }\
+            checkasm_update_bench(tcount, tsum);\
+        }\
+    } while (0)
+
+static av_always_inline uint32_t read_time(void) /* low 32 bits of the TSC, or 0 without x86 inline asm */
+{
+    uint32_t a = 0;
+#if ARCH_X86 && HAVE_INLINE_ASM
+    __asm__ volatile ("lfence \n" /* wait for preceding instructions to finish before reading the counter */
+                      "rdtsc  \n"
+                      : "=a"(a) :: "edx", "memory"); /* rdtsc also writes edx; that half is discarded */
+#endif
+    return a;
+}
+#endif
diff --git a/tests/checkasm/h264pred.c b/tests/checkasm/h264pred.c
new file mode 100644
index 0000000..8ae5f46
--- /dev/null
+++ b/tests/checkasm/h264pred.c
@@ -0,0 +1,253 @@
+/*
+ * Copyright (c) 2014 Henrik Gramner
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Libav; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+
+#include "checkasm.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/h264pred.h"
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+
+static const int codec_ids[4] = { AV_CODEC_ID_H264, AV_CODEC_ID_VP8, AV_CODEC_ID_RV40, AV_CODEC_ID_SVQ3 };
+
+static const char * const pred4x4_modes[4][15] = {
+    { /* H264 */
+        [VERT_PRED           ] = "vertical",
+        [HOR_PRED            ] = "horizontal",
+        [DC_PRED             ] = "dc",
+        [DIAG_DOWN_LEFT_PRED ] = "down_left",
+        [DIAG_DOWN_RIGHT_PRED] = "down_right",
+        [VERT_RIGHT_PRED     ] = "vertical_right",
+        [HOR_DOWN_PRED       ] = "horizontal_right",
+        [VERT_LEFT_PRED      ] = "vertical_left",
+        [HOR_UP_PRED         ] = "horizontal_up",
+        [LEFT_DC_PRED        ] = "left_dc",
+        [TOP_DC_PRED         ] = "top_dc",
+        [DC_128_PRED         ] = "dc_128",
+    },
+    { /* VP8 */
+        [VERT_PRED     ] = "vertical_vp8",
+        [HOR_PRED      ] = "horizontal_vp8",
+        [VERT_LEFT_PRED] = "vertical_left_vp8",
+        [TM_VP8_PRED   ] = "tm_vp8",
+        [DC_127_PRED   ] = "dc_127_vp8",
+        [DC_129_PRED   ] = "dc_129_vp8",
+    },
+    { /* RV40 */
+        [DIAG_DOWN_LEFT_PRED            ] = "down_left_rv40",
+        [VERT_LEFT_PRED                 ] = "vertical_left_rv40",
+        [HOR_UP_PRED                    ] = "horizontal_up_rv40",
+        [DIAG_DOWN_LEFT_PRED_RV40_NODOWN] = "down_left_nodown_rv40",
+        [HOR_UP_PRED_RV40_NODOWN        ] = "horizontal_up_nodown_rv40",
+        [VERT_LEFT_PRED_RV40_NODOWN     ] = "vertical_left_nodown_rv40",
+    },
+    { /* SVQ3 */
+        [DIAG_DOWN_LEFT_PRED] = "down_left_svq3",
+    },
+};
+
+static const char * const pred8x8_modes[4][11] = {
+    { /* H264 */
+        [DC_PRED8x8              ] = "dc",
+        [HOR_PRED8x8             ] = "horizontal",
+        [VERT_PRED8x8            ] = "vertical",
+        [PLANE_PRED8x8           ] = "plane",
+        [LEFT_DC_PRED8x8         ] = "left_dc",
+        [TOP_DC_PRED8x8          ] = "top_dc",
+        [DC_128_PRED8x8          ] = "dc_128",
+        [ALZHEIMER_DC_L0T_PRED8x8] = "mad_cow_dc_l0t",
+        [ALZHEIMER_DC_0LT_PRED8x8] = "mad_cow_dc_0lt",
+        [ALZHEIMER_DC_L00_PRED8x8] = "mad_cow_dc_l00",
+        [ALZHEIMER_DC_0L0_PRED8x8] = "mad_cow_dc_0l0",
+    },
+    { /* VP8 */
+        [PLANE_PRED8x8 ] = "tm_vp8",
+        [DC_127_PRED8x8] = "dc_127_vp8",
+        [DC_129_PRED8x8] = "dc_129_vp8",
+    },
+    { /* RV40 */
+        [DC_PRED8x8     ] = "dc_rv40",
+        [LEFT_DC_PRED8x8] = "left_dc_rv40",
+        [TOP_DC_PRED8x8 ] = "top_dc_rv40",
+    },
+    { /* SVQ3 */
+    },
+};
+
+static const char * const pred16x16_modes[4][9] = {
+    { /* H264 */
+        [DC_PRED8x8     ] = "dc",
+        [HOR_PRED8x8    ] = "horizontal",
+        [VERT_PRED8x8   ] = "vertical",
+        [PLANE_PRED8x8  ] = "plane",
+        [LEFT_DC_PRED8x8] = "left_dc",
+        [TOP_DC_PRED8x8 ] = "top_dc",
+        [DC_128_PRED8x8 ] = "dc_128",
+    },
+    { /* VP8 */
+        [PLANE_PRED8x8 ] = "tm_vp8",
+        [DC_127_PRED8x8] = "dc_127_vp8",
+        [DC_129_PRED8x8] = "dc_129_vp8",
+    },
+    { /* RV40 */
+        [PLANE_PRED8x8] = "plane_rv40",
+    },
+    { /* SVQ3 */
+        [PLANE_PRED8x8] = "plane_svq3",
+    },
+};
+
+static const uint32_t pixel_mask[3] = { 0xffffffff, 0x01ff01ff, 0x03ff03ff };
+
+#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
+#define BUF_SIZE (3*16*17)
+
+#define check_pred_func(func, name, mode_name)\
+    (mode_name && ((codec_ids[codec] == AV_CODEC_ID_H264) ?\
+    check_func(func, "pred%s_%s_%d", name, mode_name, bit_depth) :\
+    check_func(func, "pred%s_%s", name, mode_name)))
+
+#define randomize_buffers()\
+    do {\
+        uint32_t mask = pixel_mask[bit_depth-8];\
+        int i;\
+        for (i = 0; i < BUF_SIZE; i += 4) {\
+            uint32_t r = rnd() & mask;\
+            AV_WN32A(buf0+i, r);\
+            AV_WN32A(buf1+i, r);\
+        }\
+    } while (0)
+
+#define src0 (buf0 + 4*16) /* Offset to allow room for top and left. */
+#define src1 (buf1 + 4*16)
+
+static void check_pred4x4(H264PredContext *h, uint8_t *buf0, uint8_t *buf1,
+                          int codec, int chroma_format, int bit_depth) /* compare each named 4x4 predictor with its reference */
+{
+    if (chroma_format == 1) { /* the caller iterates over chroma formats; the 4x4 tests run for format 1 only */
+        uint8_t *topright = buf0 + 2*16; /* both calls receive the same topright pointer, located inside buf0 */
+        int pred_mode;
+        for (pred_mode = 0; pred_mode < 15; pred_mode++) { /* 15 = second dimension of pred4x4_modes */
+            if (check_pred_func(h->pred4x4[pred_mode], "4x4", pred4x4_modes[codec][pred_mode])) {
+                randomize_buffers(); /* buf0 and buf1 receive identical contents */
+                call_ref(src0, topright, (ptrdiff_t)12*SIZEOF_PIXEL);
+                call_new(src1, topright, (ptrdiff_t)12*SIZEOF_PIXEL);
+                if (memcmp(buf0, buf1, BUF_SIZE)) /* whole buffers are compared, so stray writes are caught as well */
+                    fail();
+                bench_new(src1, topright, (ptrdiff_t)12*SIZEOF_PIXEL);
+            }
+        }
+    }
+}
+
+static void check_pred8x8(H264PredContext *h, uint8_t *buf0, uint8_t *buf1,
+                          int codec, int chroma_format, int bit_depth) /* compare each named 8x8 predictor with its reference */
+{
+    int pred_mode;
+    for (pred_mode = 0; pred_mode < 11; pred_mode++) { /* 11 = second dimension of pred8x8_modes */
+        if (check_pred_func(h->pred8x8[pred_mode], (chroma_format == 2) ? "8x16" : "8x8", /* format 2 is registered as "8x16" */
+                            pred8x8_modes[codec][pred_mode])) {
+            randomize_buffers(); /* buf0 and buf1 receive identical contents */
+            call_ref(src0, (ptrdiff_t)24*SIZEOF_PIXEL);
+            call_new(src1, (ptrdiff_t)24*SIZEOF_PIXEL);
+            if (memcmp(buf0, buf1, BUF_SIZE)) /* whole buffers are compared, so stray writes are caught as well */
+                fail();
+            bench_new(src1, (ptrdiff_t)24*SIZEOF_PIXEL);
+        }
+    }
+}
+
+static void check_pred16x16(H264PredContext *h, uint8_t *buf0, uint8_t *buf1,
+                            int codec, int chroma_format, int bit_depth) /* compare each named 16x16 predictor with its reference */
+{
+    if (chroma_format == 1) { /* the caller iterates over chroma formats; the 16x16 tests run for format 1 only */
+        int pred_mode;
+        for (pred_mode = 0; pred_mode < 9; pred_mode++) { /* 9 = second dimension of pred16x16_modes */
+            if (check_pred_func(h->pred16x16[pred_mode], "16x16", pred16x16_modes[codec][pred_mode])) {
+                randomize_buffers(); /* buf0 and buf1 receive identical contents */
+                call_ref(src0, (ptrdiff_t)48); /* 48-byte stride at every bit depth; presumably unscaled because 17 rows of 48 bytes already fill BUF_SIZE */
+                call_new(src1, (ptrdiff_t)48);
+                if (memcmp(buf0, buf1, BUF_SIZE)) /* whole buffers are compared, so stray writes are caught as well */
+                    fail();
+                bench_new(src1, (ptrdiff_t)48);
+            }
+        }
+    }
+}
+
+static void check_pred8x8l(H264PredContext *h, uint8_t *buf0, uint8_t *buf1,
+                           int codec, int chroma_format, int bit_depth) /* compare each named 8x8l predictor with its reference */
+{
+    if (chroma_format == 1 && codec_ids[codec] == AV_CODEC_ID_H264) { /* only run for H.264 with chroma format 1 */
+        int pred_mode;
+        for (pred_mode = 0; pred_mode < 12; pred_mode++) { /* mode names are borrowed from the pred4x4_modes table */
+            if (check_pred_func(h->pred8x8l[pred_mode], "8x8l", pred4x4_modes[codec][pred_mode])) {
+                int neighbors;
+                for (neighbors = 0; neighbors <= 0xc000; neighbors += 0x4000) { /* all four combinations of the two flag bits */
+                    int has_topleft  = neighbors & 0x8000;
+                    int has_topright = neighbors & 0x4000;
+
+                    if ((pred_mode == DIAG_DOWN_RIGHT_PRED || pred_mode == VERT_RIGHT_PRED) && !has_topleft)
+                        continue; /* Those aren't allowed according to the spec  */
+
+                    randomize_buffers(); /* buf0 and buf1 receive identical contents */
+                    call_ref(src0, has_topleft, has_topright, (ptrdiff_t)24*SIZEOF_PIXEL);
+                    call_new(src1, has_topleft, has_topright, (ptrdiff_t)24*SIZEOF_PIXEL);
+                    if (memcmp(buf0, buf1, BUF_SIZE)) /* whole buffers are compared, so stray writes are caught as well */
+                        fail();
+                    bench_new(src1, has_topleft, has_topright, (ptrdiff_t)24*SIZEOF_PIXEL);
+                }
+            }
+        }
+    }
+}
+
+/* TODO: Add tests for H.264 lossless H/V prediction. */
+
+void checkasm_check_h264pred(void) /* entry point listed in the tests[] table of checkasm.c */
+{
+    static const struct {
+        void (*func)(H264PredContext*, uint8_t*, uint8_t*, int, int, int);
+        const char *name;
+    } tests[] = {
+        { check_pred4x4,   "pred4x4"   },
+        { check_pred8x8,   "pred8x8"   },
+        { check_pred16x16, "pred16x16" },
+        { check_pred8x8l,  "pred8x8l"  },
+    };
+
+    DECLARE_ALIGNED(16, uint8_t, buf0)[BUF_SIZE]; /* buffer handed to the reference function */
+    DECLARE_ALIGNED(16, uint8_t, buf1)[BUF_SIZE]; /* buffer handed to the function under test */
+    H264PredContext h;
+    int test, codec, chroma_format, bit_depth;
+
+    for (test = 0; test < FF_ARRAY_ELEMS(tests); test++) {
+        for (codec = 0; codec < 4; codec++) { /* 4 = number of entries in codec_ids */
+            int codec_id = codec_ids[codec];
+            for (bit_depth = 8; bit_depth <= (codec_id == AV_CODEC_ID_H264 ? 10 : 8); bit_depth++) /* 8-10 bits for H.264, 8 otherwise */
+                for (chroma_format = 1; chroma_format <= (codec_id == AV_CODEC_ID_H264 ? 2 : 1); chroma_format++) { /* formats 1-2 for H.264, 1 otherwise */
+                    ff_h264_pred_init(&h, codec_id, bit_depth, chroma_format);
+                    tests[test].func(&h, buf0, buf1, codec, chroma_format, bit_depth);
+                }
+        }
+        report("%s", tests[test].name); /* one summary line per test group, covering every codec and depth */
+    }
+}
diff --git a/tests/checkasm/x86/Makefile b/tests/checkasm/x86/Makefile
new file mode 100644
index 0000000..0254c61
--- /dev/null
+++ b/tests/checkasm/x86/Makefile
@@ -0,0 +1,6 @@
+CHECKASMOBJS-$(HAVE_YASM) += x86/checkasm.o
+# Rule for checkasm's own x86 asm: write the .d dependency file, assemble, then strip (a failing strip is ignored).
+tests/checkasm/x86/%.o: tests/checkasm/x86/%.asm
+	$(DEPYASM) $(YASMFLAGS) -I $(<D)/ -M -o $@ $< > $(@:.o=.d)
+	$(YASM) $(YASMFLAGS) -I $(<D)/ -o $@ $<
+	-$(STRIP) $(STRIPFLAGS) $@
diff --git a/tests/checkasm/x86/checkasm.asm b/tests/checkasm/x86/checkasm.asm
new file mode 100644
index 0000000..b55ca69
--- /dev/null
+++ b/tests/checkasm/x86/checkasm.asm
@@ -0,0 +1,194 @@
+;*****************************************************************************
+;* Assembly testing and benchmarking tool
+;* Copyright (c) 2008 Loren Merritt
+;* Copyright (c) 2012 Henrik Gramner
+;*
+;* This file is part of Libav.
+;*
+;* Libav is free software; you can redistribute it and/or modify
+;* it under the terms of the GNU General Public License as published by
+;* the Free Software Foundation; either version 2 of the License, or
+;* (at your option) any later version.
+;*
+;* Libav is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;* GNU General Public License for more details.
+;*
+;* You should have received a copy of the GNU General Public License
+;* along with this program; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+;*****************************************************************************
+
+%define private_prefix checkasm
+%include "libavutil/x86/x86inc.asm"
+
+SECTION_RODATA
+
+error_message: db "failed to preserve register", 0 ; NUL-terminated text handed to fail_func by checked_call
+
+%if ARCH_X86_64
+; just random numbers to reduce the chance of incidental match
+ALIGN 16
+x6:  ddq 0x79445c159ce790641a1b2550a612b48c ; x6-x15: 128-bit patterns that checked_call loads into m6-m15 on WIN64
+x7:  ddq 0x86b2536fcd8cf6362eed899d5a28ddcd
+x8:  ddq 0x3f2bf84fc0fcca4eb0856806085e7943
+x9:  ddq 0xd229e1f5b281303facbd382dcf5b8de2
+x10: ddq 0xab63e2e11fa38ed971aeaff20b095fd9
+x11: ddq 0x77d410d5c42c882d89b0c0765892729a
+x12: ddq 0x24b3c1d2a024048bc45ea11a955d8dd5
+x13: ddq 0xdd7b8919edd427862e8ec680de14b47c
+x14: ddq 0x11e53e2b2ac655ef135ce6888fa02cbf
+x15: ddq 0x6de8f4c914c334d5011ff554472a7a10
+n7:   dq 0x21f86d66c8ca00ce ; n7-n14: 64-bit patterns for r7-r14; checked_call uses the ones from r<free_regs> upward
+n8:   dq 0x75b6ba21077c48ad
+n9:   dq 0xed56bb2dcb3c7736
+n10:  dq 0x8bda43d3fd1a7e06
+n11:  dq 0xb64a9c9e5d318408
+n12:  dq 0xdf9a54b303f1d3a3
+n13:  dq 0x4a75479abd64e097
+n14:  dq 0x249214109d5d1c88
+%endif
+
+SECTION .text
+
+cextern fail_func
+
+; max number of args used by any asm function.
+; (max_args % 4) must equal 3 for stack alignment
+%define max_args 15
+
+%if ARCH_X86_64
+
+;-----------------------------------------------------------------------------
+; int checkasm_stack_clobber(uint64_t clobber, ...)
+;-----------------------------------------------------------------------------
+cglobal stack_clobber, 1,2
+    ; Clobber the stack with junk below the stack pointer
+    %define size (max_args+6)*8
+    SUB  rsp, size      ; move rsp down so the stores below cover the 'size' bytes under the previous rsp
+    mov   r1, size-8    ; r1 = byte offset of the highest 8-byte slot in that area
+.loop:
+    mov [rsp+r1], r0    ; overwrite the slot with the first argument (clobber)
+    sub   r1, 8         ; step down one slot; the flags feed the jge
+    jge .loop           ; signed test: loop until r1 goes negative, so offset 0 is written too
+    ADD  rsp, size      ; undo the adjustment made above
+    xor  eax, eax       ; return 0
+    RET
+
+%if WIN64
+    %assign free_regs 7
+%else
+    %assign free_regs 9
+%endif
+
+;-----------------------------------------------------------------------------
+; intptr_t checkasm_checked_call(intptr_t (*func)(), ...)
+;-----------------------------------------------------------------------------
+INIT_XMM
+cglobal checked_call, 2,15,16,max_args*8+8
+    mov  r6, r0         ; keep the function pointer in r6; r0 is overwritten with an argument next
+
+    ; All arguments have been pushed on the stack instead of registers in order to
+    ; test for incorrect assumptions that 32-bit ints are zero-extended to 64-bit.
+    mov  r0, r6mp       ; r0-r3 (and r4-r5 on UNIX64) are reloaded from the r6mp.. stack slots
+    mov  r1, r7mp
+    mov  r2, r8mp
+    mov  r3, r9mp
+%if UNIX64
+    mov  r4, r10mp
+    mov  r5, r11mp
+    %assign i 6
+    %rep max_args-6
+        mov  r9, [rsp+stack_offset+(i+1)*8] ; r9 is only a scratch register for the copy
+        mov  [rsp+(i-6)*8], r9              ; argument i goes to outgoing stack slot i-6
+        %assign i i+1
+    %endrep
+%else
+    %assign i 4
+    %rep max_args-4
+        mov  r9, [rsp+stack_offset+(i+7)*8] ; r9 is only a scratch register for the copy
+        mov  [rsp+i*8], r9                  ; argument i goes to outgoing stack slot i; slots 0-3 are not written
+        %assign i i+1
+    %endrep
+%endif
+
+%if WIN64
+    %assign i 6
+    %rep 16-6
+        mova m %+ i, [x %+ i]   ; preload m6-m15 with the x6-x15 patterns
+        %assign i i+1
+    %endrep
+%endif
+
+%assign i 14
+%rep 15-free_regs
+    mov r %+ i, [n %+ i]        ; preload r14 down to r<free_regs> with the n* patterns
+    %assign i i-1
+%endrep
+    call r6                     ; call the function under test
+%assign i 14
+%rep 15-free_regs
+    xor r %+ i, [n %+ i]        ; zero iff the register still holds its pattern
+    or  r14, r %+ i             ; accumulate any difference in r14
+    %assign i i-1
+%endrep
+
+%if WIN64
+    %assign i 6
+    %rep 16-6
+        pxor m %+ i, [x %+ i]   ; zero iff the register still holds its pattern
+        por  m6, m %+ i         ; accumulate any difference in m6
+        %assign i i+1
+    %endrep
+    packsswb m6, m6             ; pack the 16-byte result into 8 bytes (saturating, so nonzero words stay nonzero)
+    movq r5, m6
+    or  r14, r5                 ; merge the vector result into the r14 accumulator
+%endif
+
+    jz .ok                      ; ZF comes from the last 'or r14, ...': set iff every checked register matched
+    mov  r9, rax                ; park the tested function's return value (presumably r9 is callee-saved in x86inc's mapping - confirm)
+    lea  r0, [error_message]    ; first argument for fail_func
+    call fail_func              ; NOTE(review): if fail_func is variadic, SysV expects al = number of vector args - confirm
+    mov rax, r9                 ; hand the original return value back to the caller
+.ok:
+    RET
+
+%else
+
+; just random numbers to reduce the chance of incidental match
+%define n3 dword 0x6549315c
+%define n4 dword 0xe02f3e23
+%define n5 dword 0xb78d0d1d
+%define n6 dword 0x33627ba7
+
+;-----------------------------------------------------------------------------
+; intptr_t checkasm_checked_call(intptr_t (*func)(), ...)
+;-----------------------------------------------------------------------------
+cglobal checked_call, 1,7
+    mov  r3, n3         ; preload r3-r6 with known patterns before the call
+    mov  r4, n4
+    mov  r5, n5
+    mov  r6, n6
+%rep max_args
+    PUSH dword [esp+20+max_args*4]  ; re-push the arguments last to first: esp drops by 4 per push, so the fixed offset steps back one argument each time
+%endrep
+    call r0             ; call the function under test
+    xor  r3, n3         ; each xor leaves zero iff that register still holds its pattern
+    xor  r4, n4
+    xor  r5, n5
+    xor  r6, n6
+    or   r3, r4         ; fold the four results together
+    or   r5, r6
+    or   r3, r5         ; ZF set iff all four registers were preserved
+    jz .ok
+    mov  r3, eax        ; park the return value in r3 while fail_func runs
+    lea  r0, [error_message]
+    mov [esp], r0       ; place the message pointer at the top of the stack as fail_func's argument
+    call fail_func
+    mov  eax, r3        ; hand the original return value back to the caller
+.ok:
+    add  esp, max_args*4 ; drop the max_args dwords pushed above
+    REP_RET
+
+%endif ; ARCH_X86_64
-- 
1.8.3.2

_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to