On Tue, 22 Apr 2014, Janne Grunau wrote:

Optimized for the default filter length 16.

30% faster opus silk decoding.
---
Because of extensive macro use, the assembler source is uglier than I would
like, but it is 4 functions in one. It looked nicer before I added
double support. The need for 4 neon registers for 8 doubles is the main
problem.

libavresample/aarch64/Makefile        |   6 +-
libavresample/aarch64/resample_init.c |  64 ++++++++++
libavresample/aarch64/resample_neon.S | 219 ++++++++++++++++++++++++++++++++++
libavresample/internal.h              |   3 +
libavresample/resample.c              |   3 +
5 files changed, 293 insertions(+), 2 deletions(-)
create mode 100644 libavresample/aarch64/resample_init.c
create mode 100644 libavresample/aarch64/resample_neon.S

diff --git a/libavresample/aarch64/Makefile b/libavresample/aarch64/Makefile
index 320ed67..9d3856e 100644
--- a/libavresample/aarch64/Makefile
+++ b/libavresample/aarch64/Makefile
@@ -1,5 +1,7 @@
-OBJS                             += aarch64/audio_convert_init.o
+OBJS                             += aarch64/audio_convert_init.o    \
+                                    aarch64/resample_init.o

OBJS-$(CONFIG_NEON_CLOBBER_TEST) += aarch64/neontest.o

-NEON-OBJS                        += aarch64/audio_convert_neon.o
+NEON-OBJS                        += aarch64/audio_convert_neon.o    \
+                                    aarch64/resample_neon.o
diff --git a/libavresample/aarch64/resample_init.c b/libavresample/aarch64/resample_init.c
new file mode 100644
index 0000000..3aff280
--- /dev/null
+++ b/libavresample/aarch64/resample_init.c
@@ -0,0 +1,64 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "config.h"
+#include "libavutil/cpu.h"
+#include "libavutil/aarch64/cpu.h"
+#include "libavutil/samplefmt.h"
+#include "libavresample/internal.h"
+
+
+void ff_resample_one_dbl_neon(struct ResampleContext *c, void *dst0,
+                              int dst_index, const void *src0,
+                              unsigned int index, int frac);
+void ff_resample_one_flt_neon(struct ResampleContext *c, void *dst0,
+                              int dst_index, const void *src0,
+                              unsigned int index, int frac);
+void ff_resample_one_s16_neon(struct ResampleContext *c, void *dst0,
+                              int dst_index, const void *src0,
+                              unsigned int index, int frac);
+void ff_resample_one_s32_neon(struct ResampleContext *c, void *dst0,
+                              int dst_index, const void *src0,
+                              unsigned int index, int frac);
+
+void ff_audio_resample_init_aarch64(ResampleContext *c,
+                                    enum AVSampleFormat sample_fmt)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags)) {
+        if (!c->linear) {
+            switch (sample_fmt) {
+            case AV_SAMPLE_FMT_DBLP:
+                c->resample_one  = ff_resample_one_dbl_neon;
+                break;
+            case AV_SAMPLE_FMT_FLTP:
+                c->resample_one  = ff_resample_one_flt_neon;
+                break;
+            case AV_SAMPLE_FMT_S16P:
+                c->resample_one  = ff_resample_one_s16_neon;
+                break;
+            case AV_SAMPLE_FMT_S32P:
+                c->resample_one  = ff_resample_one_s32_neon;
+                break;
+            }
+        }
+    }
+}
diff --git a/libavresample/aarch64/resample_neon.S b/libavresample/aarch64/resample_neon.S
new file mode 100644
index 0000000..548c820
--- /dev/null
+++ b/libavresample/aarch64/resample_neon.S
@@ -0,0 +1,219 @@
+/*
+ * Copyright (c) 2014 Janne Grunau <[email protected]>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/aarch64/asm.S"
+
+.macro resample_one     fmt, es=2
+function ff_resample_one_\fmt\()_neon, export=1
+        sxtw            x2,  w2
+        ldr             x9,  [x0, #16]          // filter_bank
+        ldr             w6,  [x0, #24]          // filter_length
+        ldr             w7,  [x0, #52]          // phase_shift
+        ldr             w8,  [x0, #56]          // phase_mask

I missed this earlier: these hard-coded struct offsets probably require something like libavcodec/arm/asm-offsets.h to make sure they stay in sync with the struct.

// Martin
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to