On Tue, 22 Apr 2014, Janne Grunau wrote:
Optimized for the default filter length 16. 30% faster opus silk decoding. --- The assembler source is due to extensive macro use uglier than I would like it to be but it's 4 functions in one. It looked nicer before I added double support. The need for 4 neon registers for 8 doubles is the main problem. libavresample/aarch64/Makefile | 6 +- libavresample/aarch64/resample_init.c | 64 ++++++++++ libavresample/aarch64/resample_neon.S | 219 ++++++++++++++++++++++++++++++++++ libavresample/internal.h | 3 + libavresample/resample.c | 3 + 5 files changed, 293 insertions(+), 2 deletions(-) create mode 100644 libavresample/aarch64/resample_init.c create mode 100644 libavresample/aarch64/resample_neon.S diff --git a/libavresample/aarch64/Makefile b/libavresample/aarch64/Makefile index 320ed67..9d3856e 100644 --- a/libavresample/aarch64/Makefile +++ b/libavresample/aarch64/Makefile @@ -1,5 +1,7 @@ -OBJS += aarch64/audio_convert_init.o +OBJS += aarch64/audio_convert_init.o \ + aarch64/resample_init.o OBJS-$(CONFIG_NEON_CLOBBER_TEST) += aarch64/neontest.o -NEON-OBJS += aarch64/audio_convert_neon.o +NEON-OBJS += aarch64/audio_convert_neon.o \ + aarch64/resample_neon.o diff --git a/libavresample/aarch64/resample_init.c b/libavresample/aarch64/resample_init.c new file mode 100644 index 0000000..3aff280 --- /dev/null +++ b/libavresample/aarch64/resample_init.c @@ -0,0 +1,64 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdint.h> + +#include "config.h" +#include "libavutil/cpu.h" +#include "libavutil/aarch64/cpu.h" +#include "libavutil/samplefmt.h" +#include "libavresample/internal.h" + + +void ff_resample_one_dbl_neon(struct ResampleContext *c, void *dst0, + int dst_index, const void *src0, + unsigned int index, int frac); +void ff_resample_one_flt_neon(struct ResampleContext *c, void *dst0, + int dst_index, const void *src0, + unsigned int index, int frac); +void ff_resample_one_s16_neon(struct ResampleContext *c, void *dst0, + int dst_index, const void *src0, + unsigned int index, int frac); +void ff_resample_one_s32_neon(struct ResampleContext *c, void *dst0, + int dst_index, const void *src0, + unsigned int index, int frac); + +void ff_audio_resample_init_aarch64(ResampleContext *c, + enum AVSampleFormat sample_fmt) +{ + int cpu_flags = av_get_cpu_flags(); + + if (have_neon(cpu_flags)) { + if (!c->linear) { + switch (sample_fmt) { + case AV_SAMPLE_FMT_DBLP: + c->resample_one = ff_resample_one_dbl_neon; + break; + case AV_SAMPLE_FMT_FLTP: + c->resample_one = ff_resample_one_flt_neon; + break; + case AV_SAMPLE_FMT_S16P: + c->resample_one = ff_resample_one_s16_neon; + break; + case AV_SAMPLE_FMT_S32P: + c->resample_one = ff_resample_one_s32_neon; + break; + } + } + } +} diff --git a/libavresample/aarch64/resample_neon.S b/libavresample/aarch64/resample_neon.S new file mode 100644 index 0000000..548c820 --- /dev/null +++ b/libavresample/aarch64/resample_neon.S @@ -0,0 +1,219 @@ +/* + * Copyright (c) 2014 Janne Grunau <[email protected]> + * + * This file is part of Libav. 
+ * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" +#include "libavutil/aarch64/asm.S" + +.macro resample_one fmt, es=2 +function ff_resample_one_\fmt\()_neon, export=1 + sxtw x2, w2 + ldr x9, [x0, #16] // filter_bank + ldr w6, [x0, #24] // filter_length + ldr w7, [x0, #52] // phase_shift + ldr w8, [x0, #56] // phase_mask
I missed this earlier; these hard-coded ResampleContext field offsets (#16, #24, #52, #56) probably require something like libavcodec/arm/asm-offsets.h to make sure they stay in sync with the struct.
// Martin _______________________________________________ libav-devel mailing list [email protected] https://lists.libav.org/mailman/listinfo/libav-devel
