./ffmpeg -threads 1 -f lavfi -t 60 -i anoisesrc -af 'anlmdn' -f null -benchmark -

Test results on Snapdragon 845:
    Before:
        size=N/A time=00:01:00.00 bitrate=N/A speed=11.2x
        video:0kB audio:5625kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: unknown
        bench: utime=5.320s stime=0.010s rtime=5.358s
        bench: maxrss=14172kB

    After:
        size=N/A time=00:01:00.00 bitrate=N/A speed=15.4x
        video:0kB audio:5625kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: unknown
        bench: utime=3.870s stime=0.000s rtime=3.902s
        bench: maxrss=14036kB
---
 libavfilter/aarch64/Makefile         |   2 +
 libavfilter/aarch64/af_anlmdn_init.c |  31 ++++++++
 libavfilter/aarch64/af_anlmdn_neon.S | 112 +++++++++++++++++++++++++++
 libavfilter/af_anlmdn.c              |   3 +
 libavfilter/af_anlmdndsp.h           |   1 +
 5 files changed, 149 insertions(+)
 create mode 100644 libavfilter/aarch64/af_anlmdn_init.c
 create mode 100644 libavfilter/aarch64/af_anlmdn_neon.S

diff --git a/libavfilter/aarch64/Makefile b/libavfilter/aarch64/Makefile
index f52d7a4842..6c727f9859 100644
--- a/libavfilter/aarch64/Makefile
+++ b/libavfilter/aarch64/Makefile
@@ -1,5 +1,7 @@
 OBJS-$(CONFIG_NLMEANS_FILTER)                += aarch64/af_afir_init.o
+OBJS-$(CONFIG_ANLMDN_FILTER)                 += aarch64/af_anlmdn_init.o
 OBJS-$(CONFIG_NLMEANS_FILTER)                += aarch64/vf_nlmeans_init.o
 
 NEON-OBJS-$(CONFIG_NLMEANS_FILTER)           += aarch64/af_afir_neon.o
+NEON-OBJS-$(CONFIG_ANLMDN_FILTER)            += aarch64/af_anlmdn_neon.o
 NEON-OBJS-$(CONFIG_NLMEANS_FILTER)           += aarch64/vf_nlmeans_neon.o
diff --git a/libavfilter/aarch64/af_anlmdn_init.c b/libavfilter/aarch64/af_anlmdn_init.c
new file mode 100644
index 0000000000..e28a152e04
--- /dev/null
+++ b/libavfilter/aarch64/af_anlmdn_init.c
@@ -0,0 +1,31 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/cpu.h"
+#include "libavfilter/af_anlmdndsp.h"
+
+float ff_compute_distance_ssd_neon(const float *f1, const float *f2,
+                                   ptrdiff_t len);
+
+av_cold void ff_anlmdn_init_aarch64(AudioNLMDNDSPContext *s)
+{
+    const int flags = av_get_cpu_flags();
+    if (!have_neon(flags))
+        return; /* no NEON reported at runtime: leave *s untouched */
+    s->compute_distance_ssd = ff_compute_distance_ssd_neon;
+}
diff --git a/libavfilter/aarch64/af_anlmdn_neon.S b/libavfilter/aarch64/af_anlmdn_neon.S
new file mode 100644
index 0000000000..3ad985b476
--- /dev/null
+++ b/libavfilter/aarch64/af_anlmdn_neon.S
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2020 Zhao Zhili
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/asm.S"
+
+// float ff_compute_distance_ssd_neon(const float *f1, const float *f2, ptrdiff_t len);
+function ff_compute_distance_ssd_neon, export=1
+       fmov    s0, wzr                         // s0 = running sum, starts at +0.0
+       add     x3, x0, x2, lsl #2              // x3 = f1 + len floats (last element read)
+       sub     x0, x0, x2, lsl #2              // x0 = f1 - len floats (first element read)
+       sub     x1, x1, x2, lsl #2              // x1 = f2 - len floats (first element read)
+       add     x3, x3, #4                      // x3 = one float past the last f1 element
+       // 2 * len + 1 pairs are consumed in total; only s0-s2 and v16-v31 are written
+       // process 32 pairs of data per loop
+       add     x4, x0, #128                    // x4 = x0 after one 32-float step
+       cmp     x4, x3                          // NOTE(review): branches use signed gt/le on addresses; confirm hi/ls is not needed
+       b.gt    2f                              // fewer than 32 floats left
+1:     ld1     {v16.4S, v17.4S, v18.4S, v19.4S}, [x0], #64     // 16 floats of f1
+       ld1     {v20.4S, v21.4S, v22.4S, v23.4S}, [x1], #64     // 16 floats of f2
+       ld1     {v24.4S, v25.4S, v26.4S, v27.4S}, [x0], #64     // next 16 floats of f1
+       ld1     {v28.4S, v29.4S, v30.4S, v31.4S}, [x1], #64     // next 16 floats of f2
+
+       fsub    v16.4S, v16.4S, v20.4S          // d = f1 - f2; each fsub is paired with the
+
+       fsub    v17.4S, v17.4S, v21.4S          // multiply of an earlier, independent register
+       fmul    v16.4S, v16.4S, v16.4S          // v16 = accumulator A, seeded with d^2
+
+       fsub    v18.4S, v18.4S, v22.4S
+       fmul    v17.4S, v17.4S, v17.4S          // v17 = accumulator B, seeded with d^2
+
+       fsub    v19.4S, v19.4S, v23.4S
+       fmla    v16.4S, v18.4S, v18.4S          // A += d^2
+
+       fsub    v24.4S, v24.4S, v28.4S
+       fmla    v17.4S, v19.4S, v19.4S          // B += d^2
+
+       fsub    v25.4S, v25.4S, v29.4S
+       fmla    v16.4S, v24.4S, v24.4S          // A += d^2
+
+       fsub    v26.4S, v26.4S, v30.4S
+       fmla    v17.4S, v25.4S, v25.4S          // B += d^2
+
+       fsub    v27.4S, v27.4S, v31.4S
+       fmla    v16.4S, v26.4S, v26.4S          // A += d^2
+
+       fmla    v17.4S, v27.4S, v27.4S          // B += d^2
+
+       fadd    v1.4S, v16.4S, v17.4S           // v1 = A + B (four partial sums)
+       faddp   v1.4S, v1.4S, v1.4S             // pairwise add: lanes 0 and 1 hold the two half sums
+       faddp   s1, v1.2S                       // s1 = sum of the 32 squared differences
+       fadd    s0, s0, s1                      // fold the block sum into the result
+       add     x4, x0, #128
+       cmp     x4, x3
+       b.le    1b                              // another full 32-float block fits
+
+       // process 16 pairs of data per loop
+2:     add     x4, x0, #64                     // x4 = x0 after one 16-float step
+       cmp     x4, x3
+       b.gt    4f                              // fewer than 16 floats left
+3:     ld1     {v16.4S, v17.4S, v18.4S, v19.4S}, [x0], #64     // 16 floats of f1
+       ld1     {v20.4S, v21.4S, v22.4S, v23.4S}, [x1], #64     // 16 floats of f2
+
+       fsub    v16.4S, v16.4S, v20.4S          // same scheme as above with half the data
+
+       fsub    v17.4S, v17.4S, v21.4S
+       fmul    v16.4S, v16.4S, v16.4S          // A = d^2
+
+       fsub    v18.4S, v18.4S, v22.4S
+       fmul    v17.4S, v17.4S, v17.4S          // B = d^2
+
+       fsub    v19.4S, v19.4S, v23.4S
+       fmla    v16.4S, v18.4S, v18.4S          // A += d^2
+
+       fmla    v17.4S, v19.4S, v19.4S          // B += d^2
+
+       fadd    v1.4S, v16.4S, v17.4S           // v1 = A + B (four partial sums)
+       faddp   v1.4S, v1.4S, v1.4S             // pairwise add: lanes 0 and 1 hold the two half sums
+       faddp   s1, v1.2S                       // s1 = sum of the 16 squared differences
+       fadd    s0, s0, s1                      // fold the block sum into the result
+       add     x4, x0, #64
+       cmp     x4, x3
+       b.le    3b                              // another full 16-float block fits
+
+       // process 1 pair of data per loop
+4:     cmp     x0, x3
+       b.eq    6f                              // nothing left after the vector loops
+5:     ldr     s1, [x0], #4                    // s1 = *f1++
+       ldr     s2, [x1], #4                    // s2 = *f2++
+       fsub    s1, s1, s2                      // d = f1 - f2
+       cmp     x0, x3                          // flags survive the fmadd below
+       fmadd   s0, s1, s1, s0                  // s0 += d * d
+       b.ne    5b                              // loop until x0 reaches the stop address
+6:     ret                                     // result is returned in s0
+
+endfunc
diff --git a/libavfilter/af_anlmdn.c b/libavfilter/af_anlmdn.c
index b8aef31c35..63bc1a1f2c 100644
--- a/libavfilter/af_anlmdn.c
+++ b/libavfilter/af_anlmdn.c
@@ -145,6 +145,9 @@ void ff_anlmdn_init(AudioNLMDNDSPContext *dsp)
 
     if (ARCH_X86)
         ff_anlmdn_init_x86(dsp);
+    if (ARCH_AARCH64) {
+        ff_anlmdn_init_aarch64(dsp);
+    }
 }
 
 static int config_output(AVFilterLink *outlink)
diff --git a/libavfilter/af_anlmdndsp.h b/libavfilter/af_anlmdndsp.h
index d8f5136cd8..f9d8a80c83 100644
--- a/libavfilter/af_anlmdndsp.h
+++ b/libavfilter/af_anlmdndsp.h
@@ -35,6 +35,7 @@ typedef struct AudioNLMDNDSPContext {
 } AudioNLMDNDSPContext;
 
 void ff_anlmdn_init(AudioNLMDNDSPContext *s);
+void ff_anlmdn_init_aarch64(AudioNLMDNDSPContext *s);
 void ff_anlmdn_init_x86(AudioNLMDNDSPContext *s);
 
 #endif /* AVFILTER_ANLMDNDSP_H */
-- 
2.24.0

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

Reply via email to