vlc | branch: master | Rémi Denis-Courmont <[email protected]> | Tue Dec 11 21:16:49 2018 +0200| [3985c874e0b71f73911aef084391a3a838eca877] | committer: Rémi Denis-Courmont

deinterlace: initial ARM SVE merge function

> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=3985c874e0b71f73911aef084391a3a838eca877
---

 modules/video_filter/Makefile.am               |  4 ++
 modules/video_filter/deinterlace/deinterlace.c |  5 +++
 modules/video_filter/deinterlace/merge.h       |  3 ++
 modules/video_filter/deinterlace/merge_sve.S   | 54 ++++++++++++++++++++++++++
 4 files changed, 66 insertions(+)

diff --git a/modules/video_filter/Makefile.am b/modules/video_filter/Makefile.am
index af190d16e3..d3db0c0d0a 100644
--- a/modules/video_filter/Makefile.am
+++ b/modules/video_filter/Makefile.am
@@ -144,6 +144,10 @@ if HAVE_ARM64
 libdeinterlace_plugin_la_SOURCES += video_filter/deinterlace/merge_arm64.S
 libdeinterlace_plugin_la_CFLAGS += -DCAN_COMPILE_ARM64
 endif
+if HAVE_SVE
+libdeinterlace_plugin_la_SOURCES += video_filter/deinterlace/merge_sve.S
+libdeinterlace_plugin_la_CFLAGS += -DCAN_COMPILE_SVE
+endif
 libdeinterlace_plugin_la_LIBADD = libdeinterlace_common.la
 video_filter_LTLIBRARIES += libdeinterlace_plugin.la
 
diff --git a/modules/video_filter/deinterlace/deinterlace.c b/modules/video_filter/deinterlace/deinterlace.c
index 6bcba0f3bb..b8d0586123 100644
--- a/modules/video_filter/deinterlace/deinterlace.c
+++ b/modules/video_filter/deinterlace/deinterlace.c
@@ -575,6 +575,11 @@ notsupp:
         p_sys->pf_merge = pixel_size == 1 ? merge8_armv6 : merge16_armv6;
     else
 #endif
+#if defined(CAN_COMPILE_SVE)
+    if( vlc_CPU_ARM_SVE() )
+        p_sys->pf_merge = pixel_size == 1 ? merge8_arm_sve : merge16_arm_sve;
+    else
+#endif
 #if defined(CAN_COMPILE_ARM64)
     if( vlc_CPU_ARM_NEON() )
         p_sys->pf_merge = pixel_size == 1 ? merge8_arm64_neon : merge16_arm64_neon;
diff --git a/modules/video_filter/deinterlace/merge.h b/modules/video_filter/deinterlace/merge.h
index 70dcbef096..1a54b32db9 100644
--- a/modules/video_filter/deinterlace/merge.h
+++ b/modules/video_filter/deinterlace/merge.h
@@ -181,6 +181,9 @@ void merge16_arm64_neon (void *, const void *, const void *, size_t);
 
 #endif
 
+void merge8_arm_sve(void *, const void *, const void *, size_t);
+void merge16_arm_sve(void *, const void *, const void *, size_t);
+
 /*****************************************************************************
  * EndMerge routines
  *****************************************************************************/
diff --git a/modules/video_filter/deinterlace/merge_sve.S b/modules/video_filter/deinterlace/merge_sve.S
new file mode 100644
index 0000000000..9213d8b50f
--- /dev/null
+++ b/modules/video_filter/deinterlace/merge_sve.S
@@ -0,0 +1,54 @@
+/******************************************************************************
+ * merge_sve.S : ARM SVE mean
+ ******************************************************************************
+ * Copyright (C) 2018 Rémi Denis-Courmont
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+       .arch_extension sve
+
+       /* TODO: prefetch, unroll */
+
+       .text
+       /*
+        * void merge8_arm_sve(void *dst, const void *src1, const void *src2,
+        *                     size_t n)
+        *
+        * Stores dst[i] = (src1[i] + src2[i]) >> 1 for n unsigned bytes
+        * (truncating mean).
+        *
+        * In:      x0 = dst, x1 = src1, x2 = src2, x3 = n
+        * Scratch: x4 (element index), z0, z1, p0, condition flags
+        */
+       .globl  merge8_arm_sve
+       .type   merge8_arm_sve, %function
+merge8_arm_sve:
+       mov     x4, #0
+       b       2f                      /* build the predicate before use */
+       /* Bytes are zero-extended into 16-bit lanes so the sum cannot wrap. */
+1:     ld1b    {z0.h}, p0/z, [x1, x4]
+       ld1b    {z1.h}, p0/z, [x2, x4]
+       add     z0.h, z0.h, z1.h
+       lsr     z0.h, z0.h, #1
+       st1b    {z0.h}, p0, [x0, x4]    /* stores the low byte of each lane */
+       inch    x4                      /* x4 += 16-bit lanes per vector */
+       /* n is a size_t: compare unsigned (whilelo) rather than signed. */
+2:     whilelo p0.h, x4, x3
+       b.first 1b                      /* loop while lane 0 is in range */
+       ret
+
+       /*
+        * void merge16_arm_sve(void *dst, const void *src1, const void *src2,
+        *                      size_t bytes)
+        *
+        * Stores dst[i] = (src1[i] + src2[i]) >> 1 for unsigned 16-bit
+        * samples (truncating mean).
+        *
+        * The size is taken as a byte count, matching the convention of the
+        * other VLC merge16 back-ends, so it is halved to a sample count
+        * first; an odd trailing byte is ignored.
+        *
+        * In:      x0 = dst, x1 = src1, x2 = src2, x3 = bytes
+        * Scratch: x3, x4 (sample index), z0, z1, p0, condition flags
+        */
+       .globl  merge16_arm_sve
+       .type   merge16_arm_sve, %function
+merge16_arm_sve:
+       lsr     x3, x3, #1              /* byte count -> 16-bit sample count */
+       mov     x4, #0
+       b       2f                      /* build the predicate before use */
+       /* Samples are zero-extended into 32-bit lanes so the sum cannot wrap. */
+1:     ld1h    {z0.s}, p0/z, [x1, x4, lsl #1]
+       ld1h    {z1.s}, p0/z, [x2, x4, lsl #1]
+       add     z0.s, z0.s, z1.s
+       lsr     z0.s, z0.s, #1
+       st1h    {z0.s}, p0, [x0, x4, lsl #1] /* stores the low half of each lane */
+       incw    x4                      /* x4 += 32-bit lanes per vector */
+       /* The count is a size_t: compare unsigned (whilelo), not signed. */
+2:     whilelo p0.s, x4, x3
+       b.first 1b                      /* loop while lane 0 is in range */
+       ret

_______________________________________________
vlc-commits mailing list
[email protected]
https://mailman.videolan.org/listinfo/vlc-commits

Reply via email to