vlc | branch: master | Rémi Denis-Courmont <[email protected]> | Tue Dec 11 21:16:49 2018 +0200| [3985c874e0b71f73911aef084391a3a838eca877] | committer: Rémi Denis-Courmont
deinterlace: initial ARM SVE merge function > http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=3985c874e0b71f73911aef084391a3a838eca877 --- modules/video_filter/Makefile.am | 4 ++ modules/video_filter/deinterlace/deinterlace.c | 5 +++ modules/video_filter/deinterlace/merge.h | 3 ++ modules/video_filter/deinterlace/merge_sve.S | 54 ++++++++++++++++++++++++++ 4 files changed, 66 insertions(+) diff --git a/modules/video_filter/Makefile.am b/modules/video_filter/Makefile.am index af190d16e3..d3db0c0d0a 100644 --- a/modules/video_filter/Makefile.am +++ b/modules/video_filter/Makefile.am @@ -144,6 +144,10 @@ if HAVE_ARM64 libdeinterlace_plugin_la_SOURCES += video_filter/deinterlace/merge_arm64.S libdeinterlace_plugin_la_CFLAGS += -DCAN_COMPILE_ARM64 endif +if HAVE_SVE +libdeinterlace_plugin_la_SOURCES += video_filter/deinterlace/merge_sve.S +libdeinterlace_plugin_la_CFLAGS += -DCAN_COMPILE_SVE +endif libdeinterlace_plugin_la_LIBADD = libdeinterlace_common.la video_filter_LTLIBRARIES += libdeinterlace_plugin.la diff --git a/modules/video_filter/deinterlace/deinterlace.c b/modules/video_filter/deinterlace/deinterlace.c index 6bcba0f3bb..b8d0586123 100644 --- a/modules/video_filter/deinterlace/deinterlace.c +++ b/modules/video_filter/deinterlace/deinterlace.c @@ -575,6 +575,11 @@ notsupp: p_sys->pf_merge = pixel_size == 1 ? merge8_armv6 : merge16_armv6; else #endif +#if defined(CAN_COMPILE_SVE) + if( vlc_CPU_ARM_SVE() ) + p_sys->pf_merge = pixel_size == 1 ? merge8_arm_sve : merge16_arm_sve; + else +#endif #if defined(CAN_COMPILE_ARM64) if( vlc_CPU_ARM_NEON() ) p_sys->pf_merge = pixel_size == 1 ? 
merge8_arm64_neon : merge16_arm64_neon; diff --git a/modules/video_filter/deinterlace/merge.h b/modules/video_filter/deinterlace/merge.h index 70dcbef096..1a54b32db9 100644 --- a/modules/video_filter/deinterlace/merge.h +++ b/modules/video_filter/deinterlace/merge.h @@ -181,6 +181,9 @@ void merge16_arm64_neon (void *, const void *, const void *, size_t); #endif +void merge8_arm_sve(void *, const void *, const void *, size_t); +void merge16_arm_sve(void *, const void *, const void *, size_t); + /***************************************************************************** * EndMerge routines *****************************************************************************/ diff --git a/modules/video_filter/deinterlace/merge_sve.S b/modules/video_filter/deinterlace/merge_sve.S new file mode 100644 index 0000000000..9213d8b50f --- /dev/null +++ b/modules/video_filter/deinterlace/merge_sve.S @@ -0,0 +1,54 @@ +/****************************************************************************** + * merge_sve.S : ARM SVE mean + ****************************************************************************** + * Copyright (C) 2018 Rémi Denis-Courmont + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA. 
+ *****************************************************************************/
+
+	.arch_extension sve
+
+	/* TODO: prefetch, unroll */
+
+	.text
+
+/*
+ * void merge8_arm_sve(void *dst, const void *src1, const void *src2,
+ *                     size_t bytes);
+ *
+ * Stores to dst the truncating mean, (a + b) >> 1, of each pair of unsigned
+ * bytes read from src1 and src2.
+ *
+ * In:    x0 = dst, x1 = src1, x2 = src2, x3 = number of bytes
+ * Clobb: x4, z0, z1, p0, condition flags
+ *
+ * Every byte is zero-extended into its own 16-bit lane so that the sum
+ * (at most 510) cannot wrap before the shift. One iteration therefore
+ * handles as many bytes as there are 16-bit lanes in a vector.
+ */
+	.globl	merge8_arm_sve
+	.type	merge8_arm_sve, %function
+merge8_arm_sve:
+	mov	x4, #0			/* x4 = index of the next byte */
+	b	2f			/* test the bound first: size may be 0 */
+1:	ld1b	{z0.h}, p0/z, [x1, x4]	/* bytes -> zero-extended halfwords */
+	ld1b	{z1.h}, p0/z, [x2, x4]
+	add	z0.h, z0.h, z1.h
+	lsr	z0.h, z0.h, #1
+	st1b	{z0.h}, p0, [x0, x4]	/* low byte of each active lane */
+	inch	x4			/* advance by the 16-bit lane count */
+	/* The size is a size_t: compare unsigned (whilelo, not whilelt). */
+2:	whilelo	p0.h, x4, x3		/* p0 = lanes whose index is < x3 */
+	b.first	1b			/* loop while the first lane is active */
+	ret
+
+/*
+ * void merge16_arm_sve(void *dst, const void *src1, const void *src2,
+ *                      size_t bytes);
+ *
+ * Same as merge8_arm_sve, but for unsigned 16-bit samples.
+ *
+ * In:    x0 = dst, x1 = src1, x2 = src2, x3 = number of bytes
+ * Clobb: x3, x4, z0, z1, p0, condition flags
+ *
+ * The size argument is a byte count, as for the other merge back-ends,
+ * whereas the loop below indexes and bounds 16-bit elements. The count is
+ * thus halved on entry; using it unscaled would process twice as much
+ * memory as requested. A trailing odd byte, if any, is left untouched.
+ *
+ * Every sample is zero-extended into its own 32-bit lane so that the sum
+ * (at most 131070) cannot wrap before the shift.
+ */
+	.globl	merge16_arm_sve
+	.type	merge16_arm_sve, %function
+merge16_arm_sve:
+	lsr	x3, x3, #1		/* bytes -> 16-bit element count */
+	mov	x4, #0			/* x4 = index of the next element */
+	b	2f			/* test the bound first: size may be 0 */
+1:	ld1h	{z0.s}, p0/z, [x1, x4, lsl #1]	/* halfwords -> words */
+	ld1h	{z1.s}, p0/z, [x2, x4, lsl #1]
+	add	z0.s, z0.s, z1.s
+	lsr	z0.s, z0.s, #1
+	st1h	{z0.s}, p0, [x0, x4, lsl #1]	/* low half of each active lane */
+	incw	x4			/* advance by the 32-bit lane count */
+2:	whilelo	p0.s, x4, x3		/* p0 = lanes whose index is < x3 */
+	b.first	1b			/* loop while the first lane is active */
+	ret

_______________________________________________
vlc-commits mailing list
[email protected]
https://mailman.videolan.org/listinfo/vlc-commits
