Add an implementation of luma_hvpp, using Neon I8MM implementation for the horizontal part, and Armv8.0 Neon implementation for the vertical part. --- source/common/aarch64/filter-neon-i8mm.cpp | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-)
diff --git a/source/common/aarch64/filter-neon-i8mm.cpp b/source/common/aarch64/filter-neon-i8mm.cpp index f8334016d..fb42d6672 100644 --- a/source/common/aarch64/filter-neon-i8mm.cpp +++ b/source/common/aarch64/filter-neon-i8mm.cpp @@ -755,9 +755,29 @@ void interp4_horiz_pp_i8mm(const uint8_t *src, intptr_t srcStride, uint8_t *dst, } } +// Declaration for use in interp_hv_pp_i8mm(). +template<int N, int width, int height> +void interp_vert_sp_neon(const int16_t *src, intptr_t srcStride, uint8_t *dst, + intptr_t dstStride, int coeffIdx); + +// Implementation of luma_hvpp, using Neon i8mm implementation for the +// horizontal part, and Armv8.0 Neon implementation for the vertical part. +template<int width, int height> +void interp_hv_pp_i8mm(const pixel *src, intptr_t srcStride, pixel *dst, + intptr_t dstStride, int idxX, int idxY) +{ + const int N_TAPS = 8; + ALIGN_VAR_32(int16_t, immed[width * (height + N_TAPS - 1)]); + + interp8_horiz_ps_i8mm<width, height>(src, srcStride, immed, width, idxX, 1); + interp_vert_sp_neon<N_TAPS, width, height>(immed + (N_TAPS / 2 - 1) * width, + width, dst, dstStride, idxY); +} + #define LUMA_I8MM(W, H) \ p.pu[LUMA_ ## W ## x ## H].luma_hpp = interp8_horiz_pp_i8mm<W, H>; \ - p.pu[LUMA_ ## W ## x ## H].luma_hps = interp8_horiz_ps_i8mm<W, H>; + p.pu[LUMA_ ## W ## x ## H].luma_hps = interp8_horiz_ps_i8mm<W, H>; \ + p.pu[LUMA_ ## W ## x ## H].luma_hvpp = interp_hv_pp_i8mm<W, H>; #define CHROMA_420_I8MM(W, H) \ p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_hpp = \ -- 2.42.1
From 988b25411508117185a4d88bb519838d2c40fd0b Mon Sep 17 00:00:00 2001 Message-ID: <988b25411508117185a4d88bb519838d2c40fd0b.1725629250.git.hari.lim...@arm.com> In-Reply-To: <cover.1725629250.git.hari.lim...@arm.com> References: <cover.1725629250.git.hari.lim...@arm.com> From: Hari Limaye <hari.lim...@arm.com> Date: Thu, 16 May 2024 00:07:55 +0100 Subject: [PATCH 11/14] AArch64: Add Armv8.6 Neon I8MM implementation of interp_hv_pp Add an implementation of luma_hvpp, using Neon I8MM implementation for the horizontal part, and Armv8.0 Neon implementation for the vertical part. --- source/common/aarch64/filter-neon-i8mm.cpp | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/source/common/aarch64/filter-neon-i8mm.cpp b/source/common/aarch64/filter-neon-i8mm.cpp index f8334016d..fb42d6672 100644 --- a/source/common/aarch64/filter-neon-i8mm.cpp +++ b/source/common/aarch64/filter-neon-i8mm.cpp @@ -755,9 +755,29 @@ void interp4_horiz_pp_i8mm(const uint8_t *src, intptr_t srcStride, uint8_t *dst, } } +// Declaration for use in interp_hv_pp_i8mm(). +template<int N, int width, int height> +void interp_vert_sp_neon(const int16_t *src, intptr_t srcStride, uint8_t *dst, + intptr_t dstStride, int coeffIdx); + +// Implementation of luma_hvpp, using Neon i8mm implementation for the +// horizontal part, and Armv8.0 Neon implementation for the vertical part. 
+template<int width, int height> +void interp_hv_pp_i8mm(const pixel *src, intptr_t srcStride, pixel *dst, + intptr_t dstStride, int idxX, int idxY) +{ + const int N_TAPS = 8; + ALIGN_VAR_32(int16_t, immed[width * (height + N_TAPS - 1)]); + + interp8_horiz_ps_i8mm<width, height>(src, srcStride, immed, width, idxX, 1); + interp_vert_sp_neon<N_TAPS, width, height>(immed + (N_TAPS / 2 - 1) * width, + width, dst, dstStride, idxY); +} + #define LUMA_I8MM(W, H) \ p.pu[LUMA_ ## W ## x ## H].luma_hpp = interp8_horiz_pp_i8mm<W, H>; \ - p.pu[LUMA_ ## W ## x ## H].luma_hps = interp8_horiz_ps_i8mm<W, H>; + p.pu[LUMA_ ## W ## x ## H].luma_hps = interp8_horiz_ps_i8mm<W, H>; \ + p.pu[LUMA_ ## W ## x ## H].luma_hvpp = interp_hv_pp_i8mm<W, H>; #define CHROMA_420_I8MM(W, H) \ p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_hpp = \ -- 2.42.1
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel