SBD 4-tap and 8-tap Neon implementations of interp_vert_ss_neon are used for HBD as well. Extend these functions to support all CHROMA and LUMA block sizes. --- source/common/aarch64/filter-prim.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+)
diff --git a/source/common/aarch64/filter-prim.cpp b/source/common/aarch64/filter-prim.cpp index 568e6f40f..5032567f3 100644 --- a/source/common/aarch64/filter-prim.cpp +++ b/source/common/aarch64/filter-prim.cpp @@ -5097,6 +5097,24 @@ void setupFilterPrimitives_neon(EncoderPrimitives &p) p.pu[LUMA_4x16].luma_hvpp = interp_hv_pp_neon<8, 4, 16>; p.pu[LUMA_12x16].luma_hvpp = interp_hv_pp_neon<8, 12, 16>; + p.chroma[X265_CSP_I422].pu[CHROMA_422_2x8].filter_vss = interp_vert_ss_neon<4, 2, 8>; + p.chroma[X265_CSP_I422].pu[CHROMA_422_2x16].filter_vss = interp_vert_ss_neon<4, 2, 16>; + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x4].filter_vss = interp_vert_ss_neon<4, 4, 4>; + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].filter_vss = interp_vert_ss_neon<4, 4, 8>; + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].filter_vss = interp_vert_ss_neon<4, 4, 16>; + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].filter_vss = interp_vert_ss_neon<4, 4, 32>; + p.chroma[X265_CSP_I422].pu[CHROMA_422_6x16].filter_vss = interp_vert_ss_neon<4, 6, 16>; + p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].filter_vss = interp_vert_ss_neon<4, 12, 32>; + + p.chroma[X265_CSP_I444].pu[LUMA_4x4].filter_vss = interp_vert_ss_neon<4, 4, 4>; + p.chroma[X265_CSP_I444].pu[LUMA_4x8].filter_vss = interp_vert_ss_neon<4, 4, 8>; + p.chroma[X265_CSP_I444].pu[LUMA_4x16].filter_vss = interp_vert_ss_neon<4, 4, 16>; + p.chroma[X265_CSP_I444].pu[LUMA_12x16].filter_vss = interp_vert_ss_neon<4, 12, 16>; + + p.pu[LUMA_4x4].luma_vss = interp_vert_ss_neon<8, 4, 4>; + p.pu[LUMA_4x8].luma_vss = interp_vert_ss_neon<8, 4, 8>; + p.pu[LUMA_4x16].luma_vss = interp_vert_ss_neon<8, 4, 16>; + p.pu[LUMA_12x16].luma_vss = interp_vert_ss_neon<8, 12, 16>; #endif // HIGH_BIT_DEPTH } -- 2.39.5 (Apple Git-154)
>From d5a5d2e4ec03bff5a120481eba489fc078fe8bb4 Mon Sep 17 00:00:00 2001 Message-Id: <d5a5d2e4ec03bff5a120481eba489fc078fe8bb4.1741721714.git.gerdazsejke.m...@arm.com> In-Reply-To: <cover.1741721714.git.gerdazsejke.m...@arm.com> References: <cover.1741721714.git.gerdazsejke.m...@arm.com> From: Gerda Zsejke More <gerdazsejke.m...@arm.com> Date: Thu, 20 Feb 2025 18:50:54 +0100 Subject: [PATCH v2 09/10] AArch64: Enable existing interp_vert_ss impl for HBD SBD 4-tap and 8-tap Neon implementations of interp_vert_ss_neon are used for HBD as well. Extend these functions to support all CHROMA and LUMA block sizes. --- source/common/aarch64/filter-prim.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/source/common/aarch64/filter-prim.cpp b/source/common/aarch64/filter-prim.cpp index 568e6f40f..5032567f3 100644 --- a/source/common/aarch64/filter-prim.cpp +++ b/source/common/aarch64/filter-prim.cpp @@ -5097,6 +5097,24 @@ void setupFilterPrimitives_neon(EncoderPrimitives &p) p.pu[LUMA_4x16].luma_hvpp = interp_hv_pp_neon<8, 4, 16>; p.pu[LUMA_12x16].luma_hvpp = interp_hv_pp_neon<8, 12, 16>; + p.chroma[X265_CSP_I422].pu[CHROMA_422_2x8].filter_vss = interp_vert_ss_neon<4, 2, 8>; + p.chroma[X265_CSP_I422].pu[CHROMA_422_2x16].filter_vss = interp_vert_ss_neon<4, 2, 16>; + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x4].filter_vss = interp_vert_ss_neon<4, 4, 4>; + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].filter_vss = interp_vert_ss_neon<4, 4, 8>; + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].filter_vss = interp_vert_ss_neon<4, 4, 16>; + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].filter_vss = interp_vert_ss_neon<4, 4, 32>; + p.chroma[X265_CSP_I422].pu[CHROMA_422_6x16].filter_vss = interp_vert_ss_neon<4, 6, 16>; + p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].filter_vss = interp_vert_ss_neon<4, 12, 32>; + + p.chroma[X265_CSP_I444].pu[LUMA_4x4].filter_vss = interp_vert_ss_neon<4, 4, 4>; + p.chroma[X265_CSP_I444].pu[LUMA_4x8].filter_vss = interp_vert_ss_neon<4, 4, 8>; 
+ p.chroma[X265_CSP_I444].pu[LUMA_4x16].filter_vss = interp_vert_ss_neon<4, 4, 16>; + p.chroma[X265_CSP_I444].pu[LUMA_12x16].filter_vss = interp_vert_ss_neon<4, 12, 16>; + + p.pu[LUMA_4x4].luma_vss = interp_vert_ss_neon<8, 4, 4>; + p.pu[LUMA_4x8].luma_vss = interp_vert_ss_neon<8, 4, 8>; + p.pu[LUMA_4x16].luma_vss = interp_vert_ss_neon<8, 4, 16>; + p.pu[LUMA_12x16].luma_vss = interp_vert_ss_neon<8, 12, 16>; #endif // HIGH_BIT_DEPTH } -- 2.39.5 (Apple Git-154)
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel