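From: Shaun Loo <shaunlo...@gmail.com> Add x86 assembly SAO band and edge filters for the VVC decoder by wrapping the shared H.26x SAO macros, and hook up the AVX2 versions for 8-, 10- and 12-bit in dsp_init.c. This is part of Google Summer of Code 2023.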
Co-authored-by: Nuo Mi <nuomi2...@gmail.com> --- libavcodec/x86/h26x/h2656_sao.asm | 8 +-- libavcodec/x86/vvc/Makefile | 2 + libavcodec/x86/vvc/dsp_init.c | 41 +++++++++++ libavcodec/x86/vvc/sao.asm | 73 +++++++++++++++++++ libavcodec/x86/vvc/sao_10bit.asm | 113 ++++++++++++++++++++++++++++++ 5 files changed, 233 insertions(+), 4 deletions(-) create mode 100644 libavcodec/x86/vvc/sao.asm create mode 100644 libavcodec/x86/vvc/sao_10bit.asm diff --git a/libavcodec/x86/h26x/h2656_sao.asm b/libavcodec/x86/h26x/h2656_sao.asm index 504fcb388b..a80ee26178 100644 --- a/libavcodec/x86/h26x/h2656_sao.asm +++ b/libavcodec/x86/h26x/h2656_sao.asm @@ -147,7 +147,7 @@ align 16 %assign i i+mmsize %endrep -%if %2 == 48 +%if %2 == 48 || %2 == 80 || %2 == 112 INIT_XMM cpuname mova m13, [srcq + i] @@ -160,7 +160,7 @@ INIT_XMM cpuname %if cpuflag(avx2) INIT_YMM cpuname %endif -%endif ; %2 == 48 +%endif ; %2 == 48 || %2 == 80 || %2 == 112 add dstq, dststrideq ; dst += dststride add srcq, srcstrideq ; src += srcstride @@ -280,7 +280,7 @@ align 16 %assign i i+mmsize %endrep -%if %2 == 48 +%if %2 == 48 || %2 == 80 || %2 == 112 INIT_XMM cpuname mova m1, [srcq + i] @@ -291,7 +291,7 @@ INIT_XMM cpuname %if cpuflag(avx2) INIT_YMM cpuname %endif -%endif +%endif ; %2 == 48 || %2 == 80 || %2 == 112 add dstq, dststrideq add srcq, EDGE_SRCSTRIDE diff --git a/libavcodec/x86/vvc/Makefile b/libavcodec/x86/vvc/Makefile index 86a6c8ba7c..c426b156c1 100644 --- a/libavcodec/x86/vvc/Makefile +++ b/libavcodec/x86/vvc/Makefile @@ -8,4 +8,6 @@ X86ASM-OBJS-$(CONFIG_VVC_DECODER) += x86/vvc/alf.o \ x86/vvc/mc.o \ x86/vvc/of.o \ x86/vvc/sad.o \ + x86/vvc/sao.o \ + x86/vvc/sao_10bit.o \ x86/h26x/h2656_inter.o diff --git a/libavcodec/x86/vvc/dsp_init.c b/libavcodec/x86/vvc/dsp_init.c index bb68ba0b1e..cbcfa40a66 100644 --- a/libavcodec/x86/vvc/dsp_init.c +++ b/libavcodec/x86/vvc/dsp_init.c @@ -215,6 +215,44 @@ ALF_FUNCS(16, 12, avx2) #endif +#define SAO_FILTER_FUNC(wd, bitd, opt) \ +void 
ff_vvc_sao_band_filter_##wd##_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \ + const int16_t *sao_offset_val, int sao_left_class, int width, int height); \ +void ff_vvc_sao_edge_filter_##wd##_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \ + const int16_t *sao_offset_val, int eo, int width, int height); \ + +#define SAO_FILTER_FUNCS(bitd, opt) \ + SAO_FILTER_FUNC(8, bitd, opt) \ + SAO_FILTER_FUNC(16, bitd, opt) \ + SAO_FILTER_FUNC(32, bitd, opt) \ + SAO_FILTER_FUNC(48, bitd, opt) \ + SAO_FILTER_FUNC(64, bitd, opt) \ + SAO_FILTER_FUNC(80, bitd, opt) \ + SAO_FILTER_FUNC(96, bitd, opt) \ + SAO_FILTER_FUNC(112, bitd, opt) \ + SAO_FILTER_FUNC(128, bitd, opt) \ + +SAO_FILTER_FUNCS(8, avx2) +SAO_FILTER_FUNCS(10, avx2) +SAO_FILTER_FUNCS(12, avx2) + +#define SAO_FILTER_INIT(type, bitd, opt) do { \ + c->sao.type##_filter[0] = ff_vvc_sao_##type##_filter_8_##bitd##_##opt; \ + c->sao.type##_filter[1] = ff_vvc_sao_##type##_filter_16_##bitd##_##opt; \ + c->sao.type##_filter[2] = ff_vvc_sao_##type##_filter_32_##bitd##_##opt; \ + c->sao.type##_filter[3] = ff_vvc_sao_##type##_filter_48_##bitd##_##opt; \ + c->sao.type##_filter[4] = ff_vvc_sao_##type##_filter_64_##bitd##_##opt; \ + c->sao.type##_filter[5] = ff_vvc_sao_##type##_filter_80_##bitd##_##opt; \ + c->sao.type##_filter[6] = ff_vvc_sao_##type##_filter_96_##bitd##_##opt; \ + c->sao.type##_filter[7] = ff_vvc_sao_##type##_filter_112_##bitd##_##opt; \ + c->sao.type##_filter[8] = ff_vvc_sao_##type##_filter_128_##bitd##_##opt; \ +} while (0) + +#define SAO_INIT(bitd, opt) do { \ + SAO_FILTER_INIT(band, bitd, opt); \ + SAO_FILTER_INIT(edge, bitd, opt); \ +} while (0) + #define AVG_INIT(bd, opt) do { \ c->inter.avg = bf(vvc_avg, bd, opt); \ c->inter.w_avg = bf(vvc_w_avg, bd, opt); \ @@ -329,6 +367,7 @@ void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd) // filter ALF_INIT(8); + SAO_INIT(8, avx2); } #endif break; @@ -350,6 +389,7 @@ void 
ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd) // filter ALF_INIT(10); + SAO_INIT(10, avx2); } #endif break; @@ -371,6 +411,7 @@ void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd) // filter ALF_INIT(12); + SAO_INIT(12, avx2); } #endif break; diff --git a/libavcodec/x86/vvc/sao.asm b/libavcodec/x86/vvc/sao.asm new file mode 100644 index 0000000000..5f7d7e5358 --- /dev/null +++ b/libavcodec/x86/vvc/sao.asm @@ -0,0 +1,73 @@ +;****************************************************************************** +;* SIMD optimized SAO functions for VVC 8bit decoding +;* +;* Copyright (c) 2024 Shaun Loo +;* Copyright (c) 2024 Nuo Mi +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. 
+;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%define MAX_PB_SIZE 128 +%include "libavcodec/x86/h26x/h2656_sao.asm" + +%macro VVC_SAO_BAND_FILTER 2 + H2656_SAO_BAND_FILTER vvc, %1, %2 +%endmacro + +%macro VVC_SAO_BAND_FILTER_FUNCS 0 +VVC_SAO_BAND_FILTER 8, 0 +VVC_SAO_BAND_FILTER 16, 1 +VVC_SAO_BAND_FILTER 32, 2 +VVC_SAO_BAND_FILTER 48, 2 +VVC_SAO_BAND_FILTER 64, 4 +VVC_SAO_BAND_FILTER 80, 4 +VVC_SAO_BAND_FILTER 96, 6 +VVC_SAO_BAND_FILTER 112, 6 +VVC_SAO_BAND_FILTER 128, 8 +%endmacro + +%if HAVE_AVX2_EXTERNAL +INIT_XMM avx2 +VVC_SAO_BAND_FILTER 8, 0 +VVC_SAO_BAND_FILTER 16, 1 +INIT_YMM avx2 +VVC_SAO_BAND_FILTER 32, 1 +VVC_SAO_BAND_FILTER 48, 1 +VVC_SAO_BAND_FILTER 64, 2 +VVC_SAO_BAND_FILTER 80, 2 +VVC_SAO_BAND_FILTER 96, 3 +VVC_SAO_BAND_FILTER 112, 3 +VVC_SAO_BAND_FILTER 128, 4 +%endif + +%macro VVC_SAO_EDGE_FILTER 2-3 + H2656_SAO_EDGE_FILTER vvc, %{1:-1} +%endmacro + +%if HAVE_AVX2_EXTERNAL +INIT_XMM avx2 +VVC_SAO_EDGE_FILTER 8, 0 +VVC_SAO_EDGE_FILTER 16, 1, a +INIT_YMM avx2 +VVC_SAO_EDGE_FILTER 32, 1, a +VVC_SAO_EDGE_FILTER 48, 1, u +VVC_SAO_EDGE_FILTER 64, 2, a +VVC_SAO_EDGE_FILTER 80, 2, u +VVC_SAO_EDGE_FILTER 96, 3, a +VVC_SAO_EDGE_FILTER 112, 3, u +VVC_SAO_EDGE_FILTER 128, 4, a +%endif diff --git a/libavcodec/x86/vvc/sao_10bit.asm b/libavcodec/x86/vvc/sao_10bit.asm new file mode 100644 index 0000000000..b7d3d08008 --- /dev/null +++ b/libavcodec/x86/vvc/sao_10bit.asm @@ -0,0 +1,113 @@ +;****************************************************************************** +;* SIMD optimized SAO functions for VVC 10/12bit decoding +;* +;* Copyright (c) 2024 Shaun Loo +;* Copyright (c) 2024 Nuo Mi +;* +;* This file is part of FFmpeg. 
+;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%define MAX_PB_SIZE 128 +%include "libavcodec/x86/h26x/h2656_sao_10bit.asm" + +%macro VVC_SAO_BAND_FILTER 3 + H2656_SAO_BAND_FILTER vvc, %1, %2, %3 +%endmacro + +%macro VVC_SAO_BAND_FILTER_FUNCS 1 + VVC_SAO_BAND_FILTER %1, 8, 1 + VVC_SAO_BAND_FILTER %1, 16, 2 + VVC_SAO_BAND_FILTER %1, 32, 4 + VVC_SAO_BAND_FILTER %1, 48, 6 + VVC_SAO_BAND_FILTER %1, 64, 8 + VVC_SAO_BAND_FILTER %1, 80, 10 + VVC_SAO_BAND_FILTER %1, 96, 12 + VVC_SAO_BAND_FILTER %1, 112, 14 + VVC_SAO_BAND_FILTER %1, 128, 16 +%endmacro + +%macro VVC_SAO_BAND_FILTER_FUNCS 0 + VVC_SAO_BAND_FILTER_FUNCS 10 + VVC_SAO_BAND_FILTER_FUNCS 12 +%endmacro + +INIT_XMM sse2 +VVC_SAO_BAND_FILTER_FUNCS +INIT_XMM avx +VVC_SAO_BAND_FILTER_FUNCS + +%if HAVE_AVX2_EXTERNAL + +%macro VVC_SAO_BAND_FILTER_FUNCS_AVX2 1 + INIT_XMM avx2 + VVC_SAO_BAND_FILTER %1, 8, 1 + INIT_YMM avx2 + VVC_SAO_BAND_FILTER %1, 16, 1 + VVC_SAO_BAND_FILTER %1, 32, 2 + VVC_SAO_BAND_FILTER %1, 48, 3 + VVC_SAO_BAND_FILTER %1, 64, 4 + VVC_SAO_BAND_FILTER %1, 80, 5 + VVC_SAO_BAND_FILTER %1, 96, 6 + VVC_SAO_BAND_FILTER %1, 112, 7 + VVC_SAO_BAND_FILTER %1, 128, 8 +%endmacro + +VVC_SAO_BAND_FILTER_FUNCS_AVX2 10 +VVC_SAO_BAND_FILTER_FUNCS_AVX2 12 + 
+%endif ; HAVE_AVX2_EXTERNAL + +%macro VVC_SAO_EDGE_FILTER 3 + H2656_SAO_EDGE_FILTER vvc, %1, %2, %3 +%endmacro + +%macro VVC_SAO_EDGE_FILTER_FUNCS 1 + VVC_SAO_EDGE_FILTER %1, 8, 1 + VVC_SAO_EDGE_FILTER %1, 16, 2 + VVC_SAO_EDGE_FILTER %1, 32, 4 + VVC_SAO_EDGE_FILTER %1, 48, 6 + VVC_SAO_EDGE_FILTER %1, 64, 8 + VVC_SAO_EDGE_FILTER %1, 80, 10 + VVC_SAO_EDGE_FILTER %1, 96, 12 + VVC_SAO_EDGE_FILTER %1, 112, 14 + VVC_SAO_EDGE_FILTER %1, 128, 16 +%endmacro + +INIT_XMM sse2 +VVC_SAO_EDGE_FILTER_FUNCS 10 +VVC_SAO_EDGE_FILTER_FUNCS 12 + +%if HAVE_AVX2_EXTERNAL + +%macro VVC_SAO_EDGE_FILTER_FUNCS_AVX2 1 + INIT_XMM avx2 + VVC_SAO_EDGE_FILTER %1, 8, 1 + INIT_YMM avx2 + VVC_SAO_EDGE_FILTER %1, 16, 1 + VVC_SAO_EDGE_FILTER %1, 32, 2 + VVC_SAO_EDGE_FILTER %1, 48, 3 + VVC_SAO_EDGE_FILTER %1, 64, 4 + VVC_SAO_EDGE_FILTER %1, 80, 5 + VVC_SAO_EDGE_FILTER %1, 96, 6 + VVC_SAO_EDGE_FILTER %1, 112, 7 + VVC_SAO_EDGE_FILTER %1, 128, 8 +%endmacro + +VVC_SAO_EDGE_FILTER_FUNCS_AVX2 10 +VVC_SAO_EDGE_FILTER_FUNCS_AVX2 12 + +%endif ; HAVE_AVX2_EXTERNAL -- 2.34.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".