PR #20951 opened by HecaiYuan URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20951 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20951.patch
These wrappers perform runtime dispatch to SIMD-optimized functions and fall back to a C implementation for unsupported block heights. From 3d5177ecfb1fe88ecfcb0dc66db68c1dd8d2977c Mon Sep 17 00:00:00 2001 From: yuanhecai <[email protected]> Date: Mon, 17 Nov 2025 17:32:53 +0800 Subject: [PATCH] avcodec: fix checkasm-hpeldsp failure on LA These wrappers perform runtime dispatch to SIMD-optimized functions and fall back to a C implementation for unsupported block heights. --- libavcodec/loongarch/hpeldsp_init_loongarch.c | 108 ++++++++++++++++-- 1 file changed, 101 insertions(+), 7 deletions(-) diff --git a/libavcodec/loongarch/hpeldsp_init_loongarch.c b/libavcodec/loongarch/hpeldsp_init_loongarch.c index 1690be5438..681e11a70a 100644 --- a/libavcodec/loongarch/hpeldsp_init_loongarch.c +++ b/libavcodec/loongarch/hpeldsp_init_loongarch.c @@ -23,28 +23,122 @@ #include "libavcodec/hpeldsp.h" #include "libavcodec/loongarch/hpeldsp_lasx.h" +static op_pixels_func put_pixels16_xy2_8_c_fallback = NULL; +static op_pixels_func put_no_rnd_pixels16_y2_8_c_fallback = NULL; +static op_pixels_func put_no_rnd_pixels16_xy2_8_c_fallback = NULL; +static op_pixels_func put_no_rnd_pixels8_y2_8_c_fallback = NULL; +static op_pixels_func put_no_rnd_pixels8_xy2_8_c_fallback = NULL; +static op_pixels_func put_no_rnd_pixels16_x2_8_c_fallback = NULL; +static op_pixels_func put_no_rnd_pixels8_x2_8_c_fallback = NULL; + +static inline void put_no_rnd_pix16_y2_8_lasx_wrap(uint8_t *block, + const uint8_t *pixels, + ptrdiff_t line_size, int h) +{ + if (h == 16 || h == 8) { + ff_put_no_rnd_pixels16_y2_8_lasx(block, pixels, line_size, h); + } else { + put_no_rnd_pixels16_y2_8_c_fallback(block, pixels, line_size, h); + } +} + +static inline void put_no_rnd_pix16_xy2_8_lasx_wrap(uint8_t *block, + const uint8_t *pixels, + ptrdiff_t line_size, int h) +{ + if (h == 16 || h == 8) { + ff_put_no_rnd_pixels16_xy2_8_lasx(block, pixels, line_size, h); + } else { + put_no_rnd_pixels16_xy2_8_c_fallback(block, 
pixels, line_size, h); + } +} + +static inline void put_no_rnd_pix8_y2_8_lasx_wrap(uint8_t *block, + const uint8_t *pixels, + ptrdiff_t line_size, int h) +{ + if (h == 8 || h == 4) { + ff_put_no_rnd_pixels8_y2_8_lasx(block, pixels, line_size, h); + } else { + put_no_rnd_pixels8_y2_8_c_fallback(block, pixels, line_size, h); + } +} + +static inline void put_no_rnd_pix8_xy2_8_lasx_wrap(uint8_t *block, + const uint8_t *pixels, + ptrdiff_t line_size, int h) +{ + if (h == 8 || h == 4) { + ff_put_no_rnd_pixels8_xy2_8_lasx(block, pixels, line_size, h); + } else { + put_no_rnd_pixels8_xy2_8_c_fallback(block, pixels, line_size, h); + } +} + +static inline void put_pix16_xy2_8_lasx_wrap(uint8_t *block, + const uint8_t *pixels, + ptrdiff_t line_size, int h) +{ + if (h == 16) { + ff_put_pixels16_xy2_8_lasx(block, pixels, line_size, h); + } else { + put_pixels16_xy2_8_c_fallback(block, pixels, line_size, h); + } +} + +static inline void put_no_rnd_pix16_x2_8_lasx_wrap(uint8_t *block, + const uint8_t *pixels, + ptrdiff_t line_size, int h) +{ + if (h == 16 || h == 8) { + ff_put_no_rnd_pixels16_x2_8_lasx(block, pixels, line_size, h); + } else { + put_no_rnd_pixels16_x2_8_c_fallback(block, pixels, line_size, h); + } +} + +static inline void put_no_rnd_pix8_x2_8_lasx_wrap(uint8_t *block, + const uint8_t *pixels, + ptrdiff_t line_size, int h) +{ + if (h == 8 || h == 4) { + ff_put_no_rnd_pixels8_x2_8_lasx(block, pixels, line_size, h); + } else { + put_no_rnd_pixels8_x2_8_c_fallback(block, pixels, line_size, h); + } +} + void ff_hpeldsp_init_loongarch(HpelDSPContext *c, int flags) { int cpu_flags = av_get_cpu_flags(); if (have_lasx(cpu_flags)) { + + put_pixels16_xy2_8_c_fallback = c->put_pixels_tab[0][3]; + put_no_rnd_pixels16_y2_8_c_fallback = c->put_no_rnd_pixels_tab[0][2]; + put_no_rnd_pixels16_xy2_8_c_fallback = c->put_no_rnd_pixels_tab[0][3]; + put_no_rnd_pixels8_y2_8_c_fallback = c->put_no_rnd_pixels_tab[1][2]; + put_no_rnd_pixels8_xy2_8_c_fallback = 
c->put_no_rnd_pixels_tab[1][3]; + put_no_rnd_pixels16_x2_8_c_fallback = c->put_no_rnd_pixels_tab[0][1]; + put_no_rnd_pixels8_x2_8_c_fallback = c->put_no_rnd_pixels_tab[1][1]; + c->put_pixels_tab[0][0] = ff_put_pixels16_8_lsx; c->put_pixels_tab[0][1] = ff_put_pixels16_x2_8_lasx; c->put_pixels_tab[0][2] = ff_put_pixels16_y2_8_lasx; - c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_8_lasx; + c->put_pixels_tab[0][3] = put_pix16_xy2_8_lasx_wrap; c->put_pixels_tab[1][0] = ff_put_pixels8_8_lasx; c->put_pixels_tab[1][1] = ff_put_pixels8_x2_8_lasx; c->put_pixels_tab[1][2] = ff_put_pixels8_y2_8_lasx; c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_8_lasx; c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_8_lsx; - c->put_no_rnd_pixels_tab[0][1] = ff_put_no_rnd_pixels16_x2_8_lasx; - c->put_no_rnd_pixels_tab[0][2] = ff_put_no_rnd_pixels16_y2_8_lasx; - c->put_no_rnd_pixels_tab[0][3] = ff_put_no_rnd_pixels16_xy2_8_lasx; + c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pix16_x2_8_lasx_wrap; + c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pix16_y2_8_lasx_wrap; + c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pix16_xy2_8_lasx_wrap; c->put_no_rnd_pixels_tab[1][0] = ff_put_pixels8_8_lasx; - c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_8_lasx; - c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_8_lasx; - c->put_no_rnd_pixels_tab[1][3] = ff_put_no_rnd_pixels8_xy2_8_lasx; + c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pix8_x2_8_lasx_wrap; + c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pix8_y2_8_lasx_wrap; + c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pix8_xy2_8_lasx_wrap; } } -- 2.49.1 _______________________________________________ ffmpeg-devel mailing list -- [email protected] To unsubscribe send an email to [email protected]
