PR #20951 opened by HecaiYuan
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20951
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20951.patch

Add wrappers that perform runtime dispatch to the SIMD-optimized functions
and fall back to the C implementation for unsupported block heights.


>From 3d5177ecfb1fe88ecfcb0dc66db68c1dd8d2977c Mon Sep 17 00:00:00 2001
From: yuanhecai <[email protected]>
Date: Mon, 17 Nov 2025 17:32:53 +0800
Subject: [PATCH] avcodec: fix checkasm-hpeldsp failure on LoongArch

Add wrappers that perform runtime dispatch to the SIMD-optimized functions
and fall back to the C implementation for unsupported block heights.
---
 libavcodec/loongarch/hpeldsp_init_loongarch.c | 108 ++++++++++++++++--
 1 file changed, 101 insertions(+), 7 deletions(-)

diff --git a/libavcodec/loongarch/hpeldsp_init_loongarch.c b/libavcodec/loongarch/hpeldsp_init_loongarch.c
index 1690be5438..681e11a70a 100644
--- a/libavcodec/loongarch/hpeldsp_init_loongarch.c
+++ b/libavcodec/loongarch/hpeldsp_init_loongarch.c
@@ -23,28 +23,122 @@
 #include "libavcodec/hpeldsp.h"
 #include "libavcodec/loongarch/hpeldsp_lasx.h"
 
+static op_pixels_func put_pixels16_xy2_8_c_fallback;        /* saved put_pixels_tab[0][3] */
+static op_pixels_func put_no_rnd_pixels16_x2_8_c_fallback;  /* saved put_no_rnd_pixels_tab[0][1] */
+static op_pixels_func put_no_rnd_pixels16_y2_8_c_fallback;  /* saved put_no_rnd_pixels_tab[0][2] */
+static op_pixels_func put_no_rnd_pixels16_xy2_8_c_fallback; /* saved put_no_rnd_pixels_tab[0][3] */
+static op_pixels_func put_no_rnd_pixels8_x2_8_c_fallback;   /* saved put_no_rnd_pixels_tab[1][1] */
+static op_pixels_func put_no_rnd_pixels8_y2_8_c_fallback;   /* saved put_no_rnd_pixels_tab[1][2] */
+static op_pixels_func put_no_rnd_pixels8_xy2_8_c_fallback;  /* saved put_no_rnd_pixels_tab[1][3] */
+
+/* Heights 16 and 8 go to the LASX routine; any other h uses the saved fallback. */
+static inline void put_no_rnd_pix16_y2_8_lasx_wrap(uint8_t *block,
+                                                   const uint8_t *pixels,
+                                                   ptrdiff_t line_size, int h)
+{
+    op_pixels_func fn = (h == 16 || h == 8) ? ff_put_no_rnd_pixels16_y2_8_lasx
+                                            : put_no_rnd_pixels16_y2_8_c_fallback;
+
+    fn(block, pixels, line_size, h);
+}
+
+/* Heights 16 and 8 go to the LASX routine; any other h uses the saved fallback. */
+static inline void put_no_rnd_pix16_xy2_8_lasx_wrap(uint8_t *block,
+                                                    const uint8_t *pixels,
+                                                    ptrdiff_t line_size, int h)
+{
+    op_pixels_func fn = (h == 16 || h == 8) ? ff_put_no_rnd_pixels16_xy2_8_lasx
+                                            : put_no_rnd_pixels16_xy2_8_c_fallback;
+
+    fn(block, pixels, line_size, h);
+}
+
+/* Heights 8 and 4 go to the LASX routine; any other h uses the saved fallback. */
+static inline void put_no_rnd_pix8_y2_8_lasx_wrap(uint8_t *block,
+                                                  const uint8_t *pixels,
+                                                  ptrdiff_t line_size, int h)
+{
+    op_pixels_func fn = (h == 8 || h == 4) ? ff_put_no_rnd_pixels8_y2_8_lasx
+                                           : put_no_rnd_pixels8_y2_8_c_fallback;
+
+    fn(block, pixels, line_size, h);
+}
+
+/* Heights 8 and 4 go to the LASX routine; any other h uses the saved fallback. */
+static inline void put_no_rnd_pix8_xy2_8_lasx_wrap(uint8_t *block,
+                                                   const uint8_t *pixels,
+                                                   ptrdiff_t line_size, int h)
+{
+    op_pixels_func fn = (h == 8 || h == 4) ? ff_put_no_rnd_pixels8_xy2_8_lasx
+                                           : put_no_rnd_pixels8_xy2_8_c_fallback;
+
+    fn(block, pixels, line_size, h);
+}
+
+/* Only h == 16 goes to the LASX routine; any other h uses the saved fallback. */
+static inline void put_pix16_xy2_8_lasx_wrap(uint8_t *block,
+                                             const uint8_t *pixels,
+                                             ptrdiff_t line_size, int h)
+{
+    op_pixels_func fn = (h == 16) ? ff_put_pixels16_xy2_8_lasx
+                                  : put_pixels16_xy2_8_c_fallback;
+
+    fn(block, pixels, line_size, h);
+}
+
+/* Heights 16 and 8 go to the LASX routine; any other h uses the saved fallback. */
+static inline void put_no_rnd_pix16_x2_8_lasx_wrap(uint8_t *block,
+                                                   const uint8_t *pixels,
+                                                   ptrdiff_t line_size, int h)
+{
+    op_pixels_func fn = (h == 16 || h == 8) ? ff_put_no_rnd_pixels16_x2_8_lasx
+                                            : put_no_rnd_pixels16_x2_8_c_fallback;
+
+    fn(block, pixels, line_size, h);
+}
+
+/* Heights 8 and 4 go to the LASX routine; any other h uses the saved fallback. */
+static inline void put_no_rnd_pix8_x2_8_lasx_wrap(uint8_t *block,
+                                                  const uint8_t *pixels,
+                                                  ptrdiff_t line_size, int h)
+{
+    op_pixels_func fn = (h == 8 || h == 4) ? ff_put_no_rnd_pixels8_x2_8_lasx
+                                           : put_no_rnd_pixels8_x2_8_c_fallback;
+
+    fn(block, pixels, line_size, h);
+}
+
 void ff_hpeldsp_init_loongarch(HpelDSPContext *c, int flags)
 {
     int cpu_flags = av_get_cpu_flags();
 
     if (have_lasx(cpu_flags)) {
+        /* Save current entries; the wrappers call them for unsupported h. */
+        put_pixels16_xy2_8_c_fallback        = c->put_pixels_tab[0][3];
+        put_no_rnd_pixels16_y2_8_c_fallback  = c->put_no_rnd_pixels_tab[0][2];
+        put_no_rnd_pixels16_xy2_8_c_fallback = c->put_no_rnd_pixels_tab[0][3];
+        put_no_rnd_pixels8_y2_8_c_fallback   = c->put_no_rnd_pixels_tab[1][2];
+        put_no_rnd_pixels8_xy2_8_c_fallback  = c->put_no_rnd_pixels_tab[1][3];
+        put_no_rnd_pixels16_x2_8_c_fallback  = c->put_no_rnd_pixels_tab[0][1];
+        put_no_rnd_pixels8_x2_8_c_fallback   = c->put_no_rnd_pixels_tab[1][1];
+
         c->put_pixels_tab[0][0] = ff_put_pixels16_8_lsx;
         c->put_pixels_tab[0][1] = ff_put_pixels16_x2_8_lasx;
         c->put_pixels_tab[0][2] = ff_put_pixels16_y2_8_lasx;
-        c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_8_lasx;
+        c->put_pixels_tab[0][3] = put_pix16_xy2_8_lasx_wrap;
 
         c->put_pixels_tab[1][0] = ff_put_pixels8_8_lasx;
         c->put_pixels_tab[1][1] = ff_put_pixels8_x2_8_lasx;
         c->put_pixels_tab[1][2] = ff_put_pixels8_y2_8_lasx;
         c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_8_lasx;
         c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_8_lsx;
-        c->put_no_rnd_pixels_tab[0][1] = ff_put_no_rnd_pixels16_x2_8_lasx;
-        c->put_no_rnd_pixels_tab[0][2] = ff_put_no_rnd_pixels16_y2_8_lasx;
-        c->put_no_rnd_pixels_tab[0][3] = ff_put_no_rnd_pixels16_xy2_8_lasx;
+        c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pix16_x2_8_lasx_wrap;
+        c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pix16_y2_8_lasx_wrap;
+        c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pix16_xy2_8_lasx_wrap;
 
         c->put_no_rnd_pixels_tab[1][0] = ff_put_pixels8_8_lasx;
-        c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_8_lasx;
-        c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_8_lasx;
-        c->put_no_rnd_pixels_tab[1][3] = ff_put_no_rnd_pixels8_xy2_8_lasx;
+        c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pix8_x2_8_lasx_wrap;
+        c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pix8_y2_8_lasx_wrap;
+        c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pix8_xy2_8_lasx_wrap;
     }
 }
-- 
2.49.1

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to