vf_scale_cuda: generalize kernel signature to accept weights

Niklas Haas via ffmpeg-cvslog Mon, 29 Jun 2026 07:31:07 -0700

This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch release/8.1
in repository ffmpeg.


commit d215ba76a0a0adccddad8c26c7518ab712bf63c5
Author:     Niklas Haas <[email protected]>
AuthorDate: Mon Jun 22 18:43:59 2026 +0200
Commit:     Marvin Scholz <[email protected]>
CommitDate: Mon Jun 29 15:51:32 2026 +0200

    avfilter/vf_scale_cuda: generalize kernel signature to accept weights
    
    Ignored for now by the existing fixed function kernels.
    
    Signed-off-by: Niklas Haas <[email protected]>
    (cherry-picked from commit d98cc462f6447bf3d759df0b81ccca50b3a8f4d0)
    Signed-off-by: Marvin Scholz <[email protected]>
---
 libavfilter/vf_scale_cuda.cu | 27 ++++++++++++++++++++-------
 libavfilter/vf_scale_cuda.h  |  5 +++++
 2 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/libavfilter/vf_scale_cuda.cu b/libavfilter/vf_scale_cuda.cu
index d674c0885a..a2a38b7cd8 100644
--- a/libavfilter/vf_scale_cuda.cu
+++ b/libavfilter/vf_scale_cuda.cu
@@ -28,7 +28,9 @@ using subsample_function_t = T (*)(cudaTextureObject_t tex, int xo, int yo,
                                    int dst_width, int dst_height,
                                    int src_left, int src_top,
                                    int src_width, int src_height,
-                                   int bit_depth, float param);
+                                   int bit_depth, float param,
+                                   const float *weights, const int *offsets,
+                                   int filter_size);
 
 // --- CONVERSION LOGIC ---
 
@@ -90,14 +92,16 @@ static inline __device__ ushort conv_16to10pl(ushort in)
     __device__ static inline void N(cudaTextureObject_t src_tex[4], T *dst[4], int xo, int yo, \
                                     int dst_width, int dst_height, int dst_pitch,              \
                                     int src_left, int src_top, int src_width, int src_height,  \
-                                    float param, int mpeg_range)
+                                    float param, int mpeg_range,                               \
+                                    const float *weights, const int *offsets, int filter_size)
 
 #define SUB_F(m, plane) \
     subsample_func_##m(src_tex[plane], xo, yo, \
                        dst_width, dst_height,  \
                        src_left, src_top,      \
                        src_width, src_height,  \
-                       in_bit_depth, param)
+                       in_bit_depth, param,    \
+                       weights, offsets, filter_size)
 
 // FFmpeg passes pitch in bytes, CUDA uses potentially larger types
 #define FIXED_PITCH \
@@ -1095,7 +1099,9 @@ __device__ static inline T Subsample_Nearest(cudaTextureObject_t tex,
                                              int dst_width, int dst_height,
                                              int src_left, int src_top,
                                              int src_width, int src_height,
-                                             int bit_depth, float param)
+                                             int bit_depth, float param,
+                                             const float *weights, const int *offsets,
+                                             int filter_size)
 {
     float hscale = (float)src_width / (float)dst_width;
     float vscale = (float)src_height / (float)dst_height;
@@ -1111,7 +1117,9 @@ __device__ static inline T Subsample_Bilinear(cudaTextureObject_t tex,
                                               int dst_width, int dst_height,
                                               int src_left, int src_top,
                                               int src_width, int src_height,
-                                              int bit_depth, float param)
+                                              int bit_depth, float param,
+                                              const float *weights, const int *offsets,
+                                              int filter_size)
 {
     float hscale = (float)src_width / (float)dst_width;
     float vscale = (float)src_height / (float)dst_height;
@@ -1143,7 +1151,9 @@ __device__ static inline T Subsample_Bicubic(cudaTextureObject_t tex,
                                              int dst_width, int dst_height,
                                              int src_left, int src_top,
                                              int src_width, int src_height,
-                                             int bit_depth, float param)
+                                             int bit_depth, float param,
+                                             const float *weights, const int *offsets,
+                                             int filter_size)
 {
     float hscale = (float)src_width / (float)dst_width;
     float vscale = (float)src_height / (float)dst_height;
@@ -1194,7 +1204,10 @@ __device__ static inline T Subsample_Bicubic(cudaTextureObject_t tex,
         params.dst_width, params.dst_height, params.dst_pitch, \
         params.src_left, params.src_top,                \
         params.src_width, params.src_height,            \
-        params.param, params.mpeg_range);
+        params.param, params.mpeg_range,                \
+        (const float*) params.weights,                  \
+        (const int*) params.offsets,                    \
+        params.filter_size);
 
 extern "C" {
 
diff --git a/libavfilter/vf_scale_cuda.h b/libavfilter/vf_scale_cuda.h
index 81fd8061e3..e3e34a94aa 100644
--- a/libavfilter/vf_scale_cuda.h
+++ b/libavfilter/vf_scale_cuda.h
@@ -45,6 +45,11 @@ typedef struct {
     int src_height;
     float param;
     int mpeg_range;
+
+    /* Weights for the generic filter kernel */
+    CUdeviceptr weights;
+    CUdeviceptr offsets;
+    int filter_size;
 } CUDAScaleKernelParams;
 
 #endif

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

[FFmpeg-cvslog] [ffmpeg] 05/17: avfilter/vf_scale_cuda: generalize kernel signature to accept weights

Reply via email to