PR #23619 opened by philipl
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23619
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23619.patch

It's been years, but I took another look at the bwdif_cuda implementation and
found a couple of typos sitting in there. I noticed them while doing a
comparison with the Vulkan implementation.

This probably explains the small PSNR differences we've noted in the past.

# Summary of changes

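This PR fixes two typos in the bwdif_cuda kernel (libavfilter/vf_bwdif_cuda.cu):

- In filter(), the coef_hf[2] term summed next2_mrefs4 twice; the last operand
  is now next2_prefs4.
- In bwdif_single() and bwdif_double(), the is_second_field-dependent texture
  selection was applied to prev1/next1; it is now applied to prev2/next2, with
  prev1 = prev and next1 = next.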

<!--
If this PR requires new FATE test samples, attach them to the PR and
list their target paths below (relative to the fate-suite root).

Attached filenames must match the sample's filename:

```fate-samples
# e.g. vorbis/new-sample.ogg
```
-->



>From d33873dd5194f79b9287e32b9520431fac1a8311 Mon Sep 17 00:00:00 2001
From: Philip Langdale <[email protected]>
Date: Fri, 26 Jun 2026 22:56:34 -0700
Subject: [PATCH] avfilter/vf_bwdif_cuda: fix typos in algorithm

It's been years, but I took another look at the bwdif_cuda implementation and
found a couple of typos sitting in there. I noticed them while doing a
comparison with the Vulkan implementation.

This probably explains the small PSNR differences we've noted in the past.
---
 libavfilter/vf_bwdif_cuda.cu | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/libavfilter/vf_bwdif_cuda.cu b/libavfilter/vf_bwdif_cuda.cu
index 3d4c29d8c3..c09b6f7f36 100644
--- a/libavfilter/vf_bwdif_cuda.cu
+++ b/libavfilter/vf_bwdif_cuda.cu
@@ -91,7 +91,7 @@ __inline__ __device__ T filter(T cur_prefs3, T cur_prefs, T 
cur_mrefs, T cur_mre
         if (abs(c - e) > temporal_diff0) {
             interpol = (((coef_hf[0] * (prev2_0 + next2_0)
                 - coef_hf[1] * (prev2_mrefs2 + next2_mrefs2 + prev2_prefs2 + 
next2_prefs2)
-                + coef_hf[2] * (prev2_mrefs4 + next2_mrefs4 + prev2_prefs4 + 
next2_mrefs4)) >> 2)
+                + coef_hf[2] * (prev2_mrefs4 + next2_mrefs4 + prev2_prefs4 + 
next2_prefs4)) >> 2)
                 + coef_lf[0] * (c + e) - coef_lf[1] * (cur_mrefs3 + 
cur_prefs3)) >> 13;
         } else {
             interpol = (coef_sp[0] * (c + e) - coef_sp[1] * (cur_mrefs3 + 
cur_prefs3)) >> 13;
@@ -146,10 +146,10 @@ __inline__ __device__ void bwdif_single(T *dst,
     // Calculate temporal prediction
     int is_second_field = !(parity ^ tff);
 
-    cudaTextureObject_t prev2 = prev;
-    cudaTextureObject_t prev1 = is_second_field ? cur : prev;
-    cudaTextureObject_t next1 = is_second_field ? next : cur;
-    cudaTextureObject_t next2 = next;
+    cudaTextureObject_t prev2 = is_second_field ? cur : prev;
+    cudaTextureObject_t prev1 = prev;
+    cudaTextureObject_t next1 = next;
+    cudaTextureObject_t next2 = is_second_field ? next : cur;
 
     T prev2_prefs4 = tex2D<T>(prev2, xo,  yo + 4);
     T prev2_prefs2 = tex2D<T>(prev2, xo,  yo + 2);
@@ -213,10 +213,10 @@ __inline__ __device__ void bwdif_double(T *dst,
 
     int is_second_field = !(parity ^ tff);
 
-    cudaTextureObject_t prev2 = prev;
-    cudaTextureObject_t prev1 = is_second_field ? cur : prev;
-    cudaTextureObject_t next1 = is_second_field ? next : cur;
-    cudaTextureObject_t next2 = next;
+    cudaTextureObject_t prev2 = is_second_field ? cur : prev;
+    cudaTextureObject_t prev1 = prev;
+    cudaTextureObject_t next1 = next;
+    cudaTextureObject_t next2 = is_second_field ? next : cur;
 
     T prev2_prefs4 = tex2D<T>(prev2, xo,  yo + 4);
     T prev2_prefs2 = tex2D<T>(prev2, xo,  yo + 2);
-- 
2.52.0

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to