Patches attached. - Andreas
From 9de09c4db6c914eeec505d4365850fda3bf86b8d Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <andreas.rheinha...@outlook.com> Date: Wed, 4 Jun 2025 03:20:02 +0200 Subject: [PATCH 1/6] avfilter/vf_overlay: Don't perform UB pointer arithmetic
This happens when the pixel format of the output does not have an alpha channel. It leads to FATE failures with the ffmpeg-filter_colorkey, filter-overlay-dvdsub-2397, filter-overlay, filter-overlay_{gbrp_gbrap,nv12,nv21,yuv420,yuv420_yuva420,yuv420p10,yuv422_yuva422,yuv422p10,yuv444_yuva444,yuv444p10} and sub2video tests when using Clang UBSan. Fix this by only performing the pointer arithmetic when it is going to be used. This can be checked via variables that are compile-time constants due to inlining, so that the checks are free. Given that the pointer is potentially used as a function argument, the compiler could not elide the calculation, but now it can. The size of .text decreased by 1632B with GCC 14 and by 1392B with Clang 19 (both -O3). Signed-off-by: Andreas Rheinhardt <andreas.rheinha...@outlook.com> --- libavfilter/vf_overlay.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/libavfilter/vf_overlay.c b/libavfilter/vf_overlay.c index 8560ed7c17..0a9ff60ebb 100644 --- a/libavfilter/vf_overlay.c +++ b/libavfilter/vf_overlay.c @@ -467,7 +467,7 @@ static av_always_inline void blend_plane_##depth##_##nbits##bits(AVFilterContext int dst_hp = AV_CEIL_RSHIFT(dst_h, vsub); \ int yp = y>>vsub; \ int xp = x>>hsub; \ - uint##depth##_t *s, *sp, *d, *dp, *dap, *a, *da, *ap; \ + uint##depth##_t *s, *sp, *d, *dp, *dap, *a, *ap; \ int jmax, j, k, kmax; \ int slice_start, slice_end; \ const uint##depth##_t max = (1 << nbits) - 1; \ @@ -486,14 +486,15 @@ static av_always_inline void blend_plane_##depth##_##nbits##bits(AVFilterContext + (yp + slice_start) * dst->linesize[dst_plane] \ + dst_offset); \ ap = (uint##depth##_t *)(src->data[3] + (slice_start << vsub) * src->linesize[3]); \ - dap = (uint##depth##_t *)(dst->data[3] + ((yp + slice_start) << vsub) * dst->linesize[3]); \ + if (main_has_alpha) \ + dap = (uint##depth##_t *)(dst->data[3] + ((yp + slice_start) << vsub) * dst->linesize[3]); \ \ for (j = slice_start; j < slice_end; j++) { \ k = 
FFMAX(-xp, 0); \ d = dp + (xp+k) * dst_step; \ s = sp + k; \ a = ap + (k<<hsub); \ - da = dap + ((xp+k) << hsub); \ + uint##depth##_t *da = main_has_alpha ? dap + ((xp+k) << hsub) : NULL; \ kmax = FFMIN(-xp + dst_wp, src_wp); \ \ if (nbits == 8 && ((vsub && j+1 < src_hp) || !vsub) && octx->blend_row[i]) { \ @@ -502,7 +503,8 @@ static av_always_inline void blend_plane_##depth##_##nbits##bits(AVFilterContext \ s += c; \ d += dst_step * c; \ - da += (1 << hsub) * c; \ + if (main_has_alpha) \ + da += (1 << hsub) * c; \ a += (1 << hsub) * c; \ k += c; \ } \ @@ -560,13 +562,15 @@ static av_always_inline void blend_plane_##depth##_##nbits##bits(AVFilterContext } \ s++; \ d += dst_step; \ - da += 1 << hsub; \ + if (main_has_alpha) \ + da += 1 << hsub; \ a += 1 << hsub; \ } \ dp += dst->linesize[dst_plane] / bytes; \ sp += src->linesize[i] / bytes; \ ap += (1 << vsub) * src->linesize[3] / bytes; \ - dap += (1 << vsub) * dst->linesize[3] / bytes; \ + if (main_has_alpha) \ + dap += (1 << vsub) * dst->linesize[3] / bytes; \ } \ } DEFINE_BLEND_PLANE(8, 8) -- 2.45.2
From e25ce3fdc6333c7d6408a221d0fc0b90eb057973 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <andreas.rheinha...@outlook.com> Date: Wed, 4 Jun 2025 15:54:55 +0200 Subject: [PATCH 2/6] avfilter/vf_overlay: Avoid converting stride to uint16_t and back Just keep the pointers for the beginning of a line uint8_t* and use uint16_t* to do the actual processing. Signed-off-by: Andreas Rheinhardt <andreas.rheinha...@outlook.com> --- libavfilter/vf_overlay.c | 42 ++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/libavfilter/vf_overlay.c b/libavfilter/vf_overlay.c index 0a9ff60ebb..88aaad2aad 100644 --- a/libavfilter/vf_overlay.c +++ b/libavfilter/vf_overlay.c @@ -467,7 +467,6 @@ static av_always_inline void blend_plane_##depth##_##nbits##bits(AVFilterContext int dst_hp = AV_CEIL_RSHIFT(dst_h, vsub); \ int yp = y>>vsub; \ int xp = x>>hsub; \ - uint##depth##_t *s, *sp, *d, *dp, *dap, *a, *ap; \ int jmax, j, k, kmax; \ int slice_start, slice_end; \ const uint##depth##_t max = (1 << nbits) - 1; \ @@ -481,20 +480,19 @@ static av_always_inline void blend_plane_##depth##_##nbits##bits(AVFilterContext slice_start = j + (jmax * jobnr) / nb_jobs; \ slice_end = j + (jmax * (jobnr+1)) / nb_jobs; \ \ - sp = (uint##depth##_t *)(src->data[i] + (slice_start) * src->linesize[i]); \ - dp = (uint##depth##_t *)(dst->data[dst_plane] \ + const uint8_t *sp = src->data[i] + (slice_start) * src->linesize[i]; \ + uint8_t *dp = dst->data[dst_plane] \ + (yp + slice_start) * dst->linesize[dst_plane] \ - + dst_offset); \ - ap = (uint##depth##_t *)(src->data[3] + (slice_start << vsub) * src->linesize[3]); \ - if (main_has_alpha) \ - dap = (uint##depth##_t *)(dst->data[3] + ((yp + slice_start) << vsub) * dst->linesize[3]); \ + + dst_offset; \ + const uint8_t *ap = src->data[3] + (slice_start << vsub) * src->linesize[3]; \ + const uint8_t *dap = main_has_alpha ? 
dst->data[3] + ((yp + slice_start) << vsub) * dst->linesize[3] : NULL; \ \ for (j = slice_start; j < slice_end; j++) { \ k = FFMAX(-xp, 0); \ - d = dp + (xp+k) * dst_step; \ - s = sp + k; \ - a = ap + (k<<hsub); \ - uint##depth##_t *da = main_has_alpha ? dap + ((xp+k) << hsub) : NULL; \ + const uint##depth##_t *s = (const uint##depth##_t *)sp + k; \ + const uint##depth##_t *a = (const uint##depth##_t *)ap + (k<<hsub); \ + const uint##depth##_t *da = main_has_alpha ? (uint##depth##_t *)dap + ((xp + k) << hsub) : NULL; \ + uint##depth##_t *d = (uint##depth##_t *)dp + (xp + k) * dst_step; \ kmax = FFMIN(-xp + dst_wp, src_wp); \ \ if (nbits == 8 && ((vsub && j+1 < src_hp) || !vsub) && octx->blend_row[i]) { \ @@ -566,11 +564,11 @@ static av_always_inline void blend_plane_##depth##_##nbits##bits(AVFilterContext da += 1 << hsub; \ a += 1 << hsub; \ } \ - dp += dst->linesize[dst_plane] / bytes; \ - sp += src->linesize[i] / bytes; \ - ap += (1 << vsub) * src->linesize[3] / bytes; \ + dp += dst->linesize[dst_plane]; \ + sp += src->linesize[i]; \ + ap += (1 << vsub) * src->linesize[3]; \ if (main_has_alpha) \ - dap += (1 << vsub) * dst->linesize[3] / bytes; \ + dap += (1 << vsub) * dst->linesize[3]; \ } \ } DEFINE_BLEND_PLANE(8, 8) @@ -584,11 +582,9 @@ static inline void alpha_composite_##depth##_##nbits##bits(const AVFrame *src, c int jobnr, int nb_jobs) \ { \ uint##depth##_t alpha; /* the amount of overlay to blend on to main */ \ - uint##depth##_t *s, *sa, *d, *da; \ int i, imax, j, jmax; \ int slice_start, slice_end; \ const uint##depth##_t max = (1 << nbits) - 1; \ - int bytes = depth / 8; \ \ imax = FFMIN3(-y + dst_h, FFMIN(src_h, dst_h), y + src_h); \ i = FFMAX(-y, 0); \ @@ -596,13 +592,13 @@ static inline void alpha_composite_##depth##_##nbits##bits(const AVFrame *src, c slice_start = i + (imax * jobnr) / nb_jobs; \ slice_end = i + ((imax * (jobnr+1)) / nb_jobs); \ \ - sa = (uint##depth##_t *)(src->data[3] + (slice_start) * src->linesize[3]); \ - da = (uint##depth##_t 
*)(dst->data[3] + (y + slice_start) * dst->linesize[3]); \ + const uint8_t *sa = src->data[3] + (slice_start) * src->linesize[3]; \ + uint8_t *da = dst->data[3] + (y + slice_start) * dst->linesize[3]; \ \ for (i = slice_start; i < slice_end; i++) { \ j = FFMAX(-x, 0); \ - s = sa + j; \ - d = da + x+j; \ + const uint##depth##_t *s = (const uint##depth##_t *)sa + j; \ + uint##depth##_t *d = (uint##depth##_t *)da + x+j; \ \ for (jmax = FFMIN(-x + dst_w, src_w); j < jmax; j++) { \ alpha = *s; \ @@ -622,8 +618,8 @@ static inline void alpha_composite_##depth##_##nbits##bits(const AVFrame *src, c d += 1; \ s += 1; \ } \ - da += dst->linesize[3] / bytes; \ - sa += src->linesize[3] / bytes; \ + da += dst->linesize[3]; \ + sa += src->linesize[3]; \ } \ } DEFINE_ALPHA_COMPOSITE(8, 8) -- 2.45.2
From 3e8107e7dcaf969051f0895e1de98ec064513857 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <andreas.rheinha...@outlook.com> Date: Wed, 4 Jun 2025 16:28:08 +0200 Subject: [PATCH 3/6] avfilter/vf_overlay: Pass variable type directly in macro Improves readability. Signed-off-by: Andreas Rheinhardt <andreas.rheinha...@outlook.com> --- libavfilter/vf_overlay.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/libavfilter/vf_overlay.c b/libavfilter/vf_overlay.c index 88aaad2aad..7d8d280a41 100644 --- a/libavfilter/vf_overlay.c +++ b/libavfilter/vf_overlay.c @@ -444,7 +444,7 @@ static av_always_inline void blend_slice_packed_rgb(AVFilterContext *ctx, } } -#define DEFINE_BLEND_PLANE(depth, nbits) \ +#define DEFINE_BLEND_PLANE(depth, T, nbits) \ static av_always_inline void blend_plane_##depth##_##nbits##bits(AVFilterContext *ctx, \ AVFrame *dst, const AVFrame *src, \ int src_w, int src_h, \ @@ -469,8 +469,8 @@ static av_always_inline void blend_plane_##depth##_##nbits##bits(AVFilterContext int xp = x>>hsub; \ int jmax, j, k, kmax; \ int slice_start, slice_end; \ - const uint##depth##_t max = (1 << nbits) - 1; \ - const uint##depth##_t mid = (1 << (nbits -1)) ; \ + const T max = (1 << nbits) - 1; \ + const T mid = (1 << (nbits - 1)); \ int bytes = depth / 8; \ \ dst_step /= bytes; \ @@ -489,10 +489,10 @@ static av_always_inline void blend_plane_##depth##_##nbits##bits(AVFilterContext \ for (j = slice_start; j < slice_end; j++) { \ k = FFMAX(-xp, 0); \ - const uint##depth##_t *s = (const uint##depth##_t *)sp + k; \ - const uint##depth##_t *a = (const uint##depth##_t *)ap + (k<<hsub); \ - const uint##depth##_t *da = main_has_alpha ? (uint##depth##_t *)dap + ((xp + k) << hsub) : NULL; \ - uint##depth##_t *d = (uint##depth##_t *)dp + (xp + k) * dst_step; \ + const T *s = (const T *)sp + k; \ + const T *a = (const T *)ap + (k << hsub); \ + const T *da = main_has_alpha ? 
(T *)dap + ((xp + k) << hsub) : NULL; \ + T *d = (T *)dp + (xp + k) * dst_step; \ kmax = FFMIN(-xp + dst_wp, src_wp); \ \ if (nbits == 8 && ((vsub && j+1 < src_hp) || !vsub) && octx->blend_row[i]) { \ @@ -571,20 +571,20 @@ static av_always_inline void blend_plane_##depth##_##nbits##bits(AVFilterContext dap += (1 << vsub) * dst->linesize[3]; \ } \ } -DEFINE_BLEND_PLANE(8, 8) -DEFINE_BLEND_PLANE(16, 10) +DEFINE_BLEND_PLANE(8, uint8_t, 8) +DEFINE_BLEND_PLANE(16, uint16_t, 10) -#define DEFINE_ALPHA_COMPOSITE(depth, nbits) \ +#define DEFINE_ALPHA_COMPOSITE(depth, T, nbits) \ static inline void alpha_composite_##depth##_##nbits##bits(const AVFrame *src, const AVFrame *dst, \ int src_w, int src_h, \ int dst_w, int dst_h, \ int x, int y, \ int jobnr, int nb_jobs) \ { \ - uint##depth##_t alpha; /* the amount of overlay to blend on to main */ \ + T alpha; /* the amount of overlay to blend on to main */ \ int i, imax, j, jmax; \ int slice_start, slice_end; \ - const uint##depth##_t max = (1 << nbits) - 1; \ + const T max = (1 << nbits) - 1; \ \ imax = FFMIN3(-y + dst_h, FFMIN(src_h, dst_h), y + src_h); \ i = FFMAX(-y, 0); \ @@ -597,8 +597,8 @@ static inline void alpha_composite_##depth##_##nbits##bits(const AVFrame *src, c \ for (i = slice_start; i < slice_end; i++) { \ j = FFMAX(-x, 0); \ - const uint##depth##_t *s = (const uint##depth##_t *)sa + j; \ - uint##depth##_t *d = (uint##depth##_t *)da + x+j; \ + const T *s = (const T *)sa + j; \ + T *d = (T *)da + x + j; \ \ for (jmax = FFMIN(-x + dst_w, src_w); j < jmax; j++) { \ alpha = *s; \ @@ -622,8 +622,8 @@ static inline void alpha_composite_##depth##_##nbits##bits(const AVFrame *src, c sa += src->linesize[3]; \ } \ } -DEFINE_ALPHA_COMPOSITE(8, 8) -DEFINE_ALPHA_COMPOSITE(16, 10) +DEFINE_ALPHA_COMPOSITE(8, uint8_t, 8) +DEFINE_ALPHA_COMPOSITE(16, uint16_t, 10) #define DEFINE_BLEND_SLICE_YUV(depth, nbits) \ static av_always_inline void blend_slice_yuv_##depth##_##nbits##bits(AVFilterContext *ctx, \ -- 2.45.2
From 8b374a0b9fc847c4d786995fa9486fdffa6edcb2 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <andreas.rheinha...@outlook.com> Date: Wed, 4 Jun 2025 19:34:01 +0200 Subject: [PATCH 4/6] avfilter/vf_overlay: Use correct alpha when > 8 bits When chroma subsampling is in use, the filter averages the corresponding (non subsampled) alpha values to get the actual alpha value. When vertical subsampling is in use, the next line is accessed via a[src->linesize[3]], yet a is an uint16_t* for >8 bit formats and linesize is always in bytes, so that this actually uses the second line below the current one. This is fixed in this commit. No FATE test needed updates, because the filter-overlay-yuv420p10 and filter-overlay-yuv444p10 tests use a yuv420p test file that has constant opacity after conversion to yuva. Signed-off-by: Andreas Rheinhardt <andreas.rheinha...@outlook.com> --- libavfilter/vf_overlay.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/libavfilter/vf_overlay.c b/libavfilter/vf_overlay.c index 7d8d280a41..cd92c7323b 100644 --- a/libavfilter/vf_overlay.c +++ b/libavfilter/vf_overlay.c @@ -353,6 +353,9 @@ static int config_output(AVFilterLink *outlink) // ((((x) + (y)) << 8) - ((x) + (y)) - (y) * (x)) is a faster version of: 255 * (x + y) #define UNPREMULTIPLY_ALPHA(x, y) ((((x) << 16) - ((x) << 9) + (x)) / ((((x) + (y)) << 8) - ((x) + (y)) - (y) * (x))) +#define PTR_ADD(TYPE, ptr, byte_addend) ((TYPE*)((uint8_t*)ptr + (byte_addend))) +#define CPTR_ADD(TYPE, ptr, byte_addend) ((const TYPE*)((const uint8_t*)ptr + (byte_addend))) + /** * Blend image in src to destination buffer dst at position (x, y). 
*/ @@ -511,13 +514,14 @@ static av_always_inline void blend_plane_##depth##_##nbits##bits(AVFilterContext \ /* average alpha for color components, improve quality */ \ if (hsub && vsub && j+1 < src_hp && k+1 < src_wp) { \ - alpha = (a[0] + a[src->linesize[3]] + \ - a[1] + a[src->linesize[3]+1]) >> 2; \ + const T *next_line = CPTR_ADD(T, a, src->linesize[3]); \ + alpha = (a[0] + next_line[0] + \ + a[1] + next_line[1]) >> 2; \ } else if (hsub || vsub) { \ alpha_h = hsub && k+1 < src_wp ? \ (a[0] + a[1]) >> 1 : a[0]; \ alpha_v = vsub && j+1 < src_hp ? \ - (a[0] + a[src->linesize[3]]) >> 1 : a[0]; \ + (a[0] + *CPTR_ADD(T, a, src->linesize[3])) >> 1 : a[0]; \ alpha = (alpha_v + alpha_h) >> 1; \ } else \ alpha = a[0]; \ @@ -527,13 +531,14 @@ static av_always_inline void blend_plane_##depth##_##nbits##bits(AVFilterContext /* average alpha for color components, improve quality */ \ uint8_t alpha_d; \ if (hsub && vsub && j+1 < src_hp && k+1 < src_wp) { \ - alpha_d = (da[0] + da[dst->linesize[3]] + \ - da[1] + da[dst->linesize[3]+1]) >> 2; \ + const T *next_line = CPTR_ADD(T, da, dst->linesize[3]); \ + alpha_d = (da[0] + next_line[0] + \ + da[1] + next_line[1]) >> 2; \ } else if (hsub || vsub) { \ alpha_h = hsub && k+1 < src_wp ? \ (da[0] + da[1]) >> 1 : da[0]; \ alpha_v = vsub && j+1 < src_hp ? \ - (da[0] + da[dst->linesize[3]]) >> 1 : da[0]; \ + (da[0] + *CPTR_ADD(T, da, dst->linesize[3])) >> 1 : da[0]; \ alpha_d = (alpha_v + alpha_h) >> 1; \ } else \ alpha_d = da[0]; \ -- 2.45.2
From 99d166480e89e78c5d6f8dfff407e896a296fccc Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <andreas.rheinha...@outlook.com> Date: Wed, 4 Jun 2025 20:06:01 +0200 Subject: [PATCH 5/6] avfilter/vf_overlay: Keep dst_step in bytes Signed-off-by: Andreas Rheinhardt <andreas.rheinha...@outlook.com> --- libavfilter/vf_overlay.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/libavfilter/vf_overlay.c b/libavfilter/vf_overlay.c index cd92c7323b..17fe8e47c9 100644 --- a/libavfilter/vf_overlay.c +++ b/libavfilter/vf_overlay.c @@ -474,9 +474,7 @@ static av_always_inline void blend_plane_##depth##_##nbits##bits(AVFilterContext int slice_start, slice_end; \ const T max = (1 << nbits) - 1; \ const T mid = (1 << (nbits - 1)); \ - int bytes = depth / 8; \ \ - dst_step /= bytes; \ j = FFMAX(-yp, 0); \ jmax = FFMIN3(-yp + dst_hp, FFMIN(src_hp, dst_hp), yp + src_hp); \ \ @@ -495,7 +493,7 @@ static av_always_inline void blend_plane_##depth##_##nbits##bits(AVFilterContext const T *s = (const T *)sp + k; \ const T *a = (const T *)ap + (k << hsub); \ const T *da = main_has_alpha ? (T *)dap + ((xp + k) << hsub) : NULL; \ - T *d = (T *)dp + (xp + k) * dst_step; \ + T *d = (T *)(dp + (xp + k) * dst_step); \ kmax = FFMIN(-xp + dst_wp, src_wp); \ \ if (nbits == 8 && ((vsub && j+1 < src_hp) || !vsub) && octx->blend_row[i]) { \ @@ -503,7 +501,7 @@ static av_always_inline void blend_plane_##depth##_##nbits##bits(AVFilterContext (uint8_t*)a, kmax - k, src->linesize[3]); \ \ s += c; \ - d += dst_step * c; \ + d = PTR_ADD(T, d, dst_step * c); \ if (main_has_alpha) \ da += (1 << hsub) * c; \ a += (1 << hsub) * c; \ @@ -564,7 +562,7 @@ static av_always_inline void blend_plane_##depth##_##nbits##bits(AVFilterContext } \ } \ s++; \ - d += dst_step; \ + d = PTR_ADD(T, d, dst_step); \ if (main_has_alpha) \ da += 1 << hsub; \ a += 1 << hsub; \ -- 2.45.2
From 36483d23c356a049ca6c3dd6034dbb6453062c8a Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <andreas.rheinha...@outlook.com> Date: Wed, 4 Jun 2025 20:27:46 +0200 Subject: [PATCH 6/6] avfilter/vf_overlay: Hoist calculations out of loop Also use const where appropriate. Signed-off-by: Andreas Rheinhardt <andreas.rheinha...@outlook.com> --- libavfilter/vf_overlay.c | 36 ++++++++++++++---------------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/libavfilter/vf_overlay.c b/libavfilter/vf_overlay.c index 17fe8e47c9..d77fe87c05 100644 --- a/libavfilter/vf_overlay.c +++ b/libavfilter/vf_overlay.c @@ -470,16 +470,13 @@ static av_always_inline void blend_plane_##depth##_##nbits##bits(AVFilterContext int dst_hp = AV_CEIL_RSHIFT(dst_h, vsub); \ int yp = y>>vsub; \ int xp = x>>hsub; \ - int jmax, j, k, kmax; \ - int slice_start, slice_end; \ const T max = (1 << nbits) - 1; \ const T mid = (1 << (nbits - 1)); \ \ - j = FFMAX(-yp, 0); \ - jmax = FFMIN3(-yp + dst_hp, FFMIN(src_hp, dst_hp), yp + src_hp); \ - \ - slice_start = j + (jmax * jobnr) / nb_jobs; \ - slice_end = j + (jmax * (jobnr+1)) / nb_jobs; \ + const int jmin = FFMAX(-yp, 0), jmax = FFMIN3(-yp + dst_hp, FFMIN(src_hp, dst_hp), yp + src_hp); \ + const int kmin = FFMAX(-xp, 0), kmax = FFMIN(-xp + dst_wp, src_wp); \ + const int slice_start = jmin + (jmax * jobnr) / nb_jobs; \ + const int slice_end = jmin + (jmax * (jobnr + 1)) / nb_jobs; \ \ const uint8_t *sp = src->data[i] + (slice_start) * src->linesize[i]; \ uint8_t *dp = dst->data[dst_plane] \ @@ -488,13 +485,12 @@ static av_always_inline void blend_plane_##depth##_##nbits##bits(AVFilterContext const uint8_t *ap = src->data[3] + (slice_start << vsub) * src->linesize[3]; \ const uint8_t *dap = main_has_alpha ? 
dst->data[3] + ((yp + slice_start) << vsub) * dst->linesize[3] : NULL; \ \ - for (j = slice_start; j < slice_end; j++) { \ - k = FFMAX(-xp, 0); \ + for (int j = slice_start; j < slice_end; ++j) { \ + int k = kmin; \ const T *s = (const T *)sp + k; \ const T *a = (const T *)ap + (k << hsub); \ const T *da = main_has_alpha ? (T *)dap + ((xp + k) << hsub) : NULL; \ T *d = (T *)(dp + (xp + k) * dst_step); \ - kmax = FFMIN(-xp + dst_wp, src_wp); \ \ if (nbits == 8 && ((vsub && j+1 < src_hp) || !vsub) && octx->blend_row[i]) { \ int c = octx->blend_row[i]((uint8_t*)d, (uint8_t*)da, (uint8_t*)s, \ @@ -585,25 +581,21 @@ static inline void alpha_composite_##depth##_##nbits##bits(const AVFrame *src, c int jobnr, int nb_jobs) \ { \ T alpha; /* the amount of overlay to blend on to main */ \ - int i, imax, j, jmax; \ - int slice_start, slice_end; \ const T max = (1 << nbits) - 1; \ \ - imax = FFMIN3(-y + dst_h, FFMIN(src_h, dst_h), y + src_h); \ - i = FFMAX(-y, 0); \ - \ - slice_start = i + (imax * jobnr) / nb_jobs; \ - slice_end = i + ((imax * (jobnr+1)) / nb_jobs); \ + const int imin = FFMAX(-y, 0), imax = FFMIN3(-y + dst_h, FFMIN(src_h, dst_h), y + src_h); \ + const int jmin = FFMAX(-x, 0), jmax = FFMIN(-x + dst_w, src_w); \ + const int slice_start = imin + ( imax * jobnr) / nb_jobs; \ + const int slice_end = imin + ((imax * (jobnr + 1)) / nb_jobs); \ \ const uint8_t *sa = src->data[3] + (slice_start) * src->linesize[3]; \ uint8_t *da = dst->data[3] + (y + slice_start) * dst->linesize[3]; \ \ - for (i = slice_start; i < slice_end; i++) { \ - j = FFMAX(-x, 0); \ - const T *s = (const T *)sa + j; \ - T *d = (T *)da + x + j; \ + for (int i = slice_start; i < slice_end; ++i) { \ + const T *s = (const T *)sa + jmin; \ + T *d = (T *)da + x + jmin; \ \ - for (jmax = FFMIN(-x + dst_w, src_w); j < jmax; j++) { \ + for (int j = jmin; j < jmax; ++j) { \ alpha = *s; \ if (alpha != 0 && alpha != max) { \ uint8_t alpha_d = *d; \ -- 2.45.2
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".