PR #23442 opened by michaelni
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23442
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23442.patch

swresample/x86/resample: write only int16 in the int16 resampler 
The resample asm code as it is currently handles 1 sample at a time

The asm code should be redesigned and handle more than 1 sample at a
time. That is the whole purpose of SIMD. There is also multiple samples
available that need identical handling like from several channels or
similar handling from other points in time.

Such redesign would make the resampler faster and would change the
requirements of padding and maybe memory layout. So it seems simpler
to just avoid overwriting in the asm as it is today than to have
the allocation handle specific overallocation for asm code that
ideally should be redesigned

Fixes writing 16bits over the end of the array

This is an alternative fix for #23053

Found-by: Ivan Grigorev <[email protected]>
Signed-off-by: Michael Niedermayer <[email protected]>

I will also post an alternative fix that changes the memory allocation instead


>From 15976a0266ad0c832bbeace6e6203a64e005e565 Mon Sep 17 00:00:00 2001
From: Ivan Grigorev <[email protected]>
Date: Wed, 10 Jun 2026 17:42:43 +0200
Subject: [PATCH 1/2] swresample/tests: add resample realloc regression test

Add a regression test exercising the swr_convert(N) -> swr_convert(2N)
edge case: the second call reuses the internal preout buffer at full
capacity, with no trailing slack from swri_realloc_audio()'s amortized
doubling. internal_sample_fmt is forced to S16P to reach the int16 SIMD
resample path, where ff_resample_common_int16_sse2 overruns its
destination by 2 bytes on the last iteration.

Without a resampler fix this test fails under valgrind/ASAN with a
heap-buffer-overflow (Invalid write of size 4, 2 bytes past the end).

Signed-off-by: Ivan Grigorev <[email protected]>
---
 libswresample/Makefile                        |   3 +-
 .../tests/swresample_resample_realloc.c       | 120 ++++++++++++++++++
 tests/fate/libswresample.mak                  |   4 +
 tests/ref/fate/swr-resample-realloc           |   0
 4 files changed, 126 insertions(+), 1 deletion(-)
 create mode 100644 libswresample/tests/swresample_resample_realloc.c
 create mode 100644 tests/ref/fate/swr-resample-realloc

diff --git a/libswresample/Makefile b/libswresample/Makefile
index 8b9a0fe6f5..12fbfc35c1 100644
--- a/libswresample/Makefile
+++ b/libswresample/Makefile
@@ -24,4 +24,5 @@ SHLIBOBJS              += log2_tab.o
 # Windows resource file
 SHLIBOBJS-$(HAVE_GNU_WINDRES) += swresampleres.o
 
-TESTPROGS = swresample
+TESTPROGS = swresample \
+            swresample_resample_realloc \
diff --git a/libswresample/tests/swresample_resample_realloc.c 
b/libswresample/tests/swresample_resample_realloc.c
new file mode 100644
index 0000000000..a72194a7f8
--- /dev/null
+++ b/libswresample/tests/swresample_resample_realloc.c
@@ -0,0 +1,120 @@
+/*
+ * Exercise the swr_convert(N) -> swr_convert(2N) edge case where the
+ * second call reuses the internal preout buffer at full capacity, with
+ * no trailing slack from swri_realloc_audio()'s amortized doubling.
+ * Forces internal_sample_fmt=S16P to reach the int16 SIMD resample path.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "libavutil/channel_layout.h"
+#include "libavutil/mem.h"
+#include "libavutil/opt.h"
+#include "libavutil/samplefmt.h"
+#include "libswresample/swresample.h"
+
+int main(void)
+{
+    const int IN_RATE  = 48000;
+    const int OUT_RATE = 16000;
+    /* First call asks for N out frames, second call asks for 2N. */
+    const int N1_OUT = 160;
+    const int N2_OUT = 320;
+    const int N1_IN  = N1_OUT * IN_RATE / OUT_RATE; /* 480 */
+    const int N2_IN  = N2_OUT * IN_RATE / OUT_RATE; /* 960 */
+
+    SwrContext *swr = swr_alloc();
+    AVChannelLayout mono = AV_CHANNEL_LAYOUT_MONO;
+    int ret = 0;
+
+    if (!swr) {
+        fprintf(stderr, "swr_alloc failed\n");
+        return 1;
+    }
+
+    av_opt_set_chlayout   (swr, "in_chlayout",          &mono,                 
0);
+    av_opt_set_chlayout   (swr, "out_chlayout",         &mono,                 
0);
+    av_opt_set_int        (swr, "in_sample_rate",       IN_RATE,               
0);
+    av_opt_set_int        (swr, "out_sample_rate",      OUT_RATE,              
0);
+    av_opt_set_sample_fmt (swr, "in_sample_fmt",        AV_SAMPLE_FMT_S16,     
0);
+    av_opt_set_sample_fmt (swr, "out_sample_fmt",       AV_SAMPLE_FMT_S16,     
0);
+    /* Force the int16 SIMD resample path. */
+    av_opt_set_sample_fmt (swr, "internal_sample_fmt",  AV_SAMPLE_FMT_S16P,    
0);
+
+    if ((ret = swr_init(swr)) < 0) {
+        fprintf(stderr, "swr_init failed: %d\n", ret);
+        ret = 1;
+        goto end;
+    }
+
+    {
+        int16_t *input = av_calloc(N2_IN,  sizeof(int16_t));
+        int16_t *out   = av_calloc(N2_OUT, sizeof(int16_t));
+        const uint8_t *in_planes[1];
+        uint8_t       *out_planes[1];
+        int i, n;
+
+        if (!input || !out) {
+            fprintf(stderr, "alloc failed\n");
+            av_free(input);
+            av_free(out);
+            ret = 1;
+            goto end;
+        }
+
+        /* Non-zero samples so the SIMD inner loop produces real data. */
+        for (i = 0; i < N2_IN; ++i)
+            input[i] = (int16_t)((i * 7) & 0x3fff);
+
+        /* Call #1: out_count = N. swri_realloc_audio() doubles count and
+         * grows s->preout to capacity 2N (e.g. 640 bytes for N=160). */
+        in_planes[0]  = (const uint8_t *)input;
+        out_planes[0] = (uint8_t *)out;
+        n = swr_convert(swr, out_planes, N1_OUT, in_planes, N1_IN);
+        if (n < 0) {
+            fprintf(stderr, "swr_convert call#1 failed: %d\n", n);
+            av_free(input);
+            av_free(out);
+            ret = 1;
+            goto end;
+        }
+
+        /* Call #2: out_count = 2N. a->count == 2N, so swri_realloc_audio()
+         * skips realloc and reuses the existing buffer at full capacity. */
+        in_planes[0]  = (const uint8_t *)input;
+        out_planes[0] = (uint8_t *)out;
+        n = swr_convert(swr, out_planes, N2_OUT, in_planes, N2_IN);
+        if (n < 0) {
+            fprintf(stderr, "swr_convert call#2 failed: %d\n", n);
+            av_free(input);
+            av_free(out);
+            ret = 1;
+            goto end;
+        }
+
+        av_free(input);
+        av_free(out);
+    }
+
+end:
+    swr_free(&swr);
+    return ret;
+}
diff --git a/tests/fate/libswresample.mak b/tests/fate/libswresample.mak
index 5317d4148d..fbf9cf064c 100644
--- a/tests/fate/libswresample.mak
+++ b/tests/fate/libswresample.mak
@@ -1107,3 +1107,7 @@ fate-swr-custom-rematrix: REF = 
2a14a44deb4ae26e3b474ddbfbc048f8
 FATE_SWR += $(FATE_SWR_CUSTOM_REMATRIX-yes)
 FATE_FFMPEG += $(FATE_SWR)
 fate-swr: $(FATE_SWR)
+
+FATE_SWR += fate-swr-resample-realloc
+fate-swr-resample-realloc: 
libswresample/tests/swresample_resample_realloc$(EXESUF)
+fate-swr-resample-realloc: CMD = run 
libswresample/tests/swresample_resample_realloc$(EXESUF)
diff --git a/tests/ref/fate/swr-resample-realloc 
b/tests/ref/fate/swr-resample-realloc
new file mode 100644
index 0000000000..e69de29bb2
-- 
2.52.0


>From ac508c4a573825060a90e9c827b6d6ddbcd2bada Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <[email protected]>
Date: Wed, 10 Jun 2026 17:48:42 +0200
Subject: [PATCH 2/2] swresample/x86/resample: write only int16 in the int16
 resampler

The resample asm code as it is currently handles 1 sample at a time

The asm code should be redesigned and handle more than 1 sample at a
time. That is the whole purpose of SIMD. There is also multiple samples
available that need identical handling like from several channels or
similar handling from other points in time.

Such redesign would make the resampler faster and would change the
requirements of padding and maybe memory layout. So it seems simpler
to just avoid overwriting in the asm as it is today than to have
the allocation handle specific overallocation for asm code that
ideally should be redesigned

Fixes writing 16bits over the end of the array

This is an alternative fix for https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23053

Found-by: Ivan Grigorev <[email protected]>
Signed-off-by: Michael Niedermayer <[email protected]>
---
 libswresample/x86/resample.asm | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/libswresample/x86/resample.asm b/libswresample/x86/resample.asm
index 6c3dc28703..e155f171d1 100644
--- a/libswresample/x86/resample.asm
+++ b/libswresample/x86/resample.asm
@@ -198,7 +198,8 @@ cglobal resample_common_%1, 1, 7, 2, ctx, phase_count, dst, 
frac, \
     add                        fracd, dst_incr_modd
     packssdw                      m0, m0
     add                       indexd, dst_incr_divd
-    movd                      [dstq], m0
+    movd                     filterd, m0
+    mov                       [dstq], filterw
 %else ; float/double
     ; horizontal sum & store
 %if mmsize == 32
@@ -478,7 +479,8 @@ cglobal resample_linear_%1, 1, 7, 5, ctx, 
min_filter_length_x4, filter2, \
     paddd                         m0, m1
     psrad                         m0, 15
     packssdw                      m0, m0
-    movd                      [dstq], m0
+    movd                         eax, m0
+    mov                       [dstq], ax
 
     ; note that for imul/idiv, I need to move filter to edx/eax for each:
     ; - 32bit: eax=r0[filter1], edx=r2[filter2]
-- 
2.52.0

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to