[FFmpeg-devel] [PATCH] avfilter/boxblur: add AVX2 horizontal pass (PR #20714)
PR #20714 opened by MakarDev
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20714
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20714.patch
Signed-off-by: MakarDev
Added 1D AVX2 assembly vectorization support for the sliding window accumulator
of the avfilter/boxblur filter.
Benchmarking results
AVX2:
- vf_boxblur.boxblur_row8 [OK]
- vf_boxblur.boxblur_row16 [OK]
checkasm: all 2 tests passed
boxblur_blur_row8_c: 884.7 ( 1.00x)
boxblur_blur_row8_avx2: 92.7 ( 9.54x)
boxblur_blur_row16_c: 315.8 ( 1.00x)
boxblur_blur_row16_avx2: 255.3 ( 1.24x)
From 7915f3f232ac5e57fb7ac7e342653108c2119719 Mon Sep 17 00:00:00 2001
From: MakarDev
Date: Thu, 16 Oct 2025 22:44:31 -0700
Subject: [PATCH] avfilter/boxblur: add AVX2 horizontal pass
Signed-off-by: MakarDev
---
libavfilter/vf_boxblur_dsp.h | 45 +++
libavfilter/x86/Makefile | 2 +
libavfilter/x86/vf_boxblur.asm| 575 ++
libavfilter/x86/vf_boxblur_init.c | 71
tests/checkasm/Makefile | 1 +
tests/checkasm/checkasm.c | 3 +
tests/checkasm/checkasm.h | 1 +
tests/checkasm/vf_boxblur.c | 165 +
8 files changed, 863 insertions(+)
create mode 100644 libavfilter/vf_boxblur_dsp.h
create mode 100644 libavfilter/x86/vf_boxblur.asm
create mode 100644 libavfilter/x86/vf_boxblur_init.c
create mode 100644 tests/checkasm/vf_boxblur.c
diff --git a/libavfilter/vf_boxblur_dsp.h b/libavfilter/vf_boxblur_dsp.h
new file mode 100644
index 00..246c748eea
--- /dev/null
+++ b/libavfilter/vf_boxblur_dsp.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2025 Makar Kuznietsov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_BOXBLUR_DSP_H
+#define AVFILTER_BOXBLUR_DSP_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+typedef struct FFBoxblurDSPContext {
+/* 1D horizontal blur on one row of len pixels */
+void (*blur_row8)(uint8_t *dst, ptrdiff_t dst_step,
+ const uint8_t *src, ptrdiff_t src_step,
+ int len, int radius);
+
+void (*blur_row16)(uint16_t *dst, ptrdiff_t dst_step,
+ const uint16_t *src, ptrdiff_t src_step,
+ int len, int radius);
+} FFBoxblurDSPContext;
+
+/* C initializers */
+void ff_boxblur_dsp_init(FFBoxblurDSPContext *dsp);
+void ff_boxblur_dsp_init_aarch64(FFBoxblurDSPContext *dsp);
+void ff_boxblur_dsp_init_x86(FFBoxblurDSPContext *dsp);
+
+#endif /* AVFILTER_BOXBLUR_DSP_H */
+
+
diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile
index b485c10fbe..a89e9e4b78 100644
--- a/libavfilter/x86/Makefile
+++ b/libavfilter/x86/Makefile
@@ -14,6 +14,7 @@ OBJS-$(CONFIG_EQ_FILTER) +=
x86/vf_eq_init.o
OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp_init.o
OBJS-$(CONFIG_GBLUR_FILTER) += x86/vf_gblur_init.o
OBJS-$(CONFIG_GRADFUN_FILTER)+= x86/vf_gradfun_init.o
+OBJS-$(CONFIG_BOXBLUR_FILTER)+= x86/vf_boxblur_init.o
OBJS-$(CONFIG_FRAMERATE_FILTER) += x86/vf_framerate_init.o
OBJS-$(CONFIG_HALDCLUT_FILTER) += x86/vf_lut3d_init.o
OBJS-$(CONFIG_HFLIP_FILTER) += x86/vf_hflip_init.o
@@ -63,6 +64,7 @@ X86ASM-OBJS-$(CONFIG_FRAMERATE_FILTER) +=
x86/vf_framerate.o
X86ASM-OBJS-$(CONFIG_FSPP_FILTER)+= x86/vf_fspp.o
X86ASM-OBJS-$(CONFIG_GBLUR_FILTER) += x86/vf_gblur.o
X86ASM-OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun.o
+X86ASM-OBJS-$(CONFIG_BOXBLUR_FILTER) += x86/vf_boxblur.o
X86ASM-OBJS-$(CONFIG_HALDCLUT_FILTER)+= x86/vf_lut3d.o
X86ASM-OBJS-$(CONFIG_HFLIP_FILTER) += x86/vf_hflip.o
X86ASM-OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d.o
diff --git a/libavfilter/x86/vf_boxblur.asm b/libavfilter/x86/vf_boxblur.asm
new file mode 100644
index 00..48bd64d8f6
--- /dev/null
+++ b/libavfilter/x86/vf_boxblur.asm
@@ -0,0 +1,575 @@
+;*
+;* x86 AVX2-optimized functions for boxblur 1D row blur
+;*
+;* Copyright (C) 2025 Makar Kuznietsov
+;*
+;* This fi
[FFmpeg-devel] [PATCH] avfilter/boxblur: add AVX2 assembly (PR #20770)
PR #20770 opened by MakarDev URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20770 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20770.patch AVX2 assembly implementation of the boxblur filter. As the boxblur filter has a dependency chain over sum, it can't be fully vectorized, but speedup was achieved through vectorizing all the other operations in the filter. Also, assembly is written only for the "steady-state" middle part of the image, to which boxblur is applied. Benchmarking results tests/checkasm/checkasm --test=vf_boxblur --bench AVX2: - vf_boxblur.boxblur_blur8 [OK] - vf_boxblur.boxblur_blur16 [OK] checkasm: all 2 tests passed boxblur_blur8_c: 1396.9 ( 1.00x) boxblur_blur8_avx2:541.1 ( 2.58x) boxblur_blur16_c: 1256.0 ( 1.00x) boxblur_blur16_avx2: 504.2 ( 2.49x) >From 26e836c1ebf2bfcd3c02f9e7d7a46dd135ee6174 Mon Sep 17 00:00:00 2001 From: MakarDev Date: Thu, 16 Oct 2025 22:44:31 -0700 Subject: [PATCH] avfilter/boxblur: add AVX2 assembly --- libavfilter/Makefile | 2 +- libavfilter/boxblur.h | 9 ++ libavfilter/boxblur_dsp.c | 37 ++ libavfilter/vf_boxblur.c | 93 ++--- libavfilter/vf_boxblur_dsp.h | 46 +++ libavfilter/x86/Makefile | 2 + libavfilter/x86/vf_boxblur.asm| 213 ++ libavfilter/x86/vf_boxblur_init.c | 50 +++ tests/checkasm/Makefile | 1 + tests/checkasm/checkasm.c | 3 + tests/checkasm/checkasm.h | 1 + tests/checkasm/vf_boxblur.c | 148 + tests/fate/checkasm.mak | 1 + 13 files changed, 585 insertions(+), 21 deletions(-) create mode 100644 libavfilter/boxblur_dsp.c create mode 100644 libavfilter/vf_boxblur_dsp.h create mode 100644 libavfilter/x86/vf_boxblur.asm create mode 100644 libavfilter/x86/vf_boxblur_init.c create mode 100644 tests/checkasm/vf_boxblur.c diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 69d74183b2..00f956dc19 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -217,7 +217,7 @@ OBJS-$(CONFIG_BLEND_VULKAN_FILTER) += vf_blend_vulkan.o framesync.o vu OBJS-$(CONFIG_BLOCKDETECT_FILTER)+= vf_blockdetect.o 
OBJS-$(CONFIG_BLURDETECT_FILTER) += vf_blurdetect.o edge_common.o OBJS-$(CONFIG_BM3D_FILTER) += vf_bm3d.o framesync.o -OBJS-$(CONFIG_BOXBLUR_FILTER)+= vf_boxblur.o boxblur.o +OBJS-$(CONFIG_BOXBLUR_FILTER)+= vf_boxblur.o boxblur.o boxblur_dsp.o OBJS-$(CONFIG_BOXBLUR_OPENCL_FILTER) += vf_avgblur_opencl.o opencl.o \ opencl/avgblur.o boxblur.o OBJS-$(CONFIG_BWDIF_FILTER) += vf_bwdif.o bwdifdsp.o yadif_common.o diff --git a/libavfilter/boxblur.h b/libavfilter/boxblur.h index 214d4e0c93..16ca377600 100644 --- a/libavfilter/boxblur.h +++ b/libavfilter/boxblur.h @@ -44,4 +44,13 @@ int ff_boxblur_eval_filter_params(AVFilterLink *inlink, FilterParam *chroma_param, FilterParam *alpha_param); +/* Forward declaration */ +typedef struct FFBoxblurDSPContext FFBoxblurDSPContext; + +/* Blur functions - used for testing and internally */ +void ff_boxblur_blur8(uint8_t *dst, int dst_step, const uint8_t *src, + int src_step, int len, int radius, FFBoxblurDSPContext *dsp); +void ff_boxblur_blur16(uint16_t *dst, int dst_step, const uint16_t *src, + int src_step, int len, int radius, FFBoxblurDSPContext *dsp); + #endif // AVFILTER_BOXBLUR_H diff --git a/libavfilter/boxblur_dsp.c b/libavfilter/boxblur_dsp.c new file mode 100644 index 00..9633cd1062 --- /dev/null +++ b/libavfilter/boxblur_dsp.c @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2025 Makar Kuznietsov + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" +#include "libavutil/attributes.h" +#include "vf_boxblur_dsp.h" + +#if ARCH_X86_64 +void ff_boxblur_dsp_init_x86(FFBoxblurDSPContext *dsp, int depth); +#endif + +av_cold void ff_boxblur_dsp_i
