[FFmpeg-devel] [PATCH] avfilter/boxblur: add AVX2 horizontal pass (PR #20714)

2025-10-18 Thread MakarDev via ffmpeg-devel
PR #20714 opened by MakarDev
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20714
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20714.patch

Signed-off-by: MakarDev 
Added 1D AVX2 assembly vectorization support for the sliding window accumulator
in the avfilter/boxblur filter.
Benchmarking results

AVX2:
 - vf_boxblur.boxblur_row8  [OK]
 - vf_boxblur.boxblur_row16 [OK]
checkasm: all 2 tests passed
boxblur_blur_row8_c:   884.7 ( 1.00x)
boxblur_blur_row8_avx2: 92.7 ( 9.54x)
boxblur_blur_row16_c:  315.8 ( 1.00x)
boxblur_blur_row16_avx2:   255.3 ( 1.24x)


From 7915f3f232ac5e57fb7ac7e342653108c2119719 Mon Sep 17 00:00:00 2001
From: MakarDev 
Date: Thu, 16 Oct 2025 22:44:31 -0700
Subject: [PATCH] avfilter/boxblur: add AVX2 horizontal pass

Signed-off-by: MakarDev 
---
 libavfilter/vf_boxblur_dsp.h  |  45 +++
 libavfilter/x86/Makefile  |   2 +
 libavfilter/x86/vf_boxblur.asm| 575 ++
 libavfilter/x86/vf_boxblur_init.c |  71 
 tests/checkasm/Makefile   |   1 +
 tests/checkasm/checkasm.c |   3 +
 tests/checkasm/checkasm.h |   1 +
 tests/checkasm/vf_boxblur.c   | 165 +
 8 files changed, 863 insertions(+)
 create mode 100644 libavfilter/vf_boxblur_dsp.h
 create mode 100644 libavfilter/x86/vf_boxblur.asm
 create mode 100644 libavfilter/x86/vf_boxblur_init.c
 create mode 100644 tests/checkasm/vf_boxblur.c

diff --git a/libavfilter/vf_boxblur_dsp.h b/libavfilter/vf_boxblur_dsp.h
new file mode 100644
index 00..246c748eea
--- /dev/null
+++ b/libavfilter/vf_boxblur_dsp.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2025 Makar Kuznietsov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_BOXBLUR_DSP_H
+#define AVFILTER_BOXBLUR_DSP_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+typedef struct FFBoxblurDSPContext {
+/* 1D horizontal blur on one row of len pixels */
+void (*blur_row8)(uint8_t *dst, ptrdiff_t dst_step,
+  const uint8_t *src, ptrdiff_t src_step,
+  int len, int radius);
+
+void (*blur_row16)(uint16_t *dst, ptrdiff_t dst_step,
+   const uint16_t *src, ptrdiff_t src_step,
+   int len, int radius);
+} FFBoxblurDSPContext;
+
+/* C initializers */
+void ff_boxblur_dsp_init(FFBoxblurDSPContext *dsp);
+void ff_boxblur_dsp_init_aarch64(FFBoxblurDSPContext *dsp);
+void ff_boxblur_dsp_init_x86(FFBoxblurDSPContext *dsp);
+
+#endif /* AVFILTER_BOXBLUR_DSP_H */
+
+
diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile
index b485c10fbe..a89e9e4b78 100644
--- a/libavfilter/x86/Makefile
+++ b/libavfilter/x86/Makefile
@@ -14,6 +14,7 @@ OBJS-$(CONFIG_EQ_FILTER) += 
x86/vf_eq_init.o
 OBJS-$(CONFIG_FSPP_FILTER)   += x86/vf_fspp_init.o
 OBJS-$(CONFIG_GBLUR_FILTER)  += x86/vf_gblur_init.o
 OBJS-$(CONFIG_GRADFUN_FILTER)+= x86/vf_gradfun_init.o
+OBJS-$(CONFIG_BOXBLUR_FILTER)+= x86/vf_boxblur_init.o
 OBJS-$(CONFIG_FRAMERATE_FILTER)  += x86/vf_framerate_init.o
 OBJS-$(CONFIG_HALDCLUT_FILTER)   += x86/vf_lut3d_init.o
 OBJS-$(CONFIG_HFLIP_FILTER)  += x86/vf_hflip_init.o
@@ -63,6 +64,7 @@ X86ASM-OBJS-$(CONFIG_FRAMERATE_FILTER)   += 
x86/vf_framerate.o
 X86ASM-OBJS-$(CONFIG_FSPP_FILTER)+= x86/vf_fspp.o
 X86ASM-OBJS-$(CONFIG_GBLUR_FILTER)   += x86/vf_gblur.o
 X86ASM-OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun.o
+X86ASM-OBJS-$(CONFIG_BOXBLUR_FILTER) += x86/vf_boxblur.o
 X86ASM-OBJS-$(CONFIG_HALDCLUT_FILTER)+= x86/vf_lut3d.o
 X86ASM-OBJS-$(CONFIG_HFLIP_FILTER)   += x86/vf_hflip.o
 X86ASM-OBJS-$(CONFIG_HQDN3D_FILTER)  += x86/vf_hqdn3d.o
diff --git a/libavfilter/x86/vf_boxblur.asm b/libavfilter/x86/vf_boxblur.asm
new file mode 100644
index 00..48bd64d8f6
--- /dev/null
+++ b/libavfilter/x86/vf_boxblur.asm
@@ -0,0 +1,575 @@
+;*
+;* x86 AVX2-optimized functions for boxblur 1D row blur
+;*
+;* Copyright (C) 2025 Makar Kuznietsov
+;*
+;* This fi

[FFmpeg-devel] [PATCH] avfilter/boxblur: add AVX2 assembly (PR #20770)

2025-10-27 Thread MakarDev via ffmpeg-devel
PR #20770 opened by MakarDev
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20770
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20770.patch

AVX2 assembly implementation of the boxblur filter. Because the boxblur filter
carries a dependency chain through its running sum, it cannot be fully
vectorized; the speedup was achieved by vectorizing all the other operations in
the filter. Also, the assembly is written only for the "steady-state" middle
part of the image to which boxblur is applied.

Benchmarking results
tests/checkasm/checkasm --test=vf_boxblur --bench
AVX2:
 - vf_boxblur.boxblur_blur8  [OK]
 - vf_boxblur.boxblur_blur16 [OK]
checkasm: all 2 tests passed
boxblur_blur8_c:  1396.9 ( 1.00x)
boxblur_blur8_avx2:541.1 ( 2.58x)
boxblur_blur16_c: 1256.0 ( 1.00x)
boxblur_blur16_avx2:   504.2 ( 2.49x)


>From 26e836c1ebf2bfcd3c02f9e7d7a46dd135ee6174 Mon Sep 17 00:00:00 2001
From: MakarDev 
Date: Thu, 16 Oct 2025 22:44:31 -0700
Subject: [PATCH] avfilter/boxblur: add AVX2 assembly

---
 libavfilter/Makefile  |   2 +-
 libavfilter/boxblur.h |   9 ++
 libavfilter/boxblur_dsp.c |  37 ++
 libavfilter/vf_boxblur.c  |  93 ++---
 libavfilter/vf_boxblur_dsp.h  |  46 +++
 libavfilter/x86/Makefile  |   2 +
 libavfilter/x86/vf_boxblur.asm| 213 ++
 libavfilter/x86/vf_boxblur_init.c |  50 +++
 tests/checkasm/Makefile   |   1 +
 tests/checkasm/checkasm.c |   3 +
 tests/checkasm/checkasm.h |   1 +
 tests/checkasm/vf_boxblur.c   | 148 +
 tests/fate/checkasm.mak   |   1 +
 13 files changed, 585 insertions(+), 21 deletions(-)
 create mode 100644 libavfilter/boxblur_dsp.c
 create mode 100644 libavfilter/vf_boxblur_dsp.h
 create mode 100644 libavfilter/x86/vf_boxblur.asm
 create mode 100644 libavfilter/x86/vf_boxblur_init.c
 create mode 100644 tests/checkasm/vf_boxblur.c

diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 69d74183b2..00f956dc19 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -217,7 +217,7 @@ OBJS-$(CONFIG_BLEND_VULKAN_FILTER)   += 
vf_blend_vulkan.o framesync.o vu
 OBJS-$(CONFIG_BLOCKDETECT_FILTER)+= vf_blockdetect.o
 OBJS-$(CONFIG_BLURDETECT_FILTER) += vf_blurdetect.o edge_common.o
 OBJS-$(CONFIG_BM3D_FILTER)   += vf_bm3d.o framesync.o
-OBJS-$(CONFIG_BOXBLUR_FILTER)+= vf_boxblur.o boxblur.o
+OBJS-$(CONFIG_BOXBLUR_FILTER)+= vf_boxblur.o boxblur.o boxblur_dsp.o
 OBJS-$(CONFIG_BOXBLUR_OPENCL_FILTER) += vf_avgblur_opencl.o opencl.o \
 opencl/avgblur.o boxblur.o
 OBJS-$(CONFIG_BWDIF_FILTER)  += vf_bwdif.o bwdifdsp.o 
yadif_common.o
diff --git a/libavfilter/boxblur.h b/libavfilter/boxblur.h
index 214d4e0c93..16ca377600 100644
--- a/libavfilter/boxblur.h
+++ b/libavfilter/boxblur.h
@@ -44,4 +44,13 @@ int ff_boxblur_eval_filter_params(AVFilterLink *inlink,
   FilterParam *chroma_param,
   FilterParam *alpha_param);
 
+/* Forward declaration */
+typedef struct FFBoxblurDSPContext FFBoxblurDSPContext;
+
+/* Blur functions - used for testing and internally */
+void ff_boxblur_blur8(uint8_t *dst, int dst_step, const uint8_t *src,
+  int src_step, int len, int radius, FFBoxblurDSPContext *dsp);
+void ff_boxblur_blur16(uint16_t *dst, int dst_step, const uint16_t *src,
+   int src_step, int len, int radius, FFBoxblurDSPContext *dsp);
+
 #endif // AVFILTER_BOXBLUR_H
diff --git a/libavfilter/boxblur_dsp.c b/libavfilter/boxblur_dsp.c
new file mode 100644
index 00..9633cd1062
--- /dev/null
+++ b/libavfilter/boxblur_dsp.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2025 Makar Kuznietsov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "vf_boxblur_dsp.h"
+
+#if ARCH_X86_64
+void ff_boxblur_dsp_init_x86(FFBoxblurDSPContext *dsp, int depth);
+#endif
+
+av_cold void ff_boxblur_dsp_i