On 3/31/18 10:08 PM Michael Niedermayer wrote:
> doesnt apply anymore
Fixed.
From 2b78da0f9d2e49d3ae5d5c8c751371666ded85f3 Mon Sep 17 00:00:00 2001
From: Gabriel Machado
Date: Sat, 31 Mar 2018 23:03:18 -0300
Subject: [PATCH] avfilter: add OpenCL scale filter
---
configure | 1 +
libavfilter/Makefile | 1 +
libavfilter/allfilters.c | 1 +
libavfilter/opencl/scale.cl | 67 ++
libavfilter/opencl_source.h | 1 +
libavfilter/vf_scale_opencl.c | 544 ++
6 files changed, 615 insertions(+)
create mode 100644 libavfilter/opencl/scale.cl
create mode 100644 libavfilter/vf_scale_opencl.c
diff --git a/configure b/configure
index 08d6fc5..9bd896b 100755
--- a/configure
+++ b/configure
@@ -2877,6 +2877,7 @@ v4l2_m2m_deps_any="linux_videodev2_h"
hwupload_cuda_filter_deps="ffnvcodec"
scale_npp_filter_deps="ffnvcodec libnpp"
+scale_opencl_filter_deps="opencl"
scale_cuda_filter_deps="cuda_sdk"
thumbnail_cuda_filter_deps="cuda_sdk"
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index a90ca30..6303cbd 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -302,6 +302,7 @@ OBJS-$(CONFIG_SAB_FILTER)+= vf_sab.o
OBJS-$(CONFIG_SCALE_FILTER) += vf_scale.o scale.o
OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o vf_scale_cuda.ptx.o
OBJS-$(CONFIG_SCALE_NPP_FILTER) += vf_scale_npp.o scale.o
+OBJS-$(CONFIG_SCALE_OPENCL_FILTER) += vf_scale_opencl.o opencl.o opencl/scale.o
OBJS-$(CONFIG_SCALE_QSV_FILTER) += vf_scale_qsv.o
OBJS-$(CONFIG_SCALE_VAAPI_FILTER)+= vf_scale_vaapi.o scale.o vaapi_vpp.o
OBJS-$(CONFIG_SCALE2REF_FILTER) += vf_scale.o scale.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 6eac828..3073881 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -293,6 +293,7 @@ extern AVFilter ff_vf_sab;
extern AVFilter ff_vf_scale;
extern AVFilter ff_vf_scale_cuda;
extern AVFilter ff_vf_scale_npp;
+extern AVFilter ff_vf_scale_opencl;
extern AVFilter ff_vf_scale_qsv;
extern AVFilter ff_vf_scale_vaapi;
extern AVFilter ff_vf_scale2ref;
diff --git a/libavfilter/opencl/scale.cl b/libavfilter/opencl/scale.cl
new file mode 100644
index 000..68f7431
--- /dev/null
+++ b/libavfilter/opencl/scale.cl
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Gabriel Machado
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Nearest-neighbour scale of src into dst, one work-item per dst pixel.
+ * The global work size is taken as the dst size. */
+__kernel void neighbor(__write_only image2d_t dst, __read_only image2d_t src)
+{
+    const sampler_t sampler = (CLK_NORMALIZED_COORDS_TRUE |
+                               CLK_ADDRESS_CLAMP_TO_EDGE |
+                               CLK_FILTER_NEAREST);
+
+    int2 dst_pos = {get_global_id(0), get_global_id(1)};
+    float2 dst_size = {get_global_size(0), get_global_size(1)};
+    /* dst pixel centre in normalized coords; 0.5f keeps the math in float */
+    float2 src_coord = (convert_float2(dst_pos) + 0.5f) / dst_size;
+    float4 c = read_imagef(src, sampler, src_coord);
+    write_imagef(dst, dst_pos, c);
+}
+
+/* Separable filter scale: dst pixel = sum over a flt_size.x by flt_size.y
+ * window of src, weighted by cx[dst_x * flt_size.x + j] horizontally and
+ * cy[dst_y * flt_size.y + i] vertically. Global work size = dst size. */
+__kernel void scale(__write_only image2d_t dst,
+                    __read_only image2d_t src,
+                    __constant float *cx, __constant float *cy,
+                    int2 flt_size)
+{
+    const sampler_t s_img = (CLK_NORMALIZED_COORDS_FALSE |
+                             CLK_ADDRESS_CLAMP_TO_EDGE |
+                             CLK_FILTER_NEAREST);
+
+    int2 dst_pos = {get_global_id(0), get_global_id(1)};
+    float2 dst_size = {get_global_size(0), get_global_size(1)};
+    float2 src_size = convert_float2(get_image_dim(src));
+    /* Map the dst pixel centre into src pixel units, then take the texel
+     * at or before it as the window anchor (0.5f keeps this in float). */
+    float2 src_coord = (convert_float2(dst_pos) + 0.5f) * src_size / dst_size;
+    int2 src_pos = convert_int2(floor(src_coord - 0.5f));
+
+    float4 col = 0.0f;
+    for (int i = 0; i < flt_size.y; ++i) {
+        float4 s = 0.0f;
+        for (int j = 0; j < flt_size.x; ++j) {
+            int2 tap = {flt_size.x / 2 - j, flt_size.y / 2 - i};
+            float4 c = read_imagef(src, s_img, src_pos + tap);
+            s += c * cx[dst_pos.x * flt_size.x + j];
+        }
+        col += s * cy[dst_pos.y * flt_size.y + i];
+    }
+    write_imagef(dst, dst_pos, col);
+}
diff