Re: [FFmpeg-devel] [PATCH v5] avfilter: add OpenCL scale filter

2018-04-08 Thread Song, Ruiling
> --- /dev/null
> +++ b/libavfilter/opencl/scale.cl
> @@ -0,0 +1,67 @@
> +/*
> + * Copyright (c) 2018 Gabriel Machado
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 
> USA
> + */
> +
> +__kernel void neighbor(__write_only image2d_t dst,
> +   __read_only  image2d_t src)
> +{
> +const sampler_t sampler = (CLK_NORMALIZED_COORDS_TRUE |
> +   CLK_ADDRESS_CLAMP_TO_EDGE |
> +   CLK_FILTER_NEAREST);
> +
> +int2 dst_pos = {get_global_id(0), get_global_id(1)};
> +float2 dst_size = {get_global_size(0), get_global_size(1)};
> +
> +float2 src_coord = (convert_float2(dst_pos) + 0.5) / dst_size;
For floating-point constants, it is better to add the suffix 'f' (e.g. 0.5f)
to tell the compiler that the value is a single-precision float.
The reason is that, without a suffix, a floating-point constant is treated
as a double-precision number.
OpenCL C inherits this behaviour from the C language, and most GPUs show a
significant performance difference between single-precision and
double-precision floating-point operations.
If you intentionally need double precision, it is fine to leave it as it is.
There are also some other places like this in the OpenCL file.

Thanks!
Ruiling
> +
> +float4 c = read_imagef(src, sampler, src_coord);
> +write_imagef(dst, dst_pos, c);
> +}
> +
> +__kernel void scale(__write_only image2d_t dst,
> +__read_only  image2d_t src,
> +__constant   float*cx,
> +__constant   float*cy,
> + int2  flt_size)
> +{
> +const sampler_t s_img = (CLK_NORMALIZED_COORDS_FALSE |
> + CLK_ADDRESS_CLAMP_TO_EDGE |
> + CLK_FILTER_NEAREST);
> +
> +int2 dst_pos = {get_global_id(0), get_global_id(1)};
> +
> +float2 dst_size = {get_global_size(0), get_global_size(1)};
> +float2 src_size = convert_float2(get_image_dim(src));
> +
> +float2 src_coord = (convert_float2(dst_pos) + 0.5) * src_size / dst_size;
> +
> +int2 src_pos = convert_int2(floor(src_coord - 0.5));
> +
> +float4 col = 0;
> +for (int i = 0; i < flt_size.y; ++i) {
> +float4 s = 0;
> +for (int j = 0; j < flt_size.x; ++j) {
> +float4 c = read_imagef(src, s_img, src_pos + (int2){flt_size.x/2 
> - j,
> flt_size.y/2 - i});
> +s += c * cx[dst_pos.x * flt_size.x + j];
> +}
> +col += s * cy[dst_pos.y * flt_size.y + i];
> +}
> +
> +write_imagef(dst, dst_pos, col);
> +}

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH v5] avfilter: add OpenCL scale filter

2018-03-31 Thread Gabriel Machado
On 3/31/18 10:08 PM Michael Niedermayer wrote:
> doesn't apply anymore

Fixed.

From 2b78da0f9d2e49d3ae5d5c8c751371666ded85f3 Mon Sep 17 00:00:00 2001
From: Gabriel Machado 
Date: Sat, 31 Mar 2018 23:03:18 -0300
Subject: [PATCH] avfilter: add OpenCL scale filter

---
 configure |   1 +
 libavfilter/Makefile  |   1 +
 libavfilter/allfilters.c  |   1 +
 libavfilter/opencl/scale.cl   |  67 ++
 libavfilter/opencl_source.h   |   1 +
 libavfilter/vf_scale_opencl.c | 544 ++
 6 files changed, 615 insertions(+)
 create mode 100644 libavfilter/opencl/scale.cl
 create mode 100644 libavfilter/vf_scale_opencl.c

diff --git a/configure b/configure
index 08d6fc5..9bd896b 100755
--- a/configure
+++ b/configure
@@ -2877,6 +2877,7 @@ v4l2_m2m_deps_any="linux_videodev2_h"
 
 hwupload_cuda_filter_deps="ffnvcodec"
 scale_npp_filter_deps="ffnvcodec libnpp"
+scale_opencl_filter_deps="opencl"
 scale_cuda_filter_deps="cuda_sdk"
 thumbnail_cuda_filter_deps="cuda_sdk"
 
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index a90ca30..6303cbd 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -302,6 +302,7 @@ OBJS-$(CONFIG_SAB_FILTER)+= vf_sab.o
 OBJS-$(CONFIG_SCALE_FILTER)  += vf_scale.o scale.o
 OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o 
vf_scale_cuda.ptx.o
 OBJS-$(CONFIG_SCALE_NPP_FILTER)  += vf_scale_npp.o scale.o
+OBJS-$(CONFIG_SCALE_OPENCL_FILTER)   += vf_scale_opencl.o opencl.o 
opencl/scale.o
 OBJS-$(CONFIG_SCALE_QSV_FILTER)  += vf_scale_qsv.o
 OBJS-$(CONFIG_SCALE_VAAPI_FILTER)+= vf_scale_vaapi.o scale.o 
vaapi_vpp.o
 OBJS-$(CONFIG_SCALE2REF_FILTER)  += vf_scale.o scale.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 6eac828..3073881 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -293,6 +293,7 @@ extern AVFilter ff_vf_sab;
 extern AVFilter ff_vf_scale;
 extern AVFilter ff_vf_scale_cuda;
 extern AVFilter ff_vf_scale_npp;
+extern AVFilter ff_vf_scale_opencl;
 extern AVFilter ff_vf_scale_qsv;
 extern AVFilter ff_vf_scale_vaapi;
 extern AVFilter ff_vf_scale2ref;
diff --git a/libavfilter/opencl/scale.cl b/libavfilter/opencl/scale.cl
new file mode 100644
index 000..68f7431
--- /dev/null
+++ b/libavfilter/opencl/scale.cl
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Gabriel Machado
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+__kernel void neighbor(__write_only image2d_t dst,
+   __read_only  image2d_t src)
+{
+const sampler_t sampler = (CLK_NORMALIZED_COORDS_TRUE |
+   CLK_ADDRESS_CLAMP_TO_EDGE |
+   CLK_FILTER_NEAREST);
+
+int2 dst_pos = {get_global_id(0), get_global_id(1)};
+float2 dst_size = {get_global_size(0), get_global_size(1)};
+
+float2 src_coord = (convert_float2(dst_pos) + 0.5) / dst_size;
+
+float4 c = read_imagef(src, sampler, src_coord);
+write_imagef(dst, dst_pos, c);
+}
+
+__kernel void scale(__write_only image2d_t dst,
+__read_only  image2d_t src,
+__constant   float*cx,
+__constant   float*cy,
+ int2  flt_size)
+{
+const sampler_t s_img = (CLK_NORMALIZED_COORDS_FALSE |
+ CLK_ADDRESS_CLAMP_TO_EDGE |
+ CLK_FILTER_NEAREST);
+
+int2 dst_pos = {get_global_id(0), get_global_id(1)};
+
+float2 dst_size = {get_global_size(0), get_global_size(1)};
+float2 src_size = convert_float2(get_image_dim(src));
+
+float2 src_coord = (convert_float2(dst_pos) + 0.5) * src_size / dst_size;
+
+int2 src_pos = convert_int2(floor(src_coord - 0.5));
+
+float4 col = 0;
+for (int i = 0; i < flt_size.y; ++i) {
+float4 s = 0;
+for (int j = 0; j < flt_size.x; ++j) {
+float4 c = read_imagef(src, s_img, src_pos + (int2){flt_size.x/2 - 
j, flt_size.y/2 - i});
+s += c * cx[dst_pos.x * flt_size.x + j];
+}
+col += s * cy[dst_pos.y * flt_size.y + i];
+}
+
+write_imagef(dst, dst_pos, col);
+}
diff