This patch supports the derain filter project in GSoC. It adds support for
the following operations:




 (1) Conv padding method: "SAME" and "VALID"

 (2) Dilation

 (3) Activation: "NONE" and "LEAKY_RELU"




These operations are all needed by the derain filter. If the dnn native mode
in FFmpeg is modified in this way, the generation process of the Super
Resolution model must be changed accordingly, e.g. by adding a padding method
parameter (= 0) and a dilation parameter (= 1).




In addition, I have a question about the Super Resolution implementation. The
model training process of SR uses the "VALID" method. According to my
understanding of "VALID" mode in TensorFlow, the output image should be smaller
than it is in the current SR design, because pixels near the boundary are not
processed in "VALID" mode. However, in the current dnn native mode these
unprocessed pixels are filled with adjacent pixels. I wonder why it is done
this way here.




From 4d92ef21a5acf064122c51f442d0e2f5437b3343 Mon Sep 17 00:00:00 2001
From: Xuewei Meng <xwm...@pku.edu.cn>
Date: Sun, 28 Apr 2019 17:21:35 +0800
Subject: [PATCH] Add operation supports in dnn_native

Signed-off-by: Xuewei Meng <xwm...@pku.edu.cn>
---
 libavfilter/dnn_backend_native.c | 36 +++++++++++++++++++++-----------
 libavfilter/dnn_backend_native.h |  6 +++++-
 2 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/libavfilter/dnn_backend_native.c b/libavfilter/dnn_backend_native.c
index 70d857f5f2..0e3ef5d64d 100644
--- a/libavfilter/dnn_backend_native.c
+++ b/libavfilter/dnn_backend_native.c
@@ -157,13 +157,15 @@ DNNModel *ff_dnn_load_model_native(const char 
*model_filename)
                 ff_dnn_free_model_native(&model);
                 return NULL;
             }
+            conv_params->dilation = (int32_t)avio_rl32(model_file_context);
+            conv_params->padding_method = 
(int32_t)avio_rl32(model_file_context);
             conv_params->activation = (int32_t)avio_rl32(model_file_context);
             conv_params->input_num = (int32_t)avio_rl32(model_file_context);
             conv_params->output_num = (int32_t)avio_rl32(model_file_context);
             conv_params->kernel_size = (int32_t)avio_rl32(model_file_context);
             kernel_size = conv_params->input_num * conv_params->output_num *
                           conv_params->kernel_size * conv_params->kernel_size;
-            dnn_size += 16 + (kernel_size + conv_params->output_num << 2);
+            dnn_size += 24 + (kernel_size + conv_params->output_num << 2);
             if (dnn_size > file_size || conv_params->input_num <= 0 ||
                 conv_params->output_num <= 0 || conv_params->kernel_size <= 0){
                 avio_closep(&model_file_context);
@@ -221,23 +223,28 @@ DNNModel *ff_dnn_load_model_native(const char 
*model_filename)
 
 static void convolve(const float *input, float *output, const 
ConvolutionalParams *conv_params, int width, int height)
 {
-    int y, x, n_filter, ch, kernel_y, kernel_x;
     int radius = conv_params->kernel_size >> 1;
     int src_linesize = width * conv_params->input_num;
     int filter_linesize = conv_params->kernel_size * conv_params->input_num;
     int filter_size = conv_params->kernel_size * filter_linesize;
+    int pad_size = (conv_params->padding_method == VALID) ? 
(conv_params->kernel_size - 1) / 2 * conv_params->dilation : 0;
 
-    for (y = 0; y < height; ++y){
-        for (x = 0; x < width; ++x){
-            for (n_filter = 0; n_filter < conv_params->output_num; ++n_filter){
+    for (int y = pad_size; y < height - pad_size; ++y){
+        for (int x = pad_size; x < width - pad_size; ++x){
+            for (int n_filter = 0; n_filter < conv_params->output_num; 
++n_filter){
                 output[n_filter] = conv_params->biases[n_filter];
-                for (ch = 0; ch < conv_params->input_num; ++ch){
-                    for (kernel_y = 0; kernel_y < conv_params->kernel_size; 
++kernel_y){
-                        for (kernel_x = 0; kernel_x < 
conv_params->kernel_size; ++kernel_x){
-                            output[n_filter] += input[CLAMP_TO_EDGE(y + 
kernel_y - radius, height) * src_linesize +
-                                                      CLAMP_TO_EDGE(x + 
kernel_x - radius, width) * conv_params->input_num + ch] *
-                                                conv_params->kernel[n_filter * 
filter_size + kernel_y * filter_linesize +
-                                                                    kernel_x * 
conv_params->input_num + ch];
+
+                for (int ch = 0; ch < conv_params->input_num; ++ch){
+                    for (int kernel_y = 0; kernel_y < 
conv_params->kernel_size; ++kernel_y){
+                        for (int kernel_x = 0; kernel_x < 
conv_params->kernel_size; ++kernel_x){
+                            int y_pos = y + (kernel_y - radius) * 
conv_params->dilation;
+                            int x_pos = x + (kernel_x - radius) * 
conv_params->dilation;
+
+                            float input_pel = (x_pos < 0 || x_pos >= width || 
y_pos < 0 || y_pos >= height) ? 0.0 : 
+                                               input[y_pos * src_linesize + 
x_pos * conv_params->input_num + ch];
+
+                            output[n_filter] += input_pel * 
conv_params->kernel[n_filter * filter_size + kernel_y * filter_linesize +
+                                                                               
 kernel_x * conv_params->input_num + ch];
                         }
                     }
                 }
@@ -250,6 +257,11 @@ static void convolve(const float *input, float *output, 
const ConvolutionalParam
                     break;
                 case SIGMOID:
                     output[n_filter] = 1.0f / (1.0f + exp(-output[n_filter]));
+                    break;
+                case NONE:
+                    break;
+                case LEAKY_RELU:
+                    output[n_filter] = FFMAX(output[n_filter], 0.0) + 0.2 * 
FFMIN(output[n_filter], 0.0);
                 }
             }
             output += conv_params->output_num;
diff --git a/libavfilter/dnn_backend_native.h b/libavfilter/dnn_backend_native.h
index 51d4cac955..f7d4eb823b 100644
--- a/libavfilter/dnn_backend_native.h
+++ b/libavfilter/dnn_backend_native.h
@@ -32,7 +32,9 @@
 
 typedef enum {INPUT, CONV, DEPTH_TO_SPACE} DNNLayerType;
 
-typedef enum {RELU, TANH, SIGMOID} DNNActivationFunc;
+typedef enum {RELU, TANH, SIGMOID, NONE, LEAKY_RELU} DNNActivationFunc;
+
+typedef enum {VALID, SAME} DNNPaddingFunc;
 
 typedef struct Layer{
     DNNLayerType type;
@@ -43,6 +45,8 @@ typedef struct Layer{
 typedef struct ConvolutionalParams{
     int32_t input_num, output_num, kernel_size;
     DNNActivationFunc activation;
+    DNNPaddingFunc padding_method;
+    int32_t dilation;
     float *kernel;
     float *biases;
 } ConvolutionalParams;
-- 
2.17.1



 








_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Reply via email to