Re: [FFmpeg-devel] [PATCH V2 1/4] dnn: add tf.nn.conv2d support for native model

2019-10-30 Thread Pedro Arthur
Em seg, 21 de out de 2019 às 09:44, Guo, Yejun 
escreveu:

> Unlike other tf.*.conv2d layers, tf.nn.conv2d does not create many
> nodes (within a scope) in the graph, it just acts like other layers.
> tf.nn.conv2d only creates one node in the graph, and no internal
> nodes such as 'kernel' are created.
>
> The format of native model file is also changed, a flag named
> has_bias is added, so change the version number.
>
> Signed-off-by: Guo, Yejun 
> ---
>  libavfilter/dnn/dnn_backend_native.c  |  2 +-
>  libavfilter/dnn/dnn_backend_native_layer_conv2d.c | 37 +++-
>  libavfilter/dnn/dnn_backend_native_layer_conv2d.h |  1 +
>  tests/dnn/dnn-layer-conv2d-test.c |  2 +
>  tools/python/convert_from_tensorflow.py   | 54
> ---
>  tools/python/convert_header.py|  4 +-
>  6 files changed, 82 insertions(+), 18 deletions(-)
>
> diff --git a/libavfilter/dnn/dnn_backend_native.c
> b/libavfilter/dnn/dnn_backend_native.c
> index 06b010d..ff280b5 100644
> --- a/libavfilter/dnn/dnn_backend_native.c
> +++ b/libavfilter/dnn/dnn_backend_native.c
> @@ -98,7 +98,7 @@ DNNModel *ff_dnn_load_model_native(const char
> *model_filename)
>  char header_expected[] = "FFMPEGDNNNATIVE";
>  char *buf;
>  size_t size;
> -int version, header_size, major_version_expected = 0;
> +int version, header_size, major_version_expected = 1;
>  ConvolutionalNetwork *network = NULL;
>  AVIOContext *model_file_context;
>  int file_size, dnn_size, parsed_size;
> diff --git a/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
> b/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
> index 0de8902..6ec0fa7 100644
> --- a/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
> +++ b/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
> @@ -38,27 +38,41 @@ int dnn_load_layer_conv2d(Layer *layer, AVIOContext
> *model_file_context, int fil
>  conv_params->input_num = (int32_t)avio_rl32(model_file_context);
>  conv_params->output_num = (int32_t)avio_rl32(model_file_context);
>  conv_params->kernel_size = (int32_t)avio_rl32(model_file_context);
> +conv_params->has_bias = (int32_t)avio_rl32(model_file_context);
> +dnn_size += 28;
> +
>  kernel_size = conv_params->input_num * conv_params->output_num *
> -  conv_params->kernel_size * conv_params->kernel_size;
> -dnn_size += 24 + (kernel_size + conv_params->output_num << 2);
> +  conv_params->kernel_size * conv_params->kernel_size;
> +dnn_size += kernel_size * 4;
> +if (conv_params->has_bias)
> +dnn_size += conv_params->output_num * 4;
> +
>  if (dnn_size > file_size || conv_params->input_num <= 0 ||
>  conv_params->output_num <= 0 || conv_params->kernel_size <= 0){
>  av_freep(&conv_params);
>  return 0;
>  }
> +
>  conv_params->kernel = av_malloc(kernel_size * sizeof(float));
> -conv_params->biases = av_malloc(conv_params->output_num *
> sizeof(float));
> -if (!conv_params->kernel || !conv_params->biases){
> -av_freep(&conv_params->kernel);
> -av_freep(&conv_params->biases);
> +if (!conv_params->kernel) {
>  av_freep(&conv_params);
>  return 0;
>  }
> -for (int i = 0; i < kernel_size; ++i){
> +for (int i = 0; i < kernel_size; ++i) {
>  conv_params->kernel[i] =
> av_int2float(avio_rl32(model_file_context));
>  }
> -for (int i = 0; i < conv_params->output_num; ++i){
> -conv_params->biases[i] =
> av_int2float(avio_rl32(model_file_context));
> +
> +conv_params->biases = NULL;
> +if (conv_params->has_bias) {
> +conv_params->biases = av_malloc(conv_params->output_num *
> sizeof(float));
> +if (!conv_params->biases){
> +av_freep(&conv_params->kernel);
> +av_freep(&conv_params);
> +return 0;
> +}
> +for (int i = 0; i < conv_params->output_num; ++i){
> +conv_params->biases[i] =
> av_int2float(avio_rl32(model_file_context));
> +}
>  }
>
>  layer->params = conv_params;
> @@ -103,7 +117,10 @@ int dnn_execute_layer_conv2d(DnnOperand *operands,
> const int32_t *input_operand_
>  for (int y = pad_size; y < height - pad_size; ++y) {
>  for (int x = pad_size; x < width - pad_size; ++x) {
>  for (int n_filter = 0; n_filter < conv_params->output_num;
> ++n_filter) {
> -output[n_filter] = conv_params->biases[n_filter];
> +if (conv_params->has_bias)
> +output[n_filter] = conv_params->biases[n_filter];
> +else
> +output[n_filter] = 0.f;
>
>  for (int ch = 0; ch < conv_params->input_num; ++ch) {
>  for (int kernel_y = 0; kernel_y <
> conv_params->kernel_size; ++kernel_y) {
> diff --git a/libavfilter/dnn/dnn_backend_native_layer_conv2d.h
> b/libavfilter/dnn/dnn_backend_native_layer_conv2d.h
> index db90b2b..bf87264 

[FFmpeg-devel] [PATCH V2 1/4] dnn: add tf.nn.conv2d support for native model

2019-10-21 Thread Guo, Yejun
Unlike other tf.*.conv2d layers, tf.nn.conv2d does not create many
nodes (within a scope) in the graph; it just acts like other layers.
tf.nn.conv2d creates only one node in the graph, and no internal
nodes such as 'kernel' are created.

The format of the native model file is also changed: a flag named
has_bias is added, so the version number is changed as well.

Signed-off-by: Guo, Yejun 
---
 libavfilter/dnn/dnn_backend_native.c  |  2 +-
 libavfilter/dnn/dnn_backend_native_layer_conv2d.c | 37 +++-
 libavfilter/dnn/dnn_backend_native_layer_conv2d.h |  1 +
 tests/dnn/dnn-layer-conv2d-test.c |  2 +
 tools/python/convert_from_tensorflow.py   | 54 ---
 tools/python/convert_header.py|  4 +-
 6 files changed, 82 insertions(+), 18 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_native.c 
b/libavfilter/dnn/dnn_backend_native.c
index 06b010d..ff280b5 100644
--- a/libavfilter/dnn/dnn_backend_native.c
+++ b/libavfilter/dnn/dnn_backend_native.c
@@ -98,7 +98,7 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
 char header_expected[] = "FFMPEGDNNNATIVE";
 char *buf;
 size_t size;
-int version, header_size, major_version_expected = 0;
+int version, header_size, major_version_expected = 1;
 ConvolutionalNetwork *network = NULL;
 AVIOContext *model_file_context;
 int file_size, dnn_size, parsed_size;
diff --git a/libavfilter/dnn/dnn_backend_native_layer_conv2d.c 
b/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
index 0de8902..6ec0fa7 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
@@ -38,27 +38,41 @@ int dnn_load_layer_conv2d(Layer *layer, AVIOContext 
*model_file_context, int fil
 conv_params->input_num = (int32_t)avio_rl32(model_file_context);
 conv_params->output_num = (int32_t)avio_rl32(model_file_context);
 conv_params->kernel_size = (int32_t)avio_rl32(model_file_context);
+conv_params->has_bias = (int32_t)avio_rl32(model_file_context);
+dnn_size += 28;
+
 kernel_size = conv_params->input_num * conv_params->output_num *
-  conv_params->kernel_size * conv_params->kernel_size;
-dnn_size += 24 + (kernel_size + conv_params->output_num << 2);
+  conv_params->kernel_size * conv_params->kernel_size;
+dnn_size += kernel_size * 4;
+if (conv_params->has_bias)
+dnn_size += conv_params->output_num * 4;
+
 if (dnn_size > file_size || conv_params->input_num <= 0 ||
 conv_params->output_num <= 0 || conv_params->kernel_size <= 0){
 av_freep(&conv_params);
 return 0;
 }
+
 conv_params->kernel = av_malloc(kernel_size * sizeof(float));
-conv_params->biases = av_malloc(conv_params->output_num * sizeof(float));
-if (!conv_params->kernel || !conv_params->biases){
-av_freep(&conv_params->kernel);
-av_freep(&conv_params->biases);
+if (!conv_params->kernel) {
 av_freep(&conv_params);
 return 0;
 }
-for (int i = 0; i < kernel_size; ++i){
+for (int i = 0; i < kernel_size; ++i) {
 conv_params->kernel[i] = av_int2float(avio_rl32(model_file_context));
 }
-for (int i = 0; i < conv_params->output_num; ++i){
-conv_params->biases[i] = av_int2float(avio_rl32(model_file_context));
+
+conv_params->biases = NULL;
+if (conv_params->has_bias) {
+conv_params->biases = av_malloc(conv_params->output_num * 
sizeof(float));
+if (!conv_params->biases){
+av_freep(&conv_params->kernel);
+av_freep(&conv_params);
+return 0;
+}
+for (int i = 0; i < conv_params->output_num; ++i){
+conv_params->biases[i] = 
av_int2float(avio_rl32(model_file_context));
+}
 }
 
 layer->params = conv_params;
@@ -103,7 +117,10 @@ int dnn_execute_layer_conv2d(DnnOperand *operands, const 
int32_t *input_operand_
 for (int y = pad_size; y < height - pad_size; ++y) {
 for (int x = pad_size; x < width - pad_size; ++x) {
 for (int n_filter = 0; n_filter < conv_params->output_num; 
++n_filter) {
-output[n_filter] = conv_params->biases[n_filter];
+if (conv_params->has_bias)
+output[n_filter] = conv_params->biases[n_filter];
+else
+output[n_filter] = 0.f;
 
 for (int ch = 0; ch < conv_params->input_num; ++ch) {
 for (int kernel_y = 0; kernel_y < 
conv_params->kernel_size; ++kernel_y) {
diff --git a/libavfilter/dnn/dnn_backend_native_layer_conv2d.h 
b/libavfilter/dnn/dnn_backend_native_layer_conv2d.h
index db90b2b..bf87264 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_conv2d.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_conv2d.h
@@ -31,6 +31,7 @@ typedef struct ConvolutionalParams{
 DNNActivationFunc activation;
 DNNConvPaddingParam padding_method;
 int32_t dilation;
+