[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-06-06 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r193410022
 
 

 ##
 File path: tests/python/quantization/test_quantization.py
 ##
 @@ -120,114 +128,126 @@ def check_requantize(shape, min_calib_range=None, max_calib_range=None):
 
 @with_seed()
 def test_quantized_conv():
-    if mx.current_context().device_type != 'gpu':
-        print('skipped testing quantized_conv on cpu since it is not implemented yet')
+    if is_test_for_naive_cpu():
+        print('skipped testing quantized_conv for naive cpu since it is not implemented yet')
         return
 
-    def check_quantized_conv(data_shape, kernel, num_filter, pad, stride, no_bias):
-        with mx.Context('gpu', 0):
-            # run fp32 conv
-            data = mx.sym.Variable(name='data', shape=data_shape, dtype='float32')
-            conv2d = mx.sym.Convolution(data=data, kernel=kernel, num_filter=num_filter, pad=pad, stride=stride,
-                                        no_bias=no_bias, cudnn_off=False, name='conv2d')
-            arg_shapes, _, _ = conv2d.infer_shape(data=data_shape)
-            arg_names = conv2d.list_arguments()
-            conv_exe_fp32 = conv2d.simple_bind(ctx=mx.current_context(), grad_req='null')
-            conv_exe_fp32.arg_dict[arg_names[0]][:] = mx.nd.random.uniform(low=-127.0, high=127.0,
-                                                                           shape=data_shape).astype('int32')
-            conv_exe_fp32.arg_dict[arg_names[1]][:] = mx.nd.random.uniform(low=-127.0, high=127.0,
-                                                                           shape=arg_shapes[1]).astype('int32')
-            if not no_bias:
-                conv_exe_fp32.arg_dict[arg_names[2]][:] = mx.nd.random.uniform(low=-127.0, high=127.0,
-                                                                               shape=arg_shapes[2]).astype('int32')
-            output = conv_exe_fp32.forward()[0]
-
-            # run quantized conv
-            qdata = mx.sym.Variable(name='qdata', shape=data_shape, dtype='int8')
-            qweight = mx.sym.Variable(name='qweight', dtype='int8')
-            min_data = mx.sym.Variable(name='min_data')
-            max_data = mx.sym.Variable(name='max_data')
-            min_weight = mx.sym.Variable(name='min_weight')
-            max_weight = mx.sym.Variable(name='max_weight')
-            quantized_conv2d = mx.sym.contrib.quantized_conv(data=qdata, weight=qweight, min_data=min_data,
-                                                             max_data=max_data, min_weight=min_weight,
-                                                             max_weight=max_weight, kernel=kernel,
-                                                             num_filter=num_filter, pad=pad, stride=stride,
-                                                             no_bias=no_bias)
-            qarg_names = quantized_conv2d.list_arguments()
-            type_dict = None
-            if not no_bias:
-                type_dict = {qarg_names[2]: 'int8'}
-            conv_exe_int8 = quantized_conv2d.simple_bind(ctx=mx.current_context(), type_dict=type_dict, grad_req='null')
-            conv_exe_int8.arg_dict[qarg_names[0]][:] = conv_exe_fp32.arg_dict[arg_names[0]].astype('int8')
-            conv_exe_int8.arg_dict[qarg_names[1]][:] = conv_exe_fp32.arg_dict[arg_names[1]].astype('int8')
-            quantized_range = 127.0
-            if no_bias:
-                conv_exe_int8.arg_dict[qarg_names[2]][:] = -quantized_range
-                conv_exe_int8.arg_dict[qarg_names[3]][:] = quantized_range
-                conv_exe_int8.arg_dict[qarg_names[4]][:] = -quantized_range
-                conv_exe_int8.arg_dict[qarg_names[5]][:] = quantized_range
-            else:
-                conv_exe_int8.arg_dict[qarg_names[2]][:] = conv_exe_fp32.arg_dict[arg_names[2]].astype('int8')
-                conv_exe_int8.arg_dict[qarg_names[3]][:] = -quantized_range
-                conv_exe_int8.arg_dict[qarg_names[4]][:] = quantized_range
-                conv_exe_int8.arg_dict[qarg_names[5]][:] = -quantized_range
-                conv_exe_int8.arg_dict[qarg_names[6]][:] = quantized_range
-                conv_exe_int8.arg_dict[qarg_names[7]][:] = -quantized_range
-                conv_exe_int8.arg_dict[qarg_names[8]][:] = quantized_range
-            qoutput, min_range, max_range = conv_exe_int8.forward()
+    if is_test_for_mkldnn():
+        dtype = 'uint8'
 
 Review comment:
  Currently MKLDNN quantization only supports uint8 as input data, so we added separate logic for MKLDNN.
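  For context, a minimal sketch of what "uint8 as input data" means at the operator level, using the existing contrib quantize operator (the shape and calibration range below are made-up example values):

```python
import mxnet as mx

# fp32 activations plus their observed min/max (example values only)
data = mx.nd.random.uniform(low=0.0, high=1.0, shape=(1, 3, 8, 8))
min_range = mx.nd.array([0.0])
max_range = mx.nd.array([1.0])

# quantize to uint8, the only input dtype the MKLDNN quantized ops accept right now
qdata, qmin, qmax = mx.nd.contrib.quantize(data, min_range, max_range, out_type='uint8')
print(qdata.dtype, qmin.asscalar(), qmax.asscalar())
```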


[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-06-06 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r193411184
 
 

 ##
 File path: tests/python/quantization/test_quantization.py
 ##
 @@ -409,11 +441,19 @@ def test_quantize_sym_with_calib():
     sym = get_fp32_sym()
     offline_params = [name for name in sym.list_arguments()
                       if not name.startswith('data') and not name.endswith('label')]
-    qsym = mx.contrib.quant._quantize_symbol(sym, offline_params=offline_params)
-    requantize_op_names = ['requantize_conv', 'requantize_fc']
-    th_dict = {'conv_output': (np.random.uniform(low=100.0, high=200.0), np.random.uniform(low=100.0, high=200.0)),
-               'fc_output': (np.random.uniform(low=100.0, high=200.0), np.random.uniform(low=100.0, high=200.0))}
-    op_name_to_th_name = {'requantize_conv': 'conv_output', 'requantize_fc': 'fc_output'}
+    if is_test_for_mkldnn():
+        dtype = 'uint8'
+        requantize_op_names = ['requantize_conv']
+        th_dict = {'conv_output': (np.random.uniform(low=100.0, high=200.0), np.random.uniform(low=100.0, high=200.0))}
+        op_name_to_th_name = {'requantize_conv': 'conv_output'}
+    else:
+        dtype = 'int8'
+        requantize_op_names = ['requantize_conv', 'requantize_fc']
+        th_dict = {'conv_output': (np.random.uniform(low=100.0, high=200.0), np.random.uniform(low=100.0, high=200.0)),
+                   'fc_output': (np.random.uniform(low=100.0, high=200.0), np.random.uniform(low=100.0, high=200.0))}
+        op_name_to_th_name = {'requantize_conv': 'conv_output', 'requantize_fc': 'fc_output'}
 
 Review comment:
  Currently MKLDNN doesn't support int8 for the FC layer, so we added separate logic for MKLDNN to reflect the real MKLDNN use case; once int8 FC is supported we can use the same logic.




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-06-06 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r193410215
 
 

 ##
 File path: tests/python/quantization/test_quantization.py
 ##
 @@ -319,6 +339,11 @@ def check_quantized_flatten(shape):
 
 @with_seed()
 def test_quantize_params():
+if is_test_for_mkldnn():
+dtype = 'uint8'
 
 Review comment:
  Currently MKLDNN quantization only supports uint8 as input data, so we added separate logic for MKLDNN.




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-06-06 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r193410323
 
 

 ##
 File path: tests/python/quantization/test_quantization.py
 ##
 @@ -409,11 +441,19 @@ def test_quantize_sym_with_calib():
     sym = get_fp32_sym()
     offline_params = [name for name in sym.list_arguments()
                       if not name.startswith('data') and not name.endswith('label')]
-    qsym = mx.contrib.quant._quantize_symbol(sym, offline_params=offline_params)
-    requantize_op_names = ['requantize_conv', 'requantize_fc']
-    th_dict = {'conv_output': (np.random.uniform(low=100.0, high=200.0), np.random.uniform(low=100.0, high=200.0)),
-               'fc_output': (np.random.uniform(low=100.0, high=200.0), np.random.uniform(low=100.0, high=200.0))}
-    op_name_to_th_name = {'requantize_conv': 'conv_output', 'requantize_fc': 'fc_output'}
+    if is_test_for_mkldnn():
+        dtype = 'uint8'
 
 Review comment:
  Currently MKLDNN quantization only supports uint8 as input data, so we added separate logic for MKLDNN.




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-06-06 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r193410255
 
 

 ##
 File path: tests/python/quantization/test_quantization.py
 ##
 @@ -353,6 +378,11 @@ def get_fp32_sym():
 
 @with_seed()
 def test_quantize_model():
+if is_test_for_mkldnn():
+dtype = 'uint8'
 
 Review comment:
  Currently MKLDNN quantization only supports uint8 as input data, so we added separate logic for MKLDNN.




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-06-06 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r193409350
 
 

 ##
 File path: tests/python/mkl/test_quantization_mkldnn.py
 ##
 @@ -0,0 +1,28 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import os
+import sys
+import mxnet as mx
+
+os.environ['USE_MKLDNN'] = '1'
 
 Review comment:
  @marcoabreu Thanks for helping with the review. I agree that ideally we should have an API (at both the C level and the Python level) to differentiate the various backends; we can revisit this once such an API exists. For now I will rename the variable to ENABLE_MKLDNN_QUANTIZATION_TEST.
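  For reference, a rough sketch of how the renamed switch could be consumed by the wrapper test file (the variable name follows the rename above; the exact file contents here are an assumption, not the final diff):

```python
# hypothetical tests/python/mkl/test_quantization_mkldnn.py after the rename
import os
import sys

os.environ['ENABLE_MKLDNN_QUANTIZATION_TEST'] = '1'
curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
sys.path.insert(0, os.path.join(curr_path, '../quantization'))
from test_quantization import *  # reuse the shared quantization test cases

if __name__ == '__main__':
    import nose
    nose.runmodule()
```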




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-06-06 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r193410162
 
 

 ##
 File path: tests/python/quantization/test_quantization.py
 ##
 @@ -120,114 +128,126 @@ def check_requantize(shape, min_calib_range=None, max_calib_range=None):
 
 @with_seed()
 def test_quantized_conv():
-    if mx.current_context().device_type != 'gpu':
-        print('skipped testing quantized_conv on cpu since it is not implemented yet')
+    if is_test_for_naive_cpu():
+        print('skipped testing quantized_conv for naive cpu since it is not implemented yet')
         return
 
-    def check_quantized_conv(data_shape, kernel, num_filter, pad, stride, no_bias):
-        with mx.Context('gpu', 0):
-            # run fp32 conv
-            data = mx.sym.Variable(name='data', shape=data_shape, dtype='float32')
-            conv2d = mx.sym.Convolution(data=data, kernel=kernel, num_filter=num_filter, pad=pad, stride=stride,
-                                        no_bias=no_bias, cudnn_off=False, name='conv2d')
-            arg_shapes, _, _ = conv2d.infer_shape(data=data_shape)
-            arg_names = conv2d.list_arguments()
-            conv_exe_fp32 = conv2d.simple_bind(ctx=mx.current_context(), grad_req='null')
-            conv_exe_fp32.arg_dict[arg_names[0]][:] = mx.nd.random.uniform(low=-127.0, high=127.0,
-                                                                           shape=data_shape).astype('int32')
-            conv_exe_fp32.arg_dict[arg_names[1]][:] = mx.nd.random.uniform(low=-127.0, high=127.0,
-                                                                           shape=arg_shapes[1]).astype('int32')
-            if not no_bias:
-                conv_exe_fp32.arg_dict[arg_names[2]][:] = mx.nd.random.uniform(low=-127.0, high=127.0,
-                                                                               shape=arg_shapes[2]).astype('int32')
-            output = conv_exe_fp32.forward()[0]
-
-            # run quantized conv
-            qdata = mx.sym.Variable(name='qdata', shape=data_shape, dtype='int8')
-            qweight = mx.sym.Variable(name='qweight', dtype='int8')
-            min_data = mx.sym.Variable(name='min_data')
-            max_data = mx.sym.Variable(name='max_data')
-            min_weight = mx.sym.Variable(name='min_weight')
-            max_weight = mx.sym.Variable(name='max_weight')
-            quantized_conv2d = mx.sym.contrib.quantized_conv(data=qdata, weight=qweight, min_data=min_data,
-                                                             max_data=max_data, min_weight=min_weight,
-                                                             max_weight=max_weight, kernel=kernel,
-                                                             num_filter=num_filter, pad=pad, stride=stride,
-                                                             no_bias=no_bias)
-            qarg_names = quantized_conv2d.list_arguments()
-            type_dict = None
-            if not no_bias:
-                type_dict = {qarg_names[2]: 'int8'}
-            conv_exe_int8 = quantized_conv2d.simple_bind(ctx=mx.current_context(), type_dict=type_dict, grad_req='null')
-            conv_exe_int8.arg_dict[qarg_names[0]][:] = conv_exe_fp32.arg_dict[arg_names[0]].astype('int8')
-            conv_exe_int8.arg_dict[qarg_names[1]][:] = conv_exe_fp32.arg_dict[arg_names[1]].astype('int8')
-            quantized_range = 127.0
-            if no_bias:
-                conv_exe_int8.arg_dict[qarg_names[2]][:] = -quantized_range
-                conv_exe_int8.arg_dict[qarg_names[3]][:] = quantized_range
-                conv_exe_int8.arg_dict[qarg_names[4]][:] = -quantized_range
-                conv_exe_int8.arg_dict[qarg_names[5]][:] = quantized_range
-            else:
-                conv_exe_int8.arg_dict[qarg_names[2]][:] = conv_exe_fp32.arg_dict[arg_names[2]].astype('int8')
-                conv_exe_int8.arg_dict[qarg_names[3]][:] = -quantized_range
-                conv_exe_int8.arg_dict[qarg_names[4]][:] = quantized_range
-                conv_exe_int8.arg_dict[qarg_names[5]][:] = -quantized_range
-                conv_exe_int8.arg_dict[qarg_names[6]][:] = quantized_range
-                conv_exe_int8.arg_dict[qarg_names[7]][:] = -quantized_range
-                conv_exe_int8.arg_dict[qarg_names[8]][:] = quantized_range
-            qoutput, min_range, max_range = conv_exe_int8.forward()
+    if is_test_for_mkldnn():
+        dtype = 'uint8'
+        shift = 127
+    else:
+        dtype = 'int8'
+        shift = 0
 
-            if no_bias:
-                assert_almost_equal(output.asnumpy(), qoutput.asnumpy())
-            else:
-                # with adding bias, accuracy loss should not be greater than one
-                diff = mx.nd.abs(output - qoutput.astype(output.dtype))
-                cond = 

[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-05-30 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r191751428
 
 

 ##
 File path: ci/docker/runtime_functions.sh
 ##
 @@ -382,7 +381,6 @@ unittest_ubuntu_python3_cpu() {
 #export MXNET_MKLDNN_DEBUG=1  # Ignored if not present
 export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
 nosetests-3.4 --verbose tests/python/unittest
-nosetests-3.4 --verbose tests/python/quantization
 
 Review comment:
  @zhang-da @reminisce The difficulty is that we need a way to separate the naive CPU quantization tests from the MKLDNN quantization tests if we want to share the same test_quantization.py (sharing code is our aim, as @reminisce mentioned), since both use the CPU context. The approach I came up with is to set an environment variable USE_MKLDNN in test_quantization_mkldnn.py and check it in test_quantization.py; for the naive CPU path USE_MKLDNN is not set, so the tests take the naive path (see the sketch below). We have added the quantization test for the CPU path back in the new diff. Please help review and let me know if there is a better way to do this. Thanks.
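  A minimal sketch of the helpers this implies in test_quantization.py (assuming the USE_MKLDNN environment-variable convention described above; the exact names may differ in the actual diff):

```python
import os
import mxnet as mx

def is_test_for_gpu():
    return mx.current_context().device_type == 'gpu'

def is_test_for_mkldnn():
    # MKLDNN path: CPU context with the env var set by test_quantization_mkldnn.py
    return (mx.current_context().device_type == 'cpu'
            and os.environ.get('USE_MKLDNN') == '1')

def is_test_for_naive_cpu():
    # naive CPU path: CPU context without the env var
    return (mx.current_context().device_type == 'cpu'
            and os.environ.get('USE_MKLDNN') is None)
```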




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-05-30 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r191752075
 
 

 ##
 File path: ci/docker/runtime_functions.sh
 ##
 @@ -381,13 +381,12 @@ unittest_ubuntu_python3_cpu() {
 
 unittest_ubuntu_python3_cpu_mkldnn() {
 set -ex
-export PYTHONPATH=./python/ 
+export PYTHONPATH=./python/
 # MXNET_MKLDNN_DEBUG is buggy and produces false positives
 # https://github.com/apache/incubator-mxnet/issues/10026
 #export MXNET_MKLDNN_DEBUG=1  # Ignored if not present
 export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
 nosetests-3.4 --verbose tests/python/unittest
-nosetests-3.4 --verbose tests/python/quantization
 
 Review comment:
  One note on removing "nosetests-3.4 --verbose tests/python/quantization" from unittest_ubuntu_python3_cpu_mkldnn: MKLDNN quantization is now exercised by tests/python/mkl/test_quantization_mkldnn.py (which runs as part of "nosetests-3.4 --verbose tests/python/mkl"), so the tests/python/quantization path can be dropped here.




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-05-24 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r190770998
 
 

 ##
 File path: src/operator/quantization/quantize_graph_pass.cc
 ##
 @@ -198,12 +199,11 @@ Graph QuantizeGraph(Graph &) {
 NodePtr mirror_node = mirror_map.at(e.node.get());
 NodeEntry mirror_entry = NodeEntry{
   mirror_node, e.index, e.version};
-size_t num_outputs = e.node->num_outputs();
-uint32_t min_index = num_outputs + 2 * e.index;
-uint32_t max_index = num_outputs + 2 * e.index + 1;
-
 // if input node is quantized operator, add dequantize node
 if (NeedQuantize(e.node, excluded_nodes)) {
+  size_t num_outputs = mirror_node->num_outputs() - 2;
 
 Review comment:
   Added a comment for the assumption




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-05-24 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r190770962
 
 

 ##
 File path: src/operator/quantization/quantize_graph_pass.cc
 ##
 @@ -159,7 +159,7 @@ Graph QuantizeGraph(Graph &) {
 uint32_t min_index = 1;
 uint32_t max_index = 2;
 if (quantized_op_map.count(e.node->op())) {
-  size_t  num_outputs = e.node->num_outputs();
+  size_t  num_outputs = mirror_node->num_outputs() - 2;
 
 Review comment:
   Added a comment for the assumption




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-05-24 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r190491572
 
 

 ##
 File path: src/operator/quantization/dequantize.cc
 ##
 @@ -50,7 +70,12 @@ by keep zero centered for the quantized value:
 .set_num_outputs(1)
 .set_attr("FInferShape", DequantizeShape)
 .set_attr("FInferType", DequantizeType)
+.set_attr("FInferStorageType", DequantizeStorageType)
+#if MXNET_USE_MKLDNN == 1
+.set_attr("FComputeEx", MKLDNNDequantizeCompute)
+#else
 
 Review comment:
   else is not needed, will change.




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-05-24 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r190491349
 
 

 ##
 File path: src/operator/quantization/quantize.cc
 ##
 @@ -61,7 +81,12 @@ where
   })
 .set_attr("FInferShape", QuantizeShape)
 .set_attr("FInferType", QuantizeType)
+.set_attr("FInferStorageType", QuantizeStorageType)
+#if MXNET_USE_MKLDNN == 1
+.set_attr("FComputeEx", MKLDNNQuantizeCompute)
+#else
 
 Review comment:
   else is not needed, will change.




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-05-24 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r190488717
 
 

 ##
 File path: src/operator/quantization/quantized_pooling.cc
 ##
 @@ -78,19 +81,39 @@ bool QuantizedPoolingType(const nnvm::NodeAttrs& attrs,
   const PoolingParam& param = nnvm::get<PoolingParam>(attrs.parsed);
   CHECK_EQ(in_type->size(), 3U);
   CHECK_EQ(out_type->size(), 3U);
-  if (param.pool_type == pool_enum::kMaxPooling || param.pool_type == pool_enum::kAvgPooling) {
 
 Review comment:
   Will add MXNET_USE_MKLDNN check and only bypass the check for MKLDNN.




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-05-24 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r190488062
 
 

 ##
 File path: src/operator/quantization/quantized_flatten-inl.h
 ##
 @@ -62,11 +62,21 @@ void QuantizedFlattenCompute(const nnvm::NodeAttrs& attrs,
   using namespace mxnet_op;
   Stream<xpu> *s = ctx.get_stream<xpu>();
 
-  typedef int8_t DstDType;
-  typedef int8_t  SrcDType;
-  Kernel<quantized_flatten, xpu>::Launch(s, outputs[0].Size(),
-    outputs[0].dptr<DstDType>(), outputs[1].dptr<float>(), outputs[2].dptr<float>(),
-    inputs[0].dptr<SrcDType>(), inputs[1].dptr<float>(), inputs[2].dptr<float>());
+  if (inputs[0].type_flag_ == mshadow::kUint8) {
 
 Review comment:
  It looks like the kernel already has SrcDType/DstDType template arguments. The reason for the duplication is that SrcDType/DstDType differ depending on inputs[0].type_flag_, so we need a mapping to define them; since the typedefs live inside the if branch and have local scope, we have to call Kernel<>::Launch once in each branch.




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-05-24 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r190488482
 
 

 ##
 File path: src/operator/quantization/quantized_flatten-inl.h
 ##
 @@ -96,10 +106,10 @@ inline bool QuantizedFlattenType(const nnvm::NodeAttrs& attrs,
                                  std::vector<int> *out_attrs) {
   CHECK_EQ(in_attrs->size(), 3U);
   CHECK_EQ(out_attrs->size(), 3U);
-  TYPE_ASSIGN_CHECK(*in_attrs, 0, mshadow::kInt8);
   TYPE_ASSIGN_CHECK(*in_attrs, 1, mshadow::kFloat32);
   TYPE_ASSIGN_CHECK(*in_attrs, 2, mshadow::kFloat32);
-  TYPE_ASSIGN_CHECK(*out_attrs, 0, mshadow::kInt8);
+  const int dtype = (*in_attrs)[0];
 
 Review comment:
   Yes, that's a better way, will change.




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-05-24 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r190484093
 
 

 ##
 File path: src/operator/quantization/quantized_conv.cc
 ##
 @@ -86,12 +89,10 @@ bool QuantizedConvType(const nnvm::NodeAttrs& attrs,
   const ConvolutionParam& param = nnvm::get(attrs.parsed);
   CHECK_EQ(in_type->size(), param.no_bias? 6U : 9U);
   CHECK_EQ(out_type->size(), 3U);
-  TYPE_ASSIGN_CHECK(*in_type, 0, mshadow::kInt8);
 
 Review comment:
   OK, will add MXNET_USE_MKLDNN check here




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-05-23 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r190447983
 
 

 ##
 File path: src/operator/quantization/quantize_graph_pass.cc
 ##
 @@ -159,7 +159,7 @@ Graph QuantizeGraph(Graph &) {
 uint32_t min_index = 1;
 uint32_t max_index = 2;
 if (quantized_op_map.count(e.node->op())) {
-  size_t  num_outputs = e.node->num_outputs();
+  size_t  num_outputs = mirror_node->num_outputs() - 2;
 
 Review comment:
  Yes, currently every quantized op has exactly one min and one max output, so I think it's OK to make that assumption here. Or is there a better suggested way to handle this?




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-05-23 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r190448136
 
 

 ##
 File path: src/operator/quantization/quantize_graph_pass.cc
 ##
 @@ -198,12 +199,11 @@ Graph QuantizeGraph(Graph &) {
 NodePtr mirror_node = mirror_map.at(e.node.get());
 NodeEntry mirror_entry = NodeEntry{
   mirror_node, e.index, e.version};
-size_t num_outputs = e.node->num_outputs();
-uint32_t min_index = num_outputs + 2 * e.index;
-uint32_t max_index = num_outputs + 2 * e.index + 1;
-
 // if input node is quantized operator, add dequantize node
 if (NeedQuantize(e.node, excluded_nodes)) {
+  size_t num_outputs = mirror_node->num_outputs() - 2;
 
 Review comment:
   Same reason as above.




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-05-20 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r189500075
 
 

 ##
 File path: src/operator/nn/mkldnn/mkldnn_convolution.cc
 ##
 @@ -162,73 +165,51 @@ static mkldnn::convolution_backward_weights::primitive_desc GetConvBwdWeights(
   }
 }
 
-class MKLDNNConvForward {
-  std::shared_ptr<mkldnn::convolution_forward> fwd;
-  std::shared_ptr<mkldnn::memory> data;
-  std::shared_ptr<mkldnn::memory> weight;
-  std::shared_ptr<mkldnn::memory> bias;
-  std::shared_ptr<mkldnn::memory> out;
+void MKLDNNConvForward::SetNewMem(const mkldnn::memory &data,
+                                  const mkldnn::memory &weight,
+                                  const mkldnn::memory *bias,
+                                  const mkldnn::memory &output) {
+  if (this->data_ == nullptr)
+    this->data_ = std::shared_ptr<mkldnn::memory>(new mkldnn::memory(
+        fwd_pd.src_primitive_desc(), data.get_data_handle()));
+  else
+    this->data_->set_data_handle(data.get_data_handle());
 
- public:
-  mkldnn::convolution_forward::primitive_desc fwd_pd;
+  if (this->weight_ == nullptr)
+    this->weight_ = std::shared_ptr<mkldnn::memory>(new mkldnn::memory(
+        fwd_pd.weights_primitive_desc(), weight.get_data_handle()));
+  else
+    this->weight_->set_data_handle(weight.get_data_handle());
 
-  MKLDNNConvForward(const ConvolutionParam& param, bool is_train,
-                    const NDArray &data, const NDArray &weights,
-                    const NDArray *bias, const NDArray &output): fwd_pd(
-                        GetConvFwdImpl(param, is_train, data, weights, bias, output)) {
-  }
-
-  void SetNewMem(const mkldnn::memory &data, const mkldnn::memory &weight,
-                 const mkldnn::memory *bias, const mkldnn::memory &output) {
-    if (this->data == nullptr)
-      this->data = std::shared_ptr<mkldnn::memory>(new mkldnn::memory(
-          fwd_pd.src_primitive_desc(), data.get_data_handle()));
-    else
-      this->data->set_data_handle(data.get_data_handle());
+  if (this->out_ == nullptr)
+    this->out_ = std::shared_ptr<mkldnn::memory>(new mkldnn::memory(
+        fwd_pd.dst_primitive_desc(), output.get_data_handle()));
+  else
+    this->out_->set_data_handle(output.get_data_handle());
 
-    if (this->weight == nullptr)
-      this->weight = std::shared_ptr<mkldnn::memory>(new mkldnn::memory(
-          fwd_pd.weights_primitive_desc(), weight.get_data_handle()));
+  if (bias != nullptr) {
+    if (this->bias_ == nullptr)
+      this->bias_ = std::shared_ptr<mkldnn::memory>(new mkldnn::memory(
+          fwd_pd.bias_primitive_desc(), bias->get_data_handle()));
-    else
-      this->weight->set_data_handle(weight.get_data_handle());
-
-    if (this->out == nullptr)
-      this->out = std::shared_ptr<mkldnn::memory>(new mkldnn::memory(
-          fwd_pd.dst_primitive_desc(), output.get_data_handle()));
-    else
-      this->out->set_data_handle(output.get_data_handle());
-
-    if (bias != nullptr) {
-      if (this->bias == nullptr)
-        this->bias = std::shared_ptr<mkldnn::memory>(new mkldnn::memory(
-            fwd_pd.bias_primitive_desc(), bias->get_data_handle()));
-      else
-        this->bias->set_data_handle(bias->get_data_handle());
-      if (this->fwd == nullptr)
-        this->fwd = std::shared_ptr<mkldnn::convolution_forward>(
-            new mkldnn::convolution_forward(fwd_pd, mkldnn::primitive::at(*this->data),
-                                            mkldnn::primitive::at(*this->weight),
-                                            mkldnn::primitive::at(*this->bias),
-                                            *this->out));
-    } else if (this->fwd == nullptr) {
-      this->fwd = std::shared_ptr<mkldnn::convolution_forward>(
-          new mkldnn::convolution_forward(fwd_pd, mkldnn::primitive::at(*this->data),
-                                          mkldnn::primitive::at(*this->weight),
-                                          *this->out));
-    }
+    else
+      this->bias_->set_data_handle(bias->get_data_handle());
+    if (this->fwd_ == nullptr)
+      this->fwd_ = std::shared_ptr<mkldnn::convolution_forward>(
+          new mkldnn::convolution_forward(fwd_pd, mkldnn::primitive::at(*this->data_),
+                                          mkldnn::primitive::at(*this->weight_),
+                                          mkldnn::primitive::at(*this->bias_),
+                                          *this->out_));
+  } else if (this->fwd_ == nullptr) {
+    this->fwd_ = std::shared_ptr<mkldnn::convolution_forward>(
+        new mkldnn::convolution_forward(fwd_pd, mkldnn::primitive::at(*this->data_),
+                                        mkldnn::primitive::at(*this->weight_),
+                                        *this->out_));
 
 Review comment:
   Sure, will fix.




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-05-20 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r18948
 
 

 ##
 File path: src/operator/quantization/quantize_graph_pass.cc
 ##
 @@ -198,12 +199,11 @@ Graph QuantizeGraph(Graph &) {
 NodePtr mirror_node = mirror_map.at(e.node.get());
 NodeEntry mirror_entry = NodeEntry{
   mirror_node, e.index, e.version};
-size_t num_outputs = e.node->num_outputs();
-uint32_t min_index = num_outputs + 2 * e.index;
-uint32_t max_index = num_outputs + 2 * e.index + 1;
-
 // if input node is quantized operator, add dequantize node
 if (NeedQuantize(e.node, excluded_nodes)) {
+  size_t num_outputs = mirror_node->num_outputs() - 2;
+  uint32_t min_index = num_outputs + 2 * e.index;
+  uint32_t max_index = num_outputs + 2 * e.index + 1;
 
 Review comment:
  The num_outputs/min_index/max_index calculation is only used in the "if (NeedQuantize(e.node, excluded_nodes))" branch, so I moved it inside that branch to simplify the logic. The change to the num_outputs calculation itself is for the same reason as the comment above.




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-05-20 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r189499693
 
 

 ##
 File path: src/operator/quantization/quantize_graph_pass.cc
 ##
 @@ -159,7 +159,7 @@ Graph QuantizeGraph(Graph &) {
 uint32_t min_index = 1;
 uint32_t max_index = 2;
 if (quantized_op_map.count(e.node->op())) {
-  size_t  num_outputs = e.node->num_outputs();
+  size_t  num_outputs = mirror_node->num_outputs() - 2;
 
 Review comment:
  I think this change should apply in all cases. Here we want to add the min/max data (outputs of new_node's previous node) to the quantized new_node's inputs, so it seems more proper to use the previous node (the quantized version, i.e. mirror_node here) to calculate num_outputs and the min/max indices. An op's FP32 and INT8 versions can have different numbers of outputs, so computing from the FP32 version may not represent the INT8 case; for example, on CPU, FP32 and INT8 pooling can have different output counts. The index arithmetic is illustrated below.
  @reminisce Could you help review the logic here?
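  For clarity, the index arithmetic under discussion can be restated in a few lines of Python (a plain illustration of the assumption, not the actual C++ pass):

```python
# Assumption discussed above: every quantized operator appends exactly two
# extra outputs, min and max, after its regular outputs.
def min_max_indices(mirror_num_outputs, e_index):
    num_outputs = mirror_num_outputs - 2        # regular outputs of the quantized node
    min_index = num_outputs + 2 * e_index
    max_index = num_outputs + 2 * e_index + 1
    return min_index, max_index

# e.g. INT8 pooling: 1 data output + min + max = 3 outputs in total
print(min_max_indices(3, 0))   # -> (1, 2)
```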




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-05-16 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r188569030
 
 

 ##
 File path: include/mxnet/c_api.h
 ##
 @@ -1423,13 +1423,15 @@ MXNET_DLL int MXSymbolInferType(SymbolHandle sym,
  * \param excluded_symbols array of symbols to be excluded from being quantized
  * \param num_offline number of parameters that are quantized offline
 * \param offline_params array of c strings representing the names of params quantized offline
+ * \param dev_type device type 
  */
 MXNET_DLL int MXQuantizeSymbol(SymbolHandle sym_handle,
SymbolHandle *ret_sym_handle,
const mx_uint num_excluded_symbols,
const SymbolHandle *excluded_symbols,
const mx_uint num_offline,
-   const char **offline_params);
+   const char **offline_params,
+   int dev_type);
 
 Review comment:
  @reminisce
  I understand that other kinds of CPUs may still need requantize. How about the following approach (option names illustrated in the sketch below)?
  (1) Add a few options to ```imagenet_gen_qsym.py``` to configure individual features, e.g. ```use_uint8``` (default off, whether to quantize data to uint8), ```use_requantize``` (default on, whether to insert requantize ops; to be added later), and ```calib_input_data``` (default off, a planned feature that calibrates the input data of calibrated layers to improve performance; to be added later). Users can then pass different options to the quantization script based on their needs, and in most cases the defaults will just work.
  (2) Instead of passing dev_type to ```MXQuantizeSymbol()```, add parameters that switch the specific features on or off, i.e. ```use_uint8```, ```use_requantize```, and ```calib_input_data```, mapped to the script options in (1).
  (3) In the C++ handler ```MXQuantizeSymbol()```, define a graph attribute for each of the feature switches above, and have ```QuantizeGraph()``` act on those attributes. That way the code is reused and we don't need a separate quantize-graph function for the CPU path.
  
  Please let me know your comments/suggestions, thanks.
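  A rough sketch of how those switches might surface in ```imagenet_gen_qsym.py``` (the option names and defaults follow the proposal above and are not final):

```python
import argparse

parser = argparse.ArgumentParser(description='Generate a calibrated, quantized model')
parser.add_argument('--use-uint8', action='store_true', default=False,
                    help='quantize input data to uint8 instead of int8')
parser.add_argument('--use-requantize', dest='use_requantize', action='store_true',
                    default=True, help='insert requantize ops (proposed switch)')
parser.add_argument('--no-requantize', dest='use_requantize', action='store_false',
                    help='skip requantize ops (planned, not yet implemented)')
parser.add_argument('--calib-input-data', action='store_true', default=False,
                    help='also calibrate input data of calibrated layers (planned)')
args = parser.parse_args()
print(args.use_uint8, args.use_requantize, args.calib_input_data)
```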




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-05-16 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r188534806
 
 

 ##
 File path: src/operator/nn/mkldnn/mkldnn_convolution-inl.h
 ##
 @@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file mkldnn_convolution-inl.h
+ * \brief
+*/
+
+#ifndef MXNET_OPERATOR_NN_MKLDNN_MKLDNN_CONVOLUTION_INL_H_
+#define MXNET_OPERATOR_NN_MKLDNN_MKLDNN_CONVOLUTION_INL_H_
+
+#if MXNET_USE_MKLDNN == 1
+
+#include 
+#include "../convolution-inl.h"
+#include "./mkldnn_ops-inl.h"
+#include "./mkldnn_base-inl.h"
+
+namespace mxnet {
+namespace op {
+
+mkldnn::convolution_forward::primitive_desc GetConvFwdImpl(
+    const ConvolutionParam& param, bool is_train, const NDArray &data,
+    const NDArray &weights, const NDArray *bias, const NDArray &output);
 
 Review comment:
  We would like to check whether bias is a nullptr and take a different code path accordingly, so we pass bias as a pointer.




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-05-16 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r188510242
 
 

 ##
 File path: src/operator/quantization/requantize-inl.h
 ##
 @@ -68,6 +68,24 @@ inline bool RequantizeType(const nnvm::NodeAttrs& attrs,
   return (*in_attrs)[0] != -1;
 }
 
+bool RequantizeStorageType(const nnvm::NodeAttrs& attrs,
+ const int dev_mask,
+ DispatchMode* dispatch_mode,
+ std::vector *in_attrs,
+ std::vector *out_attrs) {
+#if MXNET_USE_MKLDNN == 1
+  *dispatch_mode = DispatchMode::kFComputeEx;
+  if (dev_mask == mshadow::cpu::kDevMask)
+*dispatch_mode = DispatchMode::kFComputeEx;
+  else
+#endif
+*dispatch_mode = DispatchMode::kFCompute;
 
 Review comment:
  Please simplify the dispatch_mode assignment logic here to improve readability; you can refer to the current quantize/dequantize implementation.




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-04-27 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r184622443
 
 

 ##
 File path: src/operator/quantization/mkldnn/mkldnn_quantized_conv.cc
 ##
 @@ -0,0 +1,229 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file mkldnn_quantized_onvolution.cc
+ * \brief
+ * \author Wenting Jiang
+*/
+
+#if MXNET_USE_MKLDNN == 1
+#include "../../nn/mkldnn/mkldnn_base-inl.h"
+#include "../../nn/convolution-inl.h"
+#include "../quantization_utils.h"
+#include "../../tensor/matrix_op-inl.h"
+#include "../../elemwise_op_common.h"
+namespace mxnet {
+namespace op {
+
+static mkldnn::convolution_forward::primitive_desc GetConvFwdImpl(
 
 Review comment:
  I totally agree. The main difference is that for int8 inference is_train is always false, but the code can still be shared with the FP32 version. I will update to a new version that shares the code with the FP32 convolution/pooling.




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-04-27 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r184622611
 
 

 ##
 File path: src/operator/quantization/mkldnn/mkldnn_quantized_pooling.cc
 ##
 @@ -0,0 +1,234 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file mkldnn_quantized_pooling.cc
+ * \brief
+ * \author Tao Lv
+*/
+
+#if MXNET_USE_MKLDNN == 1
+
+#include "./mkldnn_quantized_pooling-inl.h"
+
+namespace mxnet {
+namespace op {
+
+void MKLDNNQuantizedPoolingFwd::Init(const mxnet::NDArray , const 
mxnet::NDArray ,
+const int kernel_h,  const int kernel_w,
 
 Review comment:
   OK, will update a new version to share code between FP32 and INT8.




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-04-27 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r184621143
 
 

 ##
 File path: src/operator/quantization/mkldnn/mkldnn_dequantize-inl.h
 ##
 @@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file mkldnn_quantized_conv-inl.h
+ * \author Wenting Jiang
+ * \brief
+ */
+
+#ifndef MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_DEQUANTIZE_INL_H_
+#define MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_DEQUANTIZE_INL_H_
+#if MXNET_USE_MKLDNN == 1
+#include 
+#include 
+#include 
+#include "../../nn/mkldnn/mkldnn_base-inl.h"
+
+namespace mxnet {
+namespace op {
+
+template<typename SrcType, typename DstType>
+void MKLDNNDequantizeComputeKer(const std::vector<NDArray> &inputs,
+                                const std::vector<NDArray> &outputs,
+                                const std::vector<OpReqType> &req) {
+  using namespace mshadow;
+  using namespace mxnet_op;
+  using red::limits::MaxValue;
+  using red::limits::MinValue;
+  float real_range = 0.0;
+  float quantized_range = 0.0;
+  if (inputs[0].dtype() == mshadow::kUint8) {
+    quantized_range = MaxAbs(MaxValue<SrcType>(), MinValue<SrcType>());
+    real_range = MaxAbs(*inputs[1].data().dptr<float>(), *inputs[2].data().dptr<float>());
+  } else if (inputs[0].dtype() == mshadow::kInt8) {
+    quantized_range = MinAbs(MaxValue<SrcType>(), MinValue<SrcType>());
+    real_range = MaxAbs(*inputs[1].data().dptr<float>(), *inputs[2].data().dptr<float>());
+  } else {
+    LOG(FATAL) << "mkldnn dequantize op only supports int8 and uint8 as output type";
+  }
+  float scale = real_range / quantized_range;
+  primitive_attr attr;
+  const int mask = 0;
+  std::vector<float> scales = {scale};
 
 Review comment:
  Yes, theoretically DstType could be something other than float, and the scale type is always float.
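  As a concrete illustration of that scale computation (plain Python mirroring the int8/uint8 branches in the snippet above):

```python
# max(|min_data|, |max_data|) taken from the float min/max inputs (example value)
real_range = 2.5

# int8 path: quantized_range = min(|127|, |-128|) = 127
int8_scale = real_range / 127.0     # dequantized value = quantized value * scale

# uint8 path: quantized_range = max(|255|, |0|) = 255
uint8_scale = real_range / 255.0

print(int8_scale, uint8_scale)
```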




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-04-27 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r184622800
 
 

 ##
 File path: src/operator/quantization/quantize_graph_pass.cc
 ##
 @@ -129,7 +129,7 @@ Graph QuantizeGraph(Graph &) {
  mirror_node->op()->name != "_contrib_quantize")) {
   NodePtr quantize_node = InsertNode("_contrib_quantize",
 e.node->attrs.name + "_quantize", new_node, mirror_entry);
-  quantize_node->attrs.dict["out_type"] = "int8";
+quantize_node->attrs.dict["out_type"] = "int8";
 
 Review comment:
  My mistake, I misread the alignment; will change it back in the next diff.




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-04-24 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r183241106
 
 

 ##
 File path: src/operator/quantization/mkldnn/mkldnn_quantize-inl.h
 ##
 @@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file mkldnn_quantize-inl.h
+ * \brief
+ * \author Wenting Jiang
+ */
+
+#ifndef MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_QUANTIZE_INL_H_
+#define MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_QUANTIZE_INL_H_
+#if MXNET_USE_MKLDNN == 1
+#include 
+#include 
+#include 
+#include "../quantize-inl.h"
+#include "../../nn/mkldnn/mkldnn_base-inl.h"
+
+namespace mxnet {
+namespace op {
+
+template
+void MKLDNNQuantizeComputeKer(const std::vector& inputs,
+  const std::vector& outputs,
+  const QuantizeParam& param,
+  const std::vector ) {
+  using namespace mshadow;
+  using namespace mxnet_op;
+  using red::limits::MaxValue;
+  using red::limits::MinValue;
+  float real_range = 0.0;
+  float quantized_range = 0.0;
+  if (param.out_type == mshadow::kUint8) {
+    real_range = MaxAbs(*inputs[1].data().dptr<float>(), *inputs[2].data().dptr<float>());
 
 Review comment:
  Should use float, since inputs/outputs [1-2] are the min/max values, which are always float.




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-04-24 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r183241662
 
 

 ##
 File path: src/operator/quantization/quantize_graph_pass.cc
 ##
 @@ -129,7 +129,7 @@ Graph QuantizeGraph(Graph &) {
  mirror_node->op()->name != "_contrib_quantize")) {
   NodePtr quantize_node = InsertNode("_contrib_quantize",
 e.node->attrs.name + "_quantize", new_node, mirror_entry);
-  quantize_node->attrs.dict["out_type"] = "int8";
+quantize_node->attrs.dict["out_type"] = "int8";
 
 Review comment:
  After the change, the modified line (quantize_node->...) is aligned with the line above (e.node->...).




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-04-24 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r183759204
 
 

 ##
 File path: src/operator/quantization/dequantize.cc
 ##
 @@ -23,11 +23,32 @@
  * \brief
  */
 #include "./dequantize-inl.h"
+#if MXNET_USE_MKLDNN == 1
+#include "./mkldnn/mkldnn_dequantize-inl.h"
+#endif
 
 namespace mxnet {
 namespace op {
 DMLC_REGISTER_PARAMETER(DequantizeParam);
 
+bool DequantizeStorageType(const nnvm::NodeAttrs& attrs,
+   const int dev_mask,
+   DispatchMode* dispatch_mode,
+   std::vector *in_attrs,
+   std::vector *out_attrs) {
+#if MXNET_USE_MKLDNN == 1
+  *dispatch_mode = DispatchMode::kFComputeEx;
+  if (dev_mask == mshadow::cpu::kDevMask)
+*dispatch_mode = DispatchMode::kFComputeEx;
+  else
+#endif
+*dispatch_mode = DispatchMode::kFCompute;
 
 Review comment:
   @reminisce Yes, that's a good suggestion, we will change that later.




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-04-24 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r183758965
 
 

 ##
 File path: include/mxnet/c_api.h
 ##
 @@ -1423,13 +1423,15 @@ MXNET_DLL int MXSymbolInferType(SymbolHandle sym,
  * \param excluded_symbols array of symbols to be excluded from being quantized
  * \param num_offline number of parameters that are quantized offline
  * \param offline_params array of c strings representing the names of params 
quantized offline
+ * \param dev_type device type 
  */
 MXNET_DLL int MXQuantizeSymbol(SymbolHandle sym_handle,
SymbolHandle *ret_sym_handle,
const mx_uint num_excluded_symbols,
const SymbolHandle *excluded_symbols,
const mx_uint num_offline,
-   const char **offline_params);
+   const char **offline_params,
+   int dev_type);
 
 Review comment:
   @reminisce Currently the difference is mainly signed vs. unsigned, but there
may be more differences between CPU and GPU in the future (for example, we may
not need requantize in the CPU context), so it looks like we should use
QuantizeGraphCPU as the name instead of QuantizeGraphUnsigned. Also, the
default value has been set to the cpu context in _quantize_symbol().
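
   For reference, a sketch of what the pass selection would look like under the
proposed name (it mirrors the c_api_symbolic.cc hunk quoted later in this
thread; QuantizeGraphCPU is the suggested rename, not code that exists yet):

      #if MXNET_USE_MKLDNN == 1
        if (dev_type == Context::kCPU)
          g = ApplyPass(std::move(g), "QuantizeGraphCPU");  // CPU path: unsigned (uint8) activations
        else
      #endif
          g = ApplyPass(std::move(g), "QuantizeGraph");     // GPU path: signed (int8) activations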




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-04-24 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r183753288
 
 

 ##
 File path: src/operator/quantization/quantize_graph_pass.cc
 ##
 @@ -198,7 +198,7 @@ Graph QuantizeGraph(Graph &) {
 NodePtr mirror_node = mirror_map.at(e.node.get());
 NodeEntry mirror_entry = NodeEntry{
   mirror_node, e.index, e.version};
-size_t num_outputs = e.node->num_outputs();
+size_t num_outputs = mirror_node->num_outputs() - 2;
 
 Review comment:
   I checked the logic. The reason for this change is that the number of
outputs (excluding min/max) of an op may differ between the FP32 version and
the INT8 version on CPU. Take pooling: the FP32 op may have 2 outputs (one for
data, one for workspace), while the INT8 version has only 1 output (data). So
here we need the quantized op's output count rather than the original FP32
op's. mirror_node is the quantized op in this case, and we subtract 2 (for
min/max) to get the start index of the min/max outputs. I will add a comment
here to ease understanding.
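
   As a sketch of the indexing described above (using the names from the quoted
hunk, and assuming, as the pass does elsewhere, that a quantized operator's
outputs are its data output(s) followed by one min_range and one max_range):

      // Example: quantized pooling on CPU produces (out_data, min_range, max_range),
      // so mirror_node->num_outputs() == 3 and there is 3 - 2 = 1 data output,
      // whereas the FP32 pooling node may report 2 outputs (data, workspace).
      size_t num_data_outputs = mirror_node->num_outputs() - 2;
      uint32_t min_index = num_data_outputs + 2 * e.index;      // min_range entry to wire in
      uint32_t max_index = num_data_outputs + 2 * e.index + 1;  // max_range entry to wire in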




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-04-22 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r183241376
 
 

 ##
 File path: src/operator/quantization/mkldnn/mkldnn_quantized_conv.cc
 ##
 @@ -0,0 +1,229 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file mkldnn_quantized_onvolution.cc
+ * \brief
+ * \author Wenting Jiang
+*/
+
+#if MXNET_USE_MKLDNN == 1
+#include "../../nn/mkldnn/mkldnn_base-inl.h"
+#include "../../nn/convolution-inl.h"
+#include "../quantization_utils.h"
+#include "../../tensor/matrix_op-inl.h"
+#include "../../elemwise_op_common.h"
+namespace mxnet {
+namespace op {
+
+static mkldnn::convolution_forward::primitive_desc GetConvFwdImpl(
+const ConvolutionParam& param, const NDArray ,
+const NDArray , const NDArray *bias, const NDArray ) {
+  auto prop = mkldnn::prop_kind::forward_scoring;
+  auto data_md = GetMemDesc(data);
+  auto weight_md = GetWeightDesc(weights, param.num_group);
+  auto out_md = GetMemDesc(output);
+  auto engine = CpuEngine::Get()->get_engine();
+  mkldnn::memory::dims strides{0, 0};
+  if (param.stride.ndim() == 2) {
+strides[0] = param.stride[0];
+strides[1] = param.stride[1];
+  }
+  mkldnn::memory::dims padding{0, 0};
+  if (param.pad.ndim() == 2) {
+padding[0] = param.pad[0];
+padding[1] = param.pad[1];
+  }
+  if (param.dilate.ndim() == 0 && bias == nullptr) {
+mkldnn::convolution_forward::desc desc(prop, 
mkldnn::algorithm::convolution_direct,
+data_md, weight_md, out_md, strides, padding, padding, 
mkldnn::padding_kind::zero);
+return mkldnn::convolution_forward::primitive_desc(desc, engine);
+  } else if (param.dilate.ndim() == 0) {
+auto bias_md = GetMemDesc(*bias);
+mkldnn::convolution_forward::desc desc(prop, 
mkldnn::algorithm::convolution_direct,
+data_md, weight_md, bias_md, out_md, strides, padding, padding,
+mkldnn::padding_kind::zero);
+return mkldnn::convolution_forward::primitive_desc(desc, engine);
+  } else {
+mkldnn::memory::dims dilates{0, 0};
+if (param.dilate.ndim() == 2) {
+  dilates[0] = param.dilate[0] - 1;
+  dilates[1] = param.dilate[1] - 1;
+}
+if (bias == nullptr) {
+  mkldnn::convolution_forward::desc desc(prop, 
mkldnn::algorithm::convolution_direct,
+  data_md, weight_md, out_md, strides, dilates, padding, padding,
+  mkldnn::padding_kind::zero);
+  return mkldnn::convolution_forward::primitive_desc(desc, engine);
+} else {
+  auto bias_md = GetMemDesc(*bias);
+  mkldnn::convolution_forward::desc desc(prop, 
mkldnn::algorithm::convolution_direct,
+ data_md, weight_md, bias_md, 
out_md, strides,
+ dilates, padding, padding,
+ mkldnn::padding_kind::zero);
+  return mkldnn::convolution_forward::primitive_desc(desc, engine);
+}
+  }
+}
+
+class MKLDNNConvForward {
+  std::shared_ptr fwd;
+  std::shared_ptr data;
+  std::shared_ptr weight;
+  std::shared_ptr bias;
+  std::shared_ptr out;
+
+ public:
+  mkldnn::convolution_forward::primitive_desc fwd_pd;
+
+  MKLDNNConvForward(const ConvolutionParam& param,
+const NDArray , const NDArray ,
+const NDArray *bias, const NDArray ): fwd_pd(
+GetConvFwdImpl(param, data, weights, bias, output)) {
+  }
+
+  void SetNewMem(const mkldnn::memory , const mkldnn::memory ,
+ const mkldnn::memory *bias, const mkldnn::memory ) {
+if (this->data == nullptr)
+  this->data = std::shared_ptr(new mkldnn::memory(
+  fwd_pd.src_primitive_desc(), data.get_data_handle()));
+else
+  this->data->set_data_handle(data.get_data_handle());
+
+if (this->weight == nullptr)
+  this->weight = std::shared_ptr(new mkldnn::memory(
+  fwd_pd.weights_primitive_desc(), weight.get_data_handle()));
+else
+  this->weight->set_data_handle(weight.get_data_handle());
+
+if (this->out == nullptr)
+  this->out = std::shared_ptr(new mkldnn::memory(
+ 

[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-04-22 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r183241805
 
 

 ##
 File path: src/operator/quantization/quantize_graph_pass.cc
 ##
 @@ -198,7 +198,7 @@ Graph QuantizeGraph(Graph &) {
 NodePtr mirror_node = mirror_map.at(e.node.get());
 NodeEntry mirror_entry = NodeEntry{
   mirror_node, e.index, e.version};
-size_t num_outputs = e.node->num_outputs();
+size_t num_outputs = mirror_node->num_outputs() - 2;
 
 Review comment:
   This needs to be reverted; it only applies to the newly added pass function.
I will double-check the logic (which operators it applies to and whether 1 or 2
should be subtracted) and get back to you later.




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-04-22 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r183241122
 
 

 ##
 File path: src/operator/quantization/mkldnn/mkldnn_quantize-inl.h
 ##
 @@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file mkldnn_quantize-inl.h
+ * \brief
+ * \author Wenting Jiang
+ */
+
+#ifndef MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_QUANTIZE_INL_H_
+#define MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_QUANTIZE_INL_H_
+#if MXNET_USE_MKLDNN == 1
+#include 
+#include 
+#include 
+#include "../quantize-inl.h"
+#include "../../nn/mkldnn/mkldnn_base-inl.h"
+
+namespace mxnet {
+namespace op {
+
+template
+void MKLDNNQuantizeComputeKer(const std::vector& inputs,
+  const std::vector& outputs,
+  const QuantizeParam& param,
+  const std::vector ) {
+  using namespace mshadow;
+  using namespace mxnet_op;
+  using red::limits::MaxValue;
+  using red::limits::MinValue;
+  float real_range = 0.0;
+  float quantized_range = 0.0;
+  if (param.out_type == mshadow::kUint8) {
+real_range = MaxAbs(*inputs[1].data().dptr(), 
*inputs[2].data().dptr());
+quantized_range = MaxAbs(MaxValue(), MinValue());
+*outputs[1].data().dptr() = *inputs[1].data().dptr();
+*outputs[2].data().dptr() = *inputs[2].data().dptr();
+  } else if (param.out_type == mshadow::kInt8) {
+real_range = MaxAbs(*inputs[1].data().dptr(), 
*inputs[2].data().dptr());
+quantized_range = MinAbs(MaxValue(), MinValue());
+*outputs[1].data().dptr() = -real_range;
+*outputs[2].data().dptr() = real_range;
+  } else {
 
 Review comment:
   The same reason as above.




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-04-22 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r183241596
 
 

 ##
 File path: src/operator/quantization/mkldnn/mkldnn_requantize-inl.h
 ##
 @@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/* \file mkldnn_requantize-inl.h
+ * \brief
+ * \author Jin Huang
+ */
+
+#ifndef MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_REQUANTIZE_INL_H_
+#define MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_REQUANTIZE_INL_H_
+#if MXNET_USE_MKLDNN == 1
+#include 
+#include 
+#include 
+#include "../requantize-inl.h"
+#include "../../nn/mkldnn/mkldnn_base-inl.h"
+
+namespace mxnet {
+namespace op {
+
+void MKLDNNRequantizeForwardKer(const nnvm::NodeAttrs& attrs,
+const OpContext& ctx,
+const std::vector& inputs,
+const std::vector& req,
+const std::vector& outputs,
 
 Review comment:
   Yes, we should use NDArray; we will change it in the next commit.




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-04-22 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r183241614
 
 

 ##
 File path: src/operator/quantization/mkldnn/mkldnn_requantize-inl.h
 ##
 @@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/* \file mkldnn_requantize-inl.h
+ * \brief
+ * \author Jin Huang
+ */
+
+#ifndef MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_REQUANTIZE_INL_H_
+#define MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_REQUANTIZE_INL_H_
+#if MXNET_USE_MKLDNN == 1
+#include 
+#include 
+#include 
+#include "../requantize-inl.h"
+#include "../../nn/mkldnn/mkldnn_base-inl.h"
+
+namespace mxnet {
+namespace op {
+
+void MKLDNNRequantizeForwardKer(const nnvm::NodeAttrs& attrs,
+const OpContext& ctx,
+const std::vector& inputs,
+const std::vector& req,
+const std::vector& outputs,
+const float real_range) {
+  using namespace mshadow;
+  using namespace mxnet_op;
+  using red::limits::MaxValue;
+  using red::limits::MinValue;
+  typedef int32_t SrcDType;
+  typedef int8_t  DstDType;
 
 Review comment:
   Yes, requantize always converts int32 (the quantized conv output) to int8.
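
   To make that concrete, a short sketch of the ranges involved (it reuses the
helpers from the quoted kernel; treat it as an illustration of the conversion,
not the exact implementation):

      typedef int32_t SrcDType;   // quantized conv accumulates into int32
      typedef int8_t  DstDType;   // requantize emits int8
      // widest representable magnitudes of the two integer types
      float src_quantized_range = MinAbs(MinValue<SrcDType>(), MaxValue<SrcDType>());  // ~2.147e9
      float dst_quantized_range = MinAbs(MinValue<DstDType>(), MaxValue<DstDType>());  // 127
      // conceptually, each int32 element x is mapped to int8 as
      //   x * (real_range / src_quantized_range) * (dst_quantized_range / out_real_range)
      // where real_range / out_real_range are the calibrated float ranges
      // before and after requantization.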




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-04-22 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r183241662
 
 

 ##
 File path: src/operator/quantization/quantize_graph_pass.cc
 ##
 @@ -129,7 +129,7 @@ Graph QuantizeGraph(Graph &) {
  mirror_node->op()->name != "_contrib_quantize")) {
   NodePtr quantize_node = InsertNode("_contrib_quantize",
 e.node->attrs.name + "_quantize", new_node, mirror_entry);
-  quantize_node->attrs.dict["out_type"] = "int8";
+quantize_node->attrs.dict["out_type"] = "int8";
 
 Review comment:
   Will change it back in the next commit.




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-04-22 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r183241827
 
 

 ##
 File path: src/operator/quantization/quantize_graph_pass.cc
 ##
 @@ -250,6 +250,165 @@ Graph QuantizeGraph(Graph &) {
   return ret;
 }
 
+#if MXNET_USE_MKLDNN == 1
+// QuantizeGraphUnsigned pass with uint8 dtype
+Graph QuantizeGraphUnsigned(Graph &) {
+  static auto& quantized_op_map = 
Op::GetAttr("FQuantizedOp");
+  static auto& need_requantize_map = 
Op::GetAttr("FNeedRequantize");
+  auto offline_params = 
src.GetAttr("offline_params");
+  auto excluded_nodes = 
src.GetAttr("excluded_nodes");
+
+  // mirror_map stores the mapping from the currently visited graph to the 
newly created quantized
+  // graph. Key is the currently visited graph's node pointer, and value is a 
copied node of the key
+  // node. The existing key's value may be updated with the newly created 
quantize/dequantize op.
+  std::unordered_map mirror_map;
+  DFSVisit(src.outputs, [&](const NodePtr& node) {
+NodePtr new_node = Node::Create();
+// If the currently visited node needs quantization, insert a quantize op 
node before the
+// current node and replace the current node with the quantized version in 
the new graph.
+if (NeedQuantize(node, excluded_nodes)) {
+  auto fquantized_op = quantized_op_map[node->op()];
+  // If the currently visited node's op registered the FQuantizedOp 
property, new_node is a
+  // quantizated version of a that op, such as quantized_conv2d.
+  new_node = fquantized_op(node->attrs);
+
+  // add data into quantized op input
+  for (const auto& e : node->inputs) {
+NodePtr mirror_node = mirror_map.at(e.node.get());
+NodeEntry mirror_entry = NodeEntry{
+  mirror_node, e.index, e.version};
+// If the NodeEntry e's node does not need quantization, and (the 
mirror_node is a variable,
+// or the mirror_node's op is not a quantize op), create quantize op, 
min op, and max op
+// taking mirror_entry as input to generate a quantized NDArray. Save 
the mapping between
+// e's source node and the newly created quantize op so that the 
quantize op can be
+// reused next time when the same entry is visited again.
+if (!NeedQuantize(e.node, excluded_nodes) &&
+(mirror_node->op() == nullptr ||
+ mirror_node->op()->name != "_contrib_quantize")) {
+  NodePtr quantize_node = InsertNode("_contrib_quantize",
+e.node->attrs.name + "_quantize", new_node, mirror_entry);
+quantize_node->attrs.dict["out_type"] = "uint8";
+  quantize_node->op()->attr_parser(&(quantize_node->attrs));
+
+  NodePtr min_node = InsertNode("min",
+  e.node->attrs.name + "_min", quantize_node, mirror_entry);
+  min_node->op()->attr_parser(&(min_node->attrs));
+
+  NodePtr max_node = InsertNode("max",
+  e.node->attrs.name + "_max", quantize_node, mirror_entry);
+  max_node->op()->attr_parser(&(max_node->attrs));
+
+  mirror_map[e.node.get()] = std::move(quantize_node);
+} else {
+  // If the entry e's node needs quantization, or mirror_entry is from 
a quantize op,
+  // simply add mirror_entry to the input of the new_node.
+  new_node->inputs.emplace_back(mirror_entry);
+}
+// the input should be `quantize` or quantized version op now
+  }
+
+  // add min and max into quantized op input assume order of quantized op 
inputs is:
+  // data1, data2, ..., min1, max1, min2, max2, ...
+  for (const auto& e : node->inputs) {
+NodePtr mirror_node = mirror_map.at(e.node.get());
+NodeEntry mirror_entry = NodeEntry{
+  mirror_node, e.index, e.version};
+// for quantize node
+uint32_t min_index = 1;
+uint32_t max_index = 2;
+if (quantized_op_map.count(e.node->op())) {
+  size_t  num_outputs = mirror_node->num_outputs() - 2;
+  min_index = num_outputs + 2 * e.index;
+  max_index = num_outputs + 2 * e.index + 1;
+} else {
+  CHECK(mirror_node->op()->name == "_contrib_quantize")
+<< "The input is not quantize or quantized_op";
+}
+new_node->inputs.emplace_back(NodeEntry{mirror_node, min_index, 0});
+new_node->inputs.emplace_back(NodeEntry{mirror_node, max_index, 0});
+  }
+
+  // If the new_node op registered attr FNeedRequantize, insert requantize 
node after it.
+  // Here it's assumed that the quantized_op node only produces three 
outputs:
+  // out_data, min_range, and max_range.
+  if (need_requantize_map.count(new_node->op()) > 0
+  && need_requantize_map[new_node->op()](new_node->attrs)) {
+NodePtr requantize_node = 

[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-04-22 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r183241566
 
 

 ##
 File path: src/operator/quantization/mkldnn/mkldnn_quantized_pooling.cc
 ##
 @@ -0,0 +1,234 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file mkldnn_quantized_pooling.cc
+ * \brief
+ * \author Tao Lv
+*/
+
+#if MXNET_USE_MKLDNN == 1
+
+#include "./mkldnn_quantized_pooling-inl.h"
+
+namespace mxnet {
+namespace op {
+
+void MKLDNNQuantizedPoolingFwd::Init(const mxnet::NDArray , const 
mxnet::NDArray ,
+const int kernel_h,  const int kernel_w,
+const int stride_h,  const int stride_w,
+const int padding_t, const int padding_b,
+const int padding_l, const int padding_r) {
+  // mkldnn::memory::desc
+  auto src_md = input.GetMKLDNNData()->get_primitive_desc().desc();
+  mkldnn::memory::dims dims = {src_md.data.dims[0],
+   src_md.data.dims[1],
+   static_cast(output.shape()[2]),
+   static_cast(output.shape()[3])};
+  auto dst_md = mkldnn::memory::desc({dims},
+ 
static_cast(src_md.data.data_type),
+ 
static_cast(src_md.data.format));
+  const mkldnn::engine engine = CpuEngine::Get()->get_engine();
+  const mkldnn::algorithm alg_kind = this->alg_kind_;
+  if (alg_kind != mkldnn::algorithm::pooling_max &&
+  alg_kind != mkldnn::algorithm::pooling_avg &&
+  alg_kind != mkldnn::algorithm::pooling_avg_include_padding &&
+  alg_kind != mkldnn::algorithm::pooling_avg_exclude_padding) {
+LOG(FATAL) << "MKLDNN Pooling: algorithm is not supported";
+  }
+
+  mkldnn::prop_kind prop = mkldnn::prop_kind::forward_scoring;
+
+  const mkldnn::memory::dims strides = {stride_h,  stride_w  };
+  const mkldnn::memory::dims pad_l   = {padding_t, padding_l };
+  const mkldnn::memory::dims pad_r   = {padding_b, padding_r };
+  const mkldnn::memory::dims kernel  = {kernel_h,  kernel_w  };
+  // mkldnn::pooling_forward::desc
+  const auto fwd_desc = mkldnn::pooling_forward::desc(prop, alg_kind, src_md, 
dst_md,
+  strides, kernel, pad_l, 
pad_r,
+  
mkldnn::padding_kind::zero);
+  this->fwd_pd_.reset(new mkldnn::pooling_forward::primitive_desc(fwd_desc, 
engine));
+  this->data_.reset(new 
mkldnn::memory(input.GetMKLDNNData()->get_primitive_desc()));
+  this->out_.reset(new mkldnn::memory(this->fwd_pd_->dst_primitive_desc()));
+  this->fwd_.reset(new mkldnn::pooling_forward(*(this->fwd_pd_),
+ 
mkldnn::primitive::at(*(this->data_)),
+ *(this->out_)));
+  return;
+}
+
+void MKLDNNQuantizedPoolingFwd::SetDataHandle(const mxnet::NDArray ,
+ const mxnet::NDArray ) {
+  // mkldnn::memory
+  auto data_mem = data.GetMKLDNNData();
+  auto out_mem = const_cast(output).CreateMKLDNNData(
+  
this->fwd_pd_->dst_primitive_desc());
+  this->data_->set_data_handle(data_mem->get_data_handle());
+  this->out_->set_data_handle(out_mem->get_data_handle());
+}
+
+void MKLDNNQuantizedPoolingFwd::Execute() {
+  if (this->fwd_) {
+MKLDNNStream::Get()->RegisterPrim(*(this->fwd_));
+MKLDNNStream::Get()->Submit();
+  } else {
+LOG(FATAL) << "MKLDNN Pooling: forward primitive is nullptr";
+  }
+}
+
+mkldnn::algorithm GetMKLDNNQuantizedPoolAlgo(const PoolingParam ) {
+  switch (param.pool_type) {
+case pool_enum::kMaxPooling:
+  return mkldnn::algorithm::pooling_max;
+  break;
+case pool_enum::kAvgPooling:
+  return mkldnn::algorithm::pooling_avg_include_padding;
+  break;
+default:
+  LOG(FATAL) << "MKLDNN Pooling: Unknown pooling method.";
+  return mkldnn::algorithm::pooling_max;
+  }
+}
+
+mkldnn::pooling_forward::primitive_desc 

[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-04-22 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r183241525
 
 

 ##
 File path: src/operator/quantization/mkldnn/mkldnn_quantized_pooling.cc
 ##
 @@ -0,0 +1,234 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file mkldnn_quantized_pooling.cc
+ * \brief
+ * \author Tao Lv
+*/
+
+#if MXNET_USE_MKLDNN == 1
+
+#include "./mkldnn_quantized_pooling-inl.h"
+
+namespace mxnet {
+namespace op {
+
+void MKLDNNQuantizedPoolingFwd::Init(const mxnet::NDArray , const 
mxnet::NDArray ,
+const int kernel_h,  const int kernel_w,
+const int stride_h,  const int stride_w,
+const int padding_t, const int padding_b,
+const int padding_l, const int padding_r) {
+  // mkldnn::memory::desc
+  auto src_md = input.GetMKLDNNData()->get_primitive_desc().desc();
+  mkldnn::memory::dims dims = {src_md.data.dims[0],
+   src_md.data.dims[1],
+   static_cast(output.shape()[2]),
+   static_cast(output.shape()[3])};
+  auto dst_md = mkldnn::memory::desc({dims},
+ 
static_cast(src_md.data.data_type),
+ 
static_cast(src_md.data.format));
+  const mkldnn::engine engine = CpuEngine::Get()->get_engine();
+  const mkldnn::algorithm alg_kind = this->alg_kind_;
+  if (alg_kind != mkldnn::algorithm::pooling_max &&
+  alg_kind != mkldnn::algorithm::pooling_avg &&
+  alg_kind != mkldnn::algorithm::pooling_avg_include_padding &&
+  alg_kind != mkldnn::algorithm::pooling_avg_exclude_padding) {
+LOG(FATAL) << "MKLDNN Pooling: algorithm is not supported";
+  }
+
+  mkldnn::prop_kind prop = mkldnn::prop_kind::forward_scoring;
+
+  const mkldnn::memory::dims strides = {stride_h,  stride_w  };
+  const mkldnn::memory::dims pad_l   = {padding_t, padding_l };
+  const mkldnn::memory::dims pad_r   = {padding_b, padding_r };
+  const mkldnn::memory::dims kernel  = {kernel_h,  kernel_w  };
+  // mkldnn::pooling_forward::desc
+  const auto fwd_desc = mkldnn::pooling_forward::desc(prop, alg_kind, src_md, 
dst_md,
+  strides, kernel, pad_l, 
pad_r,
+  
mkldnn::padding_kind::zero);
+  this->fwd_pd_.reset(new mkldnn::pooling_forward::primitive_desc(fwd_desc, 
engine));
+  this->data_.reset(new 
mkldnn::memory(input.GetMKLDNNData()->get_primitive_desc()));
+  this->out_.reset(new mkldnn::memory(this->fwd_pd_->dst_primitive_desc()));
+  this->fwd_.reset(new mkldnn::pooling_forward(*(this->fwd_pd_),
+ 
mkldnn::primitive::at(*(this->data_)),
+ *(this->out_)));
+  return;
+}
+
+void MKLDNNQuantizedPoolingFwd::SetDataHandle(const mxnet::NDArray ,
+ const mxnet::NDArray ) {
+  // mkldnn::memory
+  auto data_mem = data.GetMKLDNNData();
+  auto out_mem = const_cast(output).CreateMKLDNNData(
+  
this->fwd_pd_->dst_primitive_desc());
+  this->data_->set_data_handle(data_mem->get_data_handle());
+  this->out_->set_data_handle(out_mem->get_data_handle());
+}
+
+void MKLDNNQuantizedPoolingFwd::Execute() {
+  if (this->fwd_) {
+MKLDNNStream::Get()->RegisterPrim(*(this->fwd_));
+MKLDNNStream::Get()->Submit();
+  } else {
+LOG(FATAL) << "MKLDNN Pooling: forward primitive is nullptr";
+  }
+}
+
+mkldnn::algorithm GetMKLDNNQuantizedPoolAlgo(const PoolingParam ) {
+  switch (param.pool_type) {
+case pool_enum::kMaxPooling:
+  return mkldnn::algorithm::pooling_max;
+  break;
+case pool_enum::kAvgPooling:
+  return mkldnn::algorithm::pooling_avg_include_padding;
+  break;
+default:
+  LOG(FATAL) << "MKLDNN Pooling: Unknown pooling method.";
+  return mkldnn::algorithm::pooling_max;
+  }
+}
+
+mkldnn::pooling_forward::primitive_desc 

[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-04-22 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r183240995
 
 

 ##
 File path: src/operator/quantization/mkldnn/mkldnn_dequantize-inl.h
 ##
 @@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file mkldnn_quantized_conv-inl.h
+ * \author Wenting Jiang
+ * \brief
+ */
+
+#ifndef MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_DEQUANTIZE_INL_H_
+#define MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_DEQUANTIZE_INL_H_
+#if MXNET_USE_MKLDNN == 1
+#include 
+#include 
+#include 
+#include "../../nn/mkldnn/mkldnn_base-inl.h"
+
+namespace mxnet {
+namespace op {
+
+template
+void MKLDNNDequantizeComputeKer(const std::vector ,
+const std::vector ,
+const std::vector ) {
+  using namespace mshadow;
+  using namespace mxnet_op;
+  using red::limits::MaxValue;
+  using red::limits::MinValue;
+  float real_range = 0.0;
+  float quantized_range = 0.0;
+  if (inputs[0].dtype() == mshadow::kUint8) {
+quantized_range = MaxAbs(MaxValue(), MinValue());
+real_range = MaxAbs(*inputs[1].data().dptr(), 
*inputs[2].data().dptr());
+  } else if (inputs[0].dtype() == mshadow::kInt8) {
+quantized_range = MinAbs(MaxValue(), MinValue());
+real_range = MaxAbs(*inputs[1].data().dptr(), 
*inputs[2].data().dptr());
 
 Review comment:
   For uint8 we use MaxAbs to calculate quantized_range, but for int8 we use
MinAbs. The reason is that for uint8 we want quantized_range to be 255, while
for int8 we want it to be 127.
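
   A quick numeric check of that choice, using the type limits (shown only to
make the reasoning concrete):

      // uint8: MaxValue = 255, MinValue = 0    -> MaxAbs(255, 0)    = 255
      // int8:  MaxValue = 127, MinValue = -128 -> MinAbs(127, -128) = 127
      float uint8_range = MaxAbs(MaxValue<uint8_t>(), MinValue<uint8_t>());  // 255.0f
      float int8_range  = MinAbs(MaxValue<int8_t>(),  MinValue<int8_t>());   // 127.0f

   Using MinAbs for uint8 would give 0, and MaxAbs for int8 would give 128,
neither of which is the intended quantized range.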




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-04-22 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r183241106
 
 

 ##
 File path: src/operator/quantization/mkldnn/mkldnn_quantize-inl.h
 ##
 @@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file mkldnn_quantize-inl.h
+ * \brief
+ * \author Wenting Jiang
+ */
+
+#ifndef MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_QUANTIZE_INL_H_
+#define MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_QUANTIZE_INL_H_
+#if MXNET_USE_MKLDNN == 1
+#include 
+#include 
+#include 
+#include "../quantize-inl.h"
+#include "../../nn/mkldnn/mkldnn_base-inl.h"
+
+namespace mxnet {
+namespace op {
+
+template
+void MKLDNNQuantizeComputeKer(const std::vector& inputs,
+  const std::vector& outputs,
+  const QuantizeParam& param,
+  const std::vector ) {
+  using namespace mshadow;
+  using namespace mxnet_op;
+  using red::limits::MaxValue;
+  using red::limits::MinValue;
+  float real_range = 0.0;
+  float quantized_range = 0.0;
+  if (param.out_type == mshadow::kUint8) {
+real_range = MaxAbs(*inputs[1].data().dptr(), 
*inputs[2].data().dptr());
 
 Review comment:
   SrcType will be better; we will change it later.




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-04-22 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r183240951
 
 

 ##
 File path: src/operator/nn/mkldnn/mkldnn_base.cc
 ##
 @@ -211,6 +211,7 @@ mkldnn_memory_format_t 
GetDefaultFormat(mkldnn::memory::desc desc) {
   case mkldnn_hwio:
   case mkldnn_OIhw8i8o:
   case mkldnn_OIhw16i16o:
+  case mkldnn_OIhw4i16o4i:
 
 Review comment:
   OK, will take it out later.




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-04-22 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r183241649
 
 

 ##
 File path: src/operator/quantization/mkldnn/mkldnn_requantize-inl.h
 ##
 @@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/* \file mkldnn_requantize-inl.h
+ * \brief
+ * \author Jin Huang
+ */
+
+#ifndef MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_REQUANTIZE_INL_H_
+#define MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_REQUANTIZE_INL_H_
+#if MXNET_USE_MKLDNN == 1
+#include 
+#include 
+#include 
+#include "../requantize-inl.h"
+#include "../../nn/mkldnn/mkldnn_base-inl.h"
+
+namespace mxnet {
+namespace op {
+
+void MKLDNNRequantizeForwardKer(const nnvm::NodeAttrs& attrs,
+const OpContext& ctx,
+const std::vector& inputs,
+const std::vector& req,
+const std::vector& outputs,
+const float real_range) {
+  using namespace mshadow;
+  using namespace mxnet_op;
+  using red::limits::MaxValue;
+  using red::limits::MinValue;
+  typedef int32_t SrcDType;
+  typedef int8_t  DstDType;
+  // check shapes
+  size_t i_dim = inputs[0].ndim();
+  size_t o_dim = outputs[0].ndim();
+  CHECK_EQ(i_dim, o_dim);
+  unsigned int total_len = 1;
+  memory::dims tensor_shape;
+  for (size_t i = 0; i < i_dim; ++i) {
+CHECK_EQ(inputs[0].size(i), outputs[0].size(i));
+total_len *= inputs[0].size(i);
+  }
+  tensor_shape.push_back(total_len);
+  float first_quantized_range = MinAbs(MinValue(),
+   MaxValue());
+  float first_real_range = MaxAbs(*inputs[1].dptr(),
+  *inputs[2].dptr());
 
 Review comment:
   inputs[1-2] are the quantization min/max parameters, which are always float.




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-04-22 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r183241432
 
 

 ##
 File path: src/operator/quantization/mkldnn/mkldnn_quantized_pooling.cc
 ##
 @@ -0,0 +1,234 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file mkldnn_quantized_pooling.cc
+ * \brief
+ * \author Tao Lv
+*/
+
+#if MXNET_USE_MKLDNN == 1
+
+#include "./mkldnn_quantized_pooling-inl.h"
+
+namespace mxnet {
+namespace op {
+
+void MKLDNNQuantizedPoolingFwd::Init(const mxnet::NDArray , const 
mxnet::NDArray ,
+const int kernel_h,  const int kernel_w,
 
 Review comment:
   Yes, we also plan to share code; we would like to make the code-sharing
change in a later PR.




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-04-22 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r183241679
 
 

 ##
 File path: src/operator/quantization/quantize_graph_pass.cc
 ##
 @@ -159,7 +159,7 @@ Graph QuantizeGraph(Graph &) {
 uint32_t min_index = 1;
 uint32_t max_index = 2;
 if (quantized_op_map.count(e.node->op())) {
-  size_t  num_outputs = e.node->num_outputs();
+  size_t  num_outputs = mirror_node->num_outputs() - 2;
 
 Review comment:
   This should be reverted; we use a separate pass function for CPU, so this
function does not need to change.




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-04-22 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r183241068
 
 

 ##
 File path: src/operator/quantization/mkldnn/mkldnn_dequantize-inl.h
 ##
 @@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file mkldnn_quantized_conv-inl.h
+ * \author Wenting Jiang
+ * \brief
+ */
+
+#ifndef MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_DEQUANTIZE_INL_H_
+#define MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_DEQUANTIZE_INL_H_
+#if MXNET_USE_MKLDNN == 1
+#include 
+#include 
+#include 
+#include "../../nn/mkldnn/mkldnn_base-inl.h"
+
+namespace mxnet {
+namespace op {
+
+template
+void MKLDNNDequantizeComputeKer(const std::vector ,
+const std::vector ,
+const std::vector ) {
+  using namespace mshadow;
+  using namespace mxnet_op;
+  using red::limits::MaxValue;
+  using red::limits::MinValue;
+  float real_range = 0.0;
+  float quantized_range = 0.0;
+  if (inputs[0].dtype() == mshadow::kUint8) {
+quantized_range = MaxAbs(MaxValue(), MinValue());
+real_range = MaxAbs(*inputs[1].data().dptr(), 
*inputs[2].data().dptr());
+  } else if (inputs[0].dtype() == mshadow::kInt8) {
+quantized_range = MinAbs(MaxValue(), MinValue());
+real_range = MaxAbs(*inputs[1].data().dptr(), 
*inputs[2].data().dptr());
+  } else {
+LOG(FATAL) << "mkldnn dequantize op only supports int8 and uint8 as output 
type";
+  }
+  float scale = real_range / quantized_range;
+  primitive_attr attr;
+  const int mask = 0;
+  std::vector scales = {scale};
 
 Review comment:
   We should use float, since the scale is always a float; theoretically, the
dequantize destination type could be a type other than float as well.
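
   For illustration, the scale handling with the float type written out (a
sketch built around the quoted kernel; it assumes the reorder is configured
through MKL-DNN's primitive_attr::set_output_scales, which takes float scales):

      float scale = real_range / quantized_range;  // ratio of real to quantized units, always float
      const int mask = 0;                          // one common scale for the whole tensor
      std::vector<float> scales = {scale};
      primitive_attr attr;
      attr.set_output_scales(mask, scales);        // the MKL-DNN reorder applies the float scale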




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-04-22 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r183240942
 
 

 ##
 File path: src/c_api/c_api_symbolic.cc
 ##
 @@ -595,7 +596,12 @@ int MXQuantizeSymbol(SymbolHandle sym_handle,
 offline.emplace(offline_params[i]);
   }
   g.attrs["offline_params"] = std::make_shared(std::move(offline));
-  g = ApplyPass(std::move(g), "QuantizeGraph");
+#if MXNET_USE_MKLDNN == 1
+  if (dev_type == Context::kCPU)
+g = ApplyPass(std::move(g), "QuantizeGraphUnsigned");
+  else
+#endif
+g = ApplyPass(std::move(g), "QuantizeGraph");
 
 Review comment:
   Currently, the main differences are the quantized convolution input type
(CPU requires uint8 while GPU requires int8) and the min/max node index
calculation, but more differences are possible in the future, so we use a
separate function.




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-04-22 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r183240784
 
 

 ##
 File path: include/mxnet/c_api.h
 ##
 @@ -1409,13 +1409,17 @@ MXNET_DLL int MXSymbolInferType(SymbolHandle sym,
  * \param excluded_symbols array of symbols to be excluded from being quantized
  * \param num_offline number of parameters that are quantized offline
  * \param offline_params array of c strings representing the names of params 
quantized offline
+ * \param dev_type device type 
+ * \param dev_id device id
  */
 MXNET_DLL int MXQuantizeSymbol(SymbolHandle sym_handle,
SymbolHandle *ret_sym_handle,
const mx_uint num_excluded_symbols,
const SymbolHandle *excluded_symbols,
const mx_uint num_offline,
-   const char **offline_params);
+   const char **offline_params,
 
 Review comment:
   Yes, we want to implement different logic for the CPU and GPU contexts, so
we need to pass the dev_type.




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-04-22 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r183240802
 
 

 ##
 File path: python/mxnet/contrib/quantization.py
 ##
 @@ -72,7 +72,7 @@ def _quantize_params(qsym, params):
 return quantized_params
 
 
-def _quantize_symbol(sym, excluded_symbols=None, offline_params=None):
+def _quantize_symbol(sym, excluded_symbols=None, offline_params=None, 
context=cpu()):
 
 Review comment:
   Yes, that's the reason.




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-04-06 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r179790587
 
 

 ##
 File path: src/operator/nn/mkldnn/mkldnn_base.cc
 ##
 @@ -211,6 +211,7 @@ mkldnn_memory_format_t 
GetDefaultFormat(mkldnn::memory::desc desc) {
   case mkldnn_hwio:
   case mkldnn_OIhw8i8o:
   case mkldnn_OIhw16i16o:
+  case mkldnn_OIhw4i16o4i:
 
 Review comment:
   Should we remove this unrelated change from this PR?




[GitHub] jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN support for model quantization

2018-04-06 Thread GitBox
jinhuang415 commented on a change in pull request #10433: [MXNET-290] MKLDNN 
support for model quantization
URL: https://github.com/apache/incubator-mxnet/pull/10433#discussion_r179790862
 
 

 ##
 File path: src/operator/nn/pooling.cc
 ##
 @@ -368,7 +368,11 @@ height, width)*.
 })
 .set_attr("FListOutputNames",
 [](const NodeAttrs& attrs) {
-  return std::vector{"output"};
+  const PoolingParam  = nnvm::get(attrs.parsed);
+  if (GetNumOutputs(param) == 2)
+return std::vector{"output", "workspace"};
+  else
+return std::vector{"output"};
 
 Review comment:
   This change is already included in PR #10410; better to remove it from this
PR?

