This is an automated email from the ASF dual-hosted git repository.
patriczhao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new ce62873 Add quantization support for GluonCV (#15754)
ce62873 is described below
commit ce62873ae24299242a9c809fd7b83060a0374163
Author: Xinyu Chen <[email protected]>
AuthorDate: Wed Aug 7 13:29:43 2019 +0800
Add quantization support for GluonCV (#15754)
* enhance quantization api
* integrate gluoncv solution
* support gluon ssd
* enhance api
* [TODO]split to another PR
* enhance example script
* add wildcard match for exclude layers
* support int8 dtype parameter
* enable dataiter api
* use try method
* add unit test for quantize gluon
* fix lint
* fix lint 2
* fix temporary directory in python2
* fix lint
* fix try import and add todo
* trigger
---
python/mxnet/contrib/quantization.py | 212 ++++++++++++++++++++++++-
tests/python/mkl/test_quantization_mkldnn.py | 5 +-
tests/python/quantization/test_quantization.py | 48 ++++++
3 files changed, 261 insertions(+), 4 deletions(-)
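For context, the headline addition is the user-facing `quantize_net` API in mxnet.contrib.quantization. A minimal usage sketch, mirroring the new unit test in this patch (the model-zoo network and the random calibration data are illustrative, not part of the commit):

    import mxnet as mx
    from mxnet.gluon.model_zoo import vision

    # FP32 Gluon HybridBlock to quantize
    net = vision.resnet18_v1(pretrained=True)

    # random calibration samples wrapped in a Gluon DataLoader
    data = mx.random.uniform(shape=(32, 3, 224, 224))
    label = mx.random.uniform(shape=(32, 1))
    dataset = mx.gluon.data.dataset.ArrayDataset(data, label)
    calib_data = mx.gluon.data.DataLoader(dataset, batch_size=1)

    # returns an INT8 SymbolBlock; 'naive' calibration takes the min/max
    # of layer outputs over num_calib_examples as quantization thresholds
    qnet = mx.contrib.quant.quantize_net(net, quantized_dtype='auto',
                                         calib_data=calib_data,
                                         calib_mode='naive',
                                         num_calib_examples=5,
                                         ctx=mx.cpu())
    qnet.hybridize(static_alloc=True, static_shape=True)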
diff --git a/python/mxnet/contrib/quantization.py b/python/mxnet/contrib/quantization.py
index fa2ab18..262cc07 100644
--- a/python/mxnet/contrib/quantization.py
+++ b/python/mxnet/contrib/quantization.py
@@ -26,6 +26,7 @@ except ImportError:
import ctypes
import logging
import os
+import shutil
import numpy as np
from ..base import _LIB, check_call, py_str
from ..base import c_array, c_str, mx_uint, c_str_array
@@ -34,8 +35,9 @@ from ..symbol import Symbol
from ..symbol import load as sym_load
from .. import ndarray
from ..ndarray import load as nd_load
+from ..ndarray import save as nd_save
from ..ndarray import NDArray
-from ..io import DataIter
+from ..io import DataIter, DataDesc, DataBatch
from ..context import cpu, Context
from ..module import Module
@@ -420,6 +422,44 @@ def _load_params(params, logger=logging):
        raise ValueError('Unsupported params provided. Must be either a path to the param file or'
                         ' a pair of dictionaries representing arg_params and aux_params')
+# pylint: disable=super-init-not-called
+class _DataIterWrapper(DataIter):
+    """DataIter wrapper for a general iterator, e.g., a gluon dataloader"""
+    def __init__(self, calib_data):
+        self._data = calib_data
+        try:
+            calib_iter = iter(calib_data)
+        except TypeError as e:
+            raise TypeError('calib_data is not a valid iterator. {}'.format(str(e)))
+        data_example = next(calib_iter)
+        if isinstance(data_example, (list, tuple)):
+            data_example = list(data_example)
+        else:
+            data_example = [data_example]
+        # there must be at least one data element; a label may follow
+        num_data = len(data_example)
+        assert num_data > 0
+        self.provide_data = [DataDesc(name='data', shape=(data_example[0].shape))]
+        self.provide_data += [DataDesc(name='data{}'.format(i), shape=x.shape)
+                              for i, x in enumerate(data_example[1:])]
+        self.batch_size = data_example[0].shape[0]
+        self.reset()
+
+    def reset(self):
+        self._iter = iter(self._data)
+
+    def next(self):
+        return DataBatch(data=next(self._iter))
+# pylint: enable=super-init-not-called
+
+def _as_data_iter(calib_data):
+    """Convert a normal iterator to mx.io.DataIter while parsing the data_shapes"""
+    if isinstance(calib_data, DataIter):
+        # already validated DataIter, just return
+        return calib_data, calib_data.provide_data
+
+    calib_data = _DataIterWrapper(calib_data)
+    return calib_data, calib_data.provide_data
+
def quantize_model(sym, arg_params, aux_params,
                   data_names=('data',), label_names=('softmax_label',),
                   ctx=cpu(), excluded_sym_names=None, calib_mode='entropy',
@@ -780,3 +820,173 @@ def calib_graph(qsym, arg_params, aux_params, collector,
    qarg_params = _quantize_params(qsym, arg_params, th_dict)
    return qsym, qarg_params, aux_params
+
+def quantize_net(network, quantized_dtype='auto', exclude_layers=None, exclude_layers_match=None, calib_data=None,
+                 data_shapes=None, calib_mode='none', num_calib_examples=None, ctx=cpu(), logger=logging):
+    """User-level API for Gluon users to generate a quantized SymbolBlock from a FP32 HybridBlock w/ or w/o calibration.
+    The backend quantized operators are only enabled for Linux systems. Please do not run
+    inference using the quantized models on Windows for now.
+    The quantization implementation adopts TensorFlow's approach:
+    https://www.tensorflow.org/performance/quantization.
+    The calibration implementation borrows the idea of Nvidia's 8-bit Inference with TensorRT:
+    http://on-demand.gputechconf.com/gtc/2017/presentation/s7310-8-bit-inference-with-tensorrt.pdf
+    and adapts the method to MXNet.
+
+    Parameters
+    ----------
+    network : Gluon HybridBlock
+        Defines the structure of a neural network for FP32 data types.
+    quantized_dtype : str
+        The quantized destination type for input data. Currently supports 'int8',
+        'uint8' and 'auto'. 'auto' means automatically selecting the output type
+        according to the calibration result. Default value is 'auto'.
+    exclude_layers : list of strings
+        A list of strings representing the names of the symbols that users want to exclude
+        from being quantized.
+    exclude_layers_match : list of strings
+        A list of strings wildcard-matching the names of the symbols that users want to exclude
+        from being quantized.
+    calib_data : mx.io.DataIter or gluon.DataLoader
+        An iterable data loading object.
+    data_shapes : list
+        List of DataDesc, required if calib_data is not provided.
+    calib_mode : str
+        If calib_mode='none', no calibration will be used and the thresholds for
+        requantization after the corresponding layers will be calculated at runtime by
+        calling min and max operators. The quantized models generated in this
+        mode are normally 10-20% slower than those with calibration during inference.
+        If calib_mode='naive', the min and max values of the layer outputs from a calibration
+        dataset will be directly taken as the thresholds for quantization.
+        If calib_mode='entropy', the thresholds for quantization will be
+        derived such that the KL divergence between the distributions of FP32 layer outputs and
+        quantized layer outputs is minimized based upon the calibration dataset.
+    calib_layer : function
+        Given a layer's output name in string, return True or False for deciding whether to
+        calibrate this layer. If yes, the statistics of the layer's output will be collected;
+        otherwise, no information of the layer's output will be collected. If not provided,
+        all the layers' outputs that need requantization will be collected.
+    num_calib_examples : int or None
+        The maximum number of examples that users would like to use for calibration. If not provided,
+        the whole calibration dataset will be used.
+    ctx : Context
+        Defines the device on which to run forward propagation over the calibration
+        dataset for collecting layer output statistics. Currently, only a single context is supported.
+    logger : Object
+        A logging object for printing information during the process of quantization.
+
+    Returns
+    -------
+    network : Gluon SymbolBlock
+        Defines the structure of a neural network for INT8 data types.
+    """
+
+    logger.info('Export HybridBlock')
+    network.hybridize()
+    import mxnet as mx
+    if calib_data is not None:
+        if isinstance(calib_data, DataIter):
+            dshapes = calib_data.provide_data
+        else:
+            calib_data, dshapes = _as_data_iter(calib_data)
+        if not data_shapes:
+            data_shapes = dshapes
+    if not data_shapes:
+        raise ValueError('data_shapes required')
+    data_nd = []
+    for shape in data_shapes:
+        data_nd.append(mx.nd.zeros(shape.shape))
+    while True:
+        try:
+            network(*data_nd)
+        except TypeError:
+            del data_nd[-1]
+            del calib_data.provide_data[-1]
+            continue
+        else:
+            break
+
+    import tempfile
+    try:
+        from tempfile import TemporaryDirectory
+    except ImportError:
+        # really simple implementation of TemporaryDirectory for Python 2
+        class TemporaryDirectory(object):
+            def __init__(self, suffix='', prefix='', dir=''):
+                self._dirname = tempfile.mkdtemp(suffix, prefix, dir)
+
+            def __enter__(self):
+                return self._dirname
+
+            def __exit__(self, exc_type, exc_value, traceback):
+                shutil.rmtree(self._dirname)
+    # TODO(xinyu-intel): tmp solution to save and reload for mxnet.mod.Module.
+    # will enhance `export` function to return `sym, args, auxs` directly.
+    with TemporaryDirectory() as tmpdirname:
+        prefix = os.path.join(tmpdirname, 'tmp')
+        network.export(prefix, epoch=0)
+        symnet, args, auxs = mx.model.load_checkpoint(prefix, 0)
+
+    if exclude_layers is None:
+        exclude_layers = []
+    if exclude_layers_match is None:
+        exclude_layers_match = []
+    for name_match in exclude_layers_match:
+        for layers in list(symnet.get_internals()):
+            if layers.name.find(name_match) != -1:
+                exclude_layers.append(layers.name)
+    logger.info('These layers have been excluded: %s' % exclude_layers)
+
+    if ctx == mx.cpu():
+        symnet = symnet.get_backend_symbol('MKLDNN_QUANTIZE')
+
+    qsym, qarg_params, aux_params, collector = quantize_graph(
+        sym=symnet, arg_params=args, aux_params=auxs, excluded_sym_names=exclude_layers,
+        calib_mode=calib_mode, calib_layer=None, quantized_dtype=quantized_dtype, logger=logger)
+
+    if calib_mode is not None and calib_mode != 'none':
+        if not isinstance(ctx, Context):
+            raise ValueError(
+                'currently only supports single ctx, while received %s' % str(ctx))
+        if calib_data is None:
+            raise ValueError(
+                'calib_data must be provided when calib_mode=%s' % calib_mode)
+        if calib_mode in ['naive', 'entropy']:
+            data_names = [pair[0] for pair in calib_data.provide_data]
+            mod = Module(symbol=symnet, context=ctx,
+                         data_names=data_names, label_names=None)
+            mod.bind(for_training=False, data_shapes=data_shapes)
+            mod.set_params(args, auxs, allow_missing=False, force_init=True)
+            num_examples = _collect_layer_statistics(mod, calib_data, collector,
+                                                     num_calib_examples, logger)
+            logger.info('Collected layer output values from FP32 model using %d examples'
+                        % num_examples)
+            qsym, qarg_params, aux_params = calib_graph(
+                qsym=qsym, arg_params=args, aux_params=auxs, collector=collector,
+                calib_mode=calib_mode, quantized_dtype=quantized_dtype, logger=logger)
+        else:
+            raise ValueError(
+                'please set calibration mode to naive or entropy.')
+    elif calib_mode is not None and calib_mode == 'none':
+        data_names = [pair[0] for pair in data_shapes]
+
+    if ctx == mx.cpu():
+        qsym = qsym.get_backend_symbol('MKLDNN_QUANTIZE')
+
+    from ..gluon import SymbolBlock
+    data_sym = []
+    for name in data_names:
+        data_sym.append(mx.sym.var(name))
+    net = SymbolBlock(qsym, data_sym)
+    # TODO(xinyu-intel): tmp solution to save param_dict and reload for SymbolBlock
+    # will enhance SymbolBlock to load args, auxs directly.
+    with TemporaryDirectory() as tmpdirname:
+        prefix = os.path.join(tmpdirname, 'tmp')
+        param_name = '%s-%04d.params' % (prefix + 'net-quantized', 0)
+        save_dict = {('arg:%s' % k): v.as_in_context(cpu())
+                     for k, v in qarg_params.items()}
+        save_dict.update({('aux:%s' % k): v.as_in_context(cpu())
+                          for k, v in aux_params.items()})
+        nd_save(param_name, save_dict)
+        net.collect_params().load(param_name, cast_dtype=True, dtype_source='saved')
+        net.collect_params().reset_ctx(ctx)
+    return net
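Note that `exclude_layers_match` is applied as a plain substring match over the exported symbol's internal names (via `str.find` in the loop above), not shell-style globbing. A small illustration of the matching rule, using hypothetical layer names:

    # same rule as the loop in quantize_net: a layer is excluded if any
    # pattern occurs anywhere in its internal symbol name
    internal_names = ['resnetv10_conv0_fwd', 'resnetv10_relu0_fwd', 'resnetv10_pool0_fwd']
    exclude_layers_match = ['relu', 'conv0']
    excluded = [name for name in internal_names
                for pattern in exclude_layers_match
                if name.find(pattern) != -1]
    # excluded == ['resnetv10_conv0_fwd', 'resnetv10_relu0_fwd']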
diff --git a/tests/python/mkl/test_quantization_mkldnn.py b/tests/python/mkl/test_quantization_mkldnn.py
index 3c8cee4..8ba2f2b 100644
--- a/tests/python/mkl/test_quantization_mkldnn.py
+++ b/tests/python/mkl/test_quantization_mkldnn.py
@@ -27,6 +27,5 @@ from test_quantization import *
if __name__ == '__main__':
    import nose
    nose.runmodule()
-
-del os.environ['ENABLE_MKLDNN_QUANTIZATION_TEST']
-del os.environ['MXNET_SUBGRAPH_BACKEND']
+    del os.environ['ENABLE_MKLDNN_QUANTIZATION_TEST']
+    del os.environ['MXNET_SUBGRAPH_BACKEND']
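The indentation change above moves the environment-variable cleanup under the `__main__` guard, so importing the module (e.g., during test discovery) no longer deletes the variables before the tests run. Roughly, the file now follows this pattern; the setup lines at the top are assumed, since they sit outside this hunk:

    import os
    # assumed setup (outside this hunk): enable the MKL-DNN quantization path
    os.environ['ENABLE_MKLDNN_QUANTIZATION_TEST'] = '1'
    os.environ['MXNET_SUBGRAPH_BACKEND'] = 'MKLDNN'
    from test_quantization import *

    if __name__ == '__main__':
        import nose
        nose.runmodule()
        # cleanup now runs only when the file is executed directly
        del os.environ['ENABLE_MKLDNN_QUANTIZATION_TEST']
        del os.environ['MXNET_SUBGRAPH_BACKEND']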
diff --git a/tests/python/quantization/test_quantization.py b/tests/python/quantization/test_quantization.py
index 294e107..a991417 100644
--- a/tests/python/quantization/test_quantization.py
+++ b/tests/python/quantization/test_quantization.py
@@ -21,6 +21,7 @@ Ref: http://images.nvidia.com/content/pdf/tesla/184457-Tesla-P4-Datasheet-NV-Fin
import os
import mxnet as mx
import numpy as np
+from mxnet.gluon.model_zoo import vision
from mxnet.test_utils import assert_almost_equal, assert_exception, rand_ndarray, rand_shape_nd, same, DummyIter
from common import with_seed
from mxnet.module import Module
@@ -898,6 +899,53 @@ def test_quantize_model_with_forward():
    for qdtype in ['int8', 'uint8']:
        check_quantize_model(qdtype)
+@with_seed()
+def test_quantize_gluon_with_forward():
+    def check_quantize_net(qdtype):
+        if is_test_for_native_cpu():
+            print('skipped testing test_quantize_gluon_with_forward for native cpu since it is not supported yet')
+            return
+        elif qdtype == 'uint8' and is_test_for_gpu():
+            print('skipped testing test_quantize_gluon_with_forward for gpu uint8 since it is not supported yet')
+            return
+
+        data_shape = (32, 3, 224, 224)
+        data_shapes = [mx.io.DataDesc(name='data', shape=data_shape)]
+        label_shape = (32, 1)
+        batch_size = 1
+        resnet18_v1 = vision.resnet18_v1(pretrained=True)
+        resnet18_v1.collect_params().reset_ctx(mx.current_context())
+        excluded_names_match = []
+        if mx.current_context() == mx.gpu():
+            excluded_names_match += ['activation', 'relu', 'conv0']
+        num_calib_examples = 5
+
+        random_data = mx.random.uniform(shape=data_shape)
+        random_label = mx.random.uniform(shape=label_shape)
+        dataset = mx.gluon.data.dataset.ArrayDataset(random_data, random_label)
+        calib_data = mx.gluon.data.DataLoader(dataset, batch_size=batch_size)
+
+        quantized_resnet18_v1 = mx.contrib.quant.quantize_net(resnet18_v1, quantized_dtype=qdtype,
+                                                              exclude_layers=None,
+                                                              exclude_layers_match=excluded_names_match,
+                                                              calib_mode='none',
+                                                              data_shapes=data_shapes,
+                                                              ctx=mx.current_context())
+        quantized_resnet18_v1.hybridize(static_alloc=True, static_shape=True)
+        quantized_resnet18_v1(random_data)
+
+        quantized_resnet18_v1 = mx.contrib.quant.quantize_net(resnet18_v1, quantized_dtype=qdtype,
+                                                              exclude_layers=None,
+                                                              exclude_layers_match=excluded_names_match,
+                                                              calib_data=calib_data,
+                                                              calib_mode='naive',
+                                                              num_calib_examples=num_calib_examples,
+                                                              ctx=mx.current_context())
+        quantized_resnet18_v1.hybridize(static_alloc=True, static_shape=True)
+        quantized_resnet18_v1(random_data)
+
+    for qdtype in ['int8', 'uint8']:
+        check_quantize_net(qdtype)
@with_seed()
def test_quantize_sym_with_calib():