zhreshold commented on a change in pull request #15754: Add quantization
support for GluonCV
URL: https://github.com/apache/incubator-mxnet/pull/15754#discussion_r310777249
##########
File path: python/mxnet/contrib/quantization.py
##########
@@ -780,3 +820,170 @@ def calib_graph(qsym, arg_params, aux_params, collector,
qarg_params = _quantize_params(qsym, arg_params, th_dict)
return qsym, qarg_params, aux_params
+
+def quantize_net(network, quantized_dtype='auto', exclude_layers=None,
exclude_layers_match=None, calib_data=None,
+ data_shapes=None, calib_mode='none', num_calib_examples=None,
ctx=cpu(), logger=logging):
+ """User-level API for Gluon users to generate a quantized SymbolBlock from
a FP32 HybridBlock w/ or w/o calibration.
+ The backend quantized operators are only enabled for Linux systems. Please
do not run
+ inference using the quantized models on Windows for now.
+ The quantization implementation adopts the TensorFlow's approach:
+ https://www.tensorflow.org/performance/quantization.
+ The calibration implementation borrows the idea of Nvidia's 8-bit
Inference with TensorRT:
+
http://on-demand.gputechconf.com/gtc/2017/presentation/s7310-8-bit-inference-with-tensorrt.pdf
+ and adapts the method to MXNet.
+
+ Parameters
+ ----------
+ network : Gluon HybridBlock
+ Defines the structure of a neural network for FP32 data types.
+ quantized_dtype : str
+ The quantized destination type for input data. Currently support 'int8'
+ , 'uint8' and 'auto'. 'auto' means automatically select output type
according to calibration result.
+ Default value is 'int8'.
+ exclude_layers : list of strings
+ A list of strings representing the names of the symbols that users
want to excluding
+ exclude_layers_match : list of strings
+ A list of strings wildcard matching the names of the symbols that
users want to excluding
+ from being quantized.
+ calib_data : mx.io.DataIter or gluon.DataLoader
+ A iterable data loading object.
+ data_shapes : list
+ List of DataDesc, required if calib_data is not provided
+ calib_mode : str
+ If calib_mode='none', no calibration will be used and the thresholds
for
+ requantization after the corresponding layers will be calculated at
runtime by
+ calling min and max operators. The quantized models generated in this
+ mode are normally 10-20% slower than those with calibrations during
inference.
+ If calib_mode='naive', the min and max values of the layer outputs
from a calibration
+ dataset will be directly taken as the thresholds for quantization.
+ If calib_mode='entropy' (default mode), the thresholds for
quantization will be
+ derived such that the KL divergence between the distributions of FP32
layer outputs and
+ quantized layer outputs is minimized based upon the calibration
dataset.
+ calib_layer : function
+ Given a layer's output name in string, return True or False for
deciding whether to
+ calibrate this layer. If yes, the statistics of the layer's output
will be collected;
+ otherwise, no information of the layer's output will be collected. If
not provided,
+ all the layers' outputs that need requantization will be collected.
+ num_calib_examples : int or None
+ The maximum number of examples that user would like to use for
calibration. If not provided,
+ the whole calibration dataset will be used.
+ ctx : Context
+ Defines the device that users want to run forward propagation on the
calibration
+ dataset for collecting layer output statistics. Currently, only
supports single context.
+ logger : Object
+ A logging object for printing information during the process of
quantization.
+
+ Returns
+ -------
+ network : Gluon SymbolBlock
+ Defines the structure of a neural network for INT8 data types.
+ -------
+ """
+
+ logger.info('Export HybridBlock')
+ network.hybridize()
+ import mxnet as mx
+ if calib_data is not None:
+ if isinstance(calib_data, DataIter):
+ dshapes = calib_data.provide_data
+ else:
+ calib_data, dshapes = _as_data_iter(calib_data)
+ if not data_shapes:
+ data_shapes = dshapes
+ if not data_shapes:
+ raise ValueError('data_shapes required')
+ data_nd = []
+ for shape in data_shapes:
+ data_nd.append(mx.nd.zeros(shape.shape))
+ while True:
+ try:
+ network(*data_nd)
+ except TypeError:
+ del data_nd[-1]
+ del calib_data.provide_data[-1]
+ continue
+ else:
+ break
+
+ import tempfile
+ try:
+ from tempfile import TemporaryDirectory
+ except AttributeError:
+ # really simple implementation of TemporaryDirectory
+ class TemporaryDirectory(object):
+ def __init__(self, suffix='', prefix='', dir=''):
+ self._dirname = tempfile.mkdtemp(suffix, prefix, dir)
+
+ def __enter__(self):
+ return self._dirname
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ shutil.rmtree(self._dirname)
+
+ with TemporaryDirectory() as tmpdirname:
+ prefix = os.path.join(tmpdirname, 'tmp')
+ network.export(prefix, epoch=0)
Review comment:
It's not mandatory or urgent right now, but GluonCV also has the same
implementation.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services