zhreshold commented on a change in pull request #15754: Add quantization 
support for GluonCV
URL: https://github.com/apache/incubator-mxnet/pull/15754#discussion_r310776345
 
 

 ##########
 File path: python/mxnet/contrib/quantization.py
 ##########
 @@ -780,3 +820,170 @@ def calib_graph(qsym, arg_params, aux_params, collector,
     qarg_params = _quantize_params(qsym, arg_params, th_dict)
 
     return qsym, qarg_params, aux_params
+
+def quantize_net(network, quantized_dtype='auto', exclude_layers=None, 
exclude_layers_match=None, calib_data=None,
+                 data_shapes=None, calib_mode='none', num_calib_examples=None, 
ctx=cpu(), logger=logging):
+    """User-level API for Gluon users to generate a quantized SymbolBlock from 
a FP32 HybridBlock w/ or w/o calibration.
+    The backend quantized operators are only enabled for Linux systems. Please 
do not run
+    inference using the quantized models on Windows for now.
+    The quantization implementation adopts the TensorFlow's approach:
+    https://www.tensorflow.org/performance/quantization.
+    The calibration implementation borrows the idea of Nvidia's 8-bit 
Inference with TensorRT:
+    
http://on-demand.gputechconf.com/gtc/2017/presentation/s7310-8-bit-inference-with-tensorrt.pdf
+    and adapts the method to MXNet.
+
+    Parameters
+    ----------
+    network : Gluon HybridBlock
+        Defines the structure of a neural network for FP32 data types.
+    quantized_dtype : str
+        The quantized destination type for input data. Currently support 'int8'
+        , 'uint8' and 'auto'. 'auto' means automatically select output type 
according to calibration result.
+        Default value is 'int8'.
+    exclude_layers : list of strings
+        A list of strings representing the names of the symbols that users 
want to excluding
+    exclude_layers_match : list of strings
+        A list of strings wildcard matching the names of the symbols that 
users want to excluding
+        from being quantized.
+    calib_data : mx.io.DataIter or gluon.DataLoader
+        A iterable data loading object.
+    data_shapes : list
+        List of DataDesc, required if calib_data is not provided
+    calib_mode : str
+        If calib_mode='none', no calibration will be used and the thresholds 
for
+        requantization after the corresponding layers will be calculated at 
runtime by
+        calling min and max operators. The quantized models generated in this
+        mode are normally 10-20% slower than those with calibrations during 
inference.
+        If calib_mode='naive', the min and max values of the layer outputs 
from a calibration
+        dataset will be directly taken as the thresholds for quantization.
+        If calib_mode='entropy' (default mode), the thresholds for 
quantization will be
+        derived such that the KL divergence between the distributions of FP32 
layer outputs and
+        quantized layer outputs is minimized based upon the calibration 
dataset.
+    calib_layer : function
+        Given a layer's output name in string, return True or False for 
deciding whether to
+        calibrate this layer. If yes, the statistics of the layer's output 
will be collected;
+        otherwise, no information of the layer's output will be collected. If 
not provided,
+        all the layers' outputs that need requantization will be collected.
+    num_calib_examples : int or None
+        The maximum number of examples that user would like to use for 
calibration. If not provided,
+        the whole calibration dataset will be used.
+    ctx : Context
+        Defines the device that users want to run forward propagation on the 
calibration
+        dataset for collecting layer output statistics. Currently, only 
supports single context.
+    logger : Object
+        A logging object for printing information during the process of 
quantization.
+
+    Returns
+    -------
+    network : Gluon SymbolBlock
+        Defines the structure of a neural network for INT8 data types.
+    -------
+    """
+
+    logger.info('Export HybridBlock')
+    network.hybridize()
+    import mxnet as mx
+    if calib_data is not None:
+        if isinstance(calib_data, DataIter):
+            dshapes = calib_data.provide_data
+        else:
+            calib_data, dshapes = _as_data_iter(calib_data)
+    if not data_shapes:
+        data_shapes = dshapes
+    if not data_shapes:
+        raise ValueError('data_shapes required')
+    data_nd = []
+    for shape in data_shapes:
+        data_nd.append(mx.nd.zeros(shape.shape))
+    while True:
+        try:
+            network(*data_nd)
+        except TypeError:
+            del data_nd[-1]
+            del calib_data.provide_data[-1]
+            continue
+        else:
+            break
+
+    import tempfile
+    try:
+        from tempfile import TemporaryDirectory
+    except AttributeError:
+        # really simple implementation of TemporaryDirectory
+        class TemporaryDirectory(object):
+            def __init__(self, suffix='', prefix='', dir=''):
+                self._dirname = tempfile.mkdtemp(suffix, prefix, dir)
+
+            def __enter__(self):
+                return self._dirname
+
+            def __exit__(self, exc_type, exc_value, traceback):
+                shutil.rmtree(self._dirname)
+
+    with TemporaryDirectory() as tmpdirname:
+        prefix = os.path.join(tmpdirname, 'tmp')
+        network.export(prefix, epoch=0)
 
 Review comment:
   I feel it would be more convenient to let `export` return (sym, arg_params,
aux_params) directly, so we can avoid using a temporary file.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to