This is an automated email from the ASF dual-hosted git repository.

patriczhao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new ce62873  Add quantization support for GluonCV (#15754)
ce62873 is described below

commit ce62873ae24299242a9c809fd7b83060a0374163
Author: Xinyu Chen <[email protected]>
AuthorDate: Wed Aug 7 13:29:43 2019 +0800

    Add quantization support for GluonCV (#15754)
    
    * enhance quantization api
    
    * integrate gluoncv solution
    
    * support gluon ssd
    
    * enhance api
    
    * [TODO]split to another PR
    
    * enhance example script
    
    * add wildcard match for exclude layers
    
    * support int8 dtype parameter
    
    * enable dataiter api
    
    * use try method
    
    * add unit test for quantize gluon
    
    * fix lint
    
    * fix lint 2
    
    * fix temporary directory in python2
    
    * fix lint
    
    * fix try import and add todo
    
    * trigger
---
 python/mxnet/contrib/quantization.py           | 212 ++++++++++++++++++++++++-
 tests/python/mkl/test_quantization_mkldnn.py   |   5 +-
 tests/python/quantization/test_quantization.py |  48 ++++++
 3 files changed, 261 insertions(+), 4 deletions(-)

diff --git a/python/mxnet/contrib/quantization.py 
b/python/mxnet/contrib/quantization.py
index fa2ab18..262cc07 100644
--- a/python/mxnet/contrib/quantization.py
+++ b/python/mxnet/contrib/quantization.py
@@ -26,6 +26,7 @@ except ImportError:
 import ctypes
 import logging
 import os
+import shutil
 import numpy as np
 from ..base import _LIB, check_call, py_str
 from ..base import c_array, c_str, mx_uint, c_str_array
@@ -34,8 +35,9 @@ from ..symbol import Symbol
 from ..symbol import load as sym_load
 from .. import ndarray
 from ..ndarray import load as nd_load
+from ..ndarray import save as nd_save
 from ..ndarray import NDArray
-from ..io import DataIter
+from ..io import DataIter, DataDesc, DataBatch
 from ..context import cpu, Context
 from ..module import Module
 
@@ -420,6 +422,44 @@ def _load_params(params, logger=logging):
         raise ValueError('Unsupported params provided. Must be either a path 
to the param file or'
                          ' a pair of dictionaries representing arg_params and 
aux_params')
 
+# pylint: disable=super-init-not-called
+class _DataIterWrapper(DataIter):
+    """DataIter wrapper for general iterator, e.g., gluon dataloader"""
+    def __init__(self, calib_data):
+        self._data = calib_data
+        try:
+            calib_iter = iter(calib_data)
+        except TypeError as e:
+            raise TypeError('calib_data is not a valid iterator. 
{}'.format(str(e)))
+        data_example = next(calib_iter)
+        if isinstance(data_example, (list, tuple)):
+            data_example = list(data_example)
+        else:
+            data_example = [data_example]
+        # assume data_example contains at least one item (data, optionally followed by a label)
+        num_data = len(data_example)
+        assert num_data > 0
+        self.provide_data = [DataDesc(name='data', 
shape=(data_example[0].shape))]
+        self.provide_data += [DataDesc(name='data{}'.format(i), shape=x.shape) 
for i, x in enumerate(data_example[1:])]
+        self.batch_size = data_example[0].shape[0]
+        self.reset()
+
+    def reset(self):
+        self._iter = iter(self._data)
+
+    def next(self):
+        return DataBatch(data=next(self._iter))
+# pylint: enable=super-init-not-called
+
+def _as_data_iter(calib_data):
+    """Convert normal iterator to mx.io.DataIter while parsing the 
data_shapes"""
+    if isinstance(calib_data, DataIter):
+        # already validated DataIter, just return
+        return calib_data, calib_data.provide_data
+
+    calib_data = _DataIterWrapper(calib_data)
+    return calib_data, calib_data.provide_data
+
 def quantize_model(sym, arg_params, aux_params,
                    data_names=('data',), label_names=('softmax_label',),
                    ctx=cpu(), excluded_sym_names=None, calib_mode='entropy',
@@ -780,3 +820,173 @@ def calib_graph(qsym, arg_params, aux_params, collector,
     qarg_params = _quantize_params(qsym, arg_params, th_dict)
 
     return qsym, qarg_params, aux_params
+
+def quantize_net(network, quantized_dtype='auto', exclude_layers=None, 
exclude_layers_match=None, calib_data=None,
+                 data_shapes=None, calib_mode='none', num_calib_examples=None, 
ctx=cpu(), logger=logging):
+    """User-level API for Gluon users to generate a quantized SymbolBlock from 
a FP32 HybridBlock w/ or w/o calibration.
+    The backend quantized operators are only enabled for Linux systems. Please 
do not run
+    inference using the quantized models on Windows for now.
+    The quantization implementation adopts the TensorFlow's approach:
+    https://www.tensorflow.org/performance/quantization.
+    The calibration implementation borrows the idea of Nvidia's 8-bit 
Inference with TensorRT:
+    
http://on-demand.gputechconf.com/gtc/2017/presentation/s7310-8-bit-inference-with-tensorrt.pdf
+    and adapts the method to MXNet.
+
+    Parameters
+    ----------
+    network : Gluon HybridBlock
+        Defines the structure of a neural network for FP32 data types.
+    quantized_dtype : str
+        The quantized destination type for input data. Currently supports 'int8'
+        , 'uint8' and 'auto'. 'auto' means automatically select output type 
according to calibration result.
+        Default value is 'auto'.
+    exclude_layers : list of strings
+        A list of strings representing the names of the symbols that users 
want to exclude from being quantized.
+    exclude_layers_match : list of strings
+        A list of strings wildcard matching the names of the symbols that 
users want to exclude
+        from being quantized.
+    calib_data : mx.io.DataIter or gluon.DataLoader
+        An iterable data loading object.
+    data_shapes : list
+        List of DataDesc, required if calib_data is not provided
+    calib_mode : str
+        If calib_mode='none', no calibration will be used and the thresholds 
for
+        requantization after the corresponding layers will be calculated at 
runtime by
+        calling min and max operators. The quantized models generated in this
+        mode are normally 10-20% slower than those with calibrations during 
inference.
+        If calib_mode='naive', the min and max values of the layer outputs 
from a calibration
+        dataset will be directly taken as the thresholds for quantization.
+        If calib_mode='entropy' (default mode), the thresholds for 
quantization will be
+        derived such that the KL divergence between the distributions of FP32 
layer outputs and
+        quantized layer outputs is minimized based upon the calibration 
dataset.
+    calib_layer : function
+        Given a layer's output name in string, return True or False for 
deciding whether to
+        calibrate this layer. If yes, the statistics of the layer's output 
will be collected;
+        otherwise, no information of the layer's output will be collected. If 
not provided,
+        all the layers' outputs that need requantization will be collected.
+    num_calib_examples : int or None
+        The maximum number of examples that user would like to use for 
calibration. If not provided,
+        the whole calibration dataset will be used.
+    ctx : Context
+        Defines the device that users want to run forward propagation on the 
calibration
+        dataset for collecting layer output statistics. Currently, only 
supports single context.
+    logger : Object
+        A logging object for printing information during the process of 
quantization.
+
+    Returns
+    -------
+    network : Gluon SymbolBlock
+        Defines the structure of a neural network for INT8 data types.
+    -------
+    """
+
+    logger.info('Export HybridBlock')
+    network.hybridize()
+    import mxnet as mx
+    if calib_data is not None:
+        if isinstance(calib_data, DataIter):
+            dshapes = calib_data.provide_data
+        else:
+            calib_data, dshapes = _as_data_iter(calib_data)
+    if not data_shapes:
+        data_shapes = dshapes
+    if not data_shapes:
+        raise ValueError('data_shapes required')
+    data_nd = []
+    for shape in data_shapes:
+        data_nd.append(mx.nd.zeros(shape.shape))
+    while True:
+        try:
+            network(*data_nd)
+        except TypeError:
+            del data_nd[-1]
+            del calib_data.provide_data[-1]
+            continue
+        else:
+            break
+
+    import tempfile
+    try:
+        from tempfile import TemporaryDirectory
+    except ImportError:
+        # really simple implementation of TemporaryDirectory
+        class TemporaryDirectory(object):
+            def __init__(self, suffix='', prefix='', dir=''):
+                self._dirname = tempfile.mkdtemp(suffix, prefix, dir)
+
+            def __enter__(self):
+                return self._dirname
+
+            def __exit__(self, exc_type, exc_value, traceback):
+                shutil.rmtree(self._dirname)
+    # TODO(xinyu-intel): tmp solution to save and reload for mxnet.mod.Module.
+    # will enhance `export` function to return `sym, args, auxs` directly.
+    with TemporaryDirectory() as tmpdirname:
+        prefix = os.path.join(tmpdirname, 'tmp')
+        network.export(prefix, epoch=0)
+        symnet, args, auxs = mx.model.load_checkpoint(prefix, 0)
+
+    if exclude_layers is None:
+        exclude_layers = []
+    if exclude_layers_match is None:
+        exclude_layers_match = []
+    for name_match in exclude_layers_match:
+        for layers in list(symnet.get_internals()):
+            if layers.name.find(name_match) != -1:
+                exclude_layers.append(layers.name)
+    logger.info('These layers have been excluded %s' % exclude_layers)
+
+    if ctx == mx.cpu():
+        symnet = symnet.get_backend_symbol('MKLDNN_QUANTIZE')
+
+    qsym, qarg_params, aux_params, collector = quantize_graph(
+        sym=symnet, arg_params=args, aux_params=auxs, 
excluded_sym_names=exclude_layers,
+        calib_mode=calib_mode, calib_layer=None, 
quantized_dtype=quantized_dtype, logger=logger)
+
+    if calib_mode is not None and calib_mode != 'none':
+        if not isinstance(ctx, Context):
+            raise ValueError(
+                'currently only supports single ctx, while received %s' % 
str(ctx))
+        if calib_data is None:
+            raise ValueError(
+                'calib_data must be provided when calib_mode=%s' % calib_mode)
+        if calib_mode in ['naive', 'entropy']:
+            data_names = [pair[0] for pair in calib_data.provide_data]
+            mod = Module(symbol=symnet, context=ctx,
+                         data_names=data_names, label_names=None)
+            mod.bind(for_training=False, data_shapes=data_shapes)
+            mod.set_params(args, auxs, allow_missing=False, force_init=True)
+            num_examples = _collect_layer_statistics(mod, calib_data, 
collector,
+                                                     num_calib_examples, 
logger)
+            logger.info('Collected layer output values from FP32 model using 
%d examples'
+                        % num_examples)
+            qsym, qarg_params, aux_params = calib_graph(
+                qsym=qsym, arg_params=args, aux_params=auxs, 
collector=collector,
+                calib_mode=calib_mode, quantized_dtype=quantized_dtype, 
logger=logger)
+        else:
+            raise ValueError(
+                'please set calibration mode to naive or entropy.')
+    elif calib_mode is not None and calib_mode == 'none':
+        data_names = [pair[0] for pair in data_shapes]
+
+    if ctx == mx.cpu():
+        qsym = qsym.get_backend_symbol('MKLDNN_QUANTIZE')
+
+    from ..gluon import SymbolBlock
+    data_sym = []
+    for name in data_names:
+        data_sym.append(mx.sym.var(name))
+    net = SymbolBlock(qsym, data_sym)
+    # TODO(xinyu-intel): tmp solution to save param_dict and reload for 
SymbolBlock
+    # will enhance SymbolBlock to load args, auxs directly.
+    with TemporaryDirectory() as tmpdirname:
+        prefix = os.path.join(tmpdirname, 'tmp')
+        param_name = '%s-%04d.params' % (prefix + 'net-quantized', 0)
+        save_dict = {('arg:%s' % k): v.as_in_context(cpu())
+                     for k, v in qarg_params.items()}
+        save_dict.update({('aux:%s' % k): v.as_in_context(cpu())
+                          for k, v in aux_params.items()})
+        nd_save(param_name, save_dict)
+        net.collect_params().load(param_name, cast_dtype=True, 
dtype_source='saved')
+        net.collect_params().reset_ctx(ctx)
+    return net
diff --git a/tests/python/mkl/test_quantization_mkldnn.py 
b/tests/python/mkl/test_quantization_mkldnn.py
index 3c8cee4..8ba2f2b 100644
--- a/tests/python/mkl/test_quantization_mkldnn.py
+++ b/tests/python/mkl/test_quantization_mkldnn.py
@@ -27,6 +27,5 @@ from test_quantization import *
 if __name__ == '__main__':
     import nose
     nose.runmodule()
-
-del os.environ['ENABLE_MKLDNN_QUANTIZATION_TEST']
-del os.environ['MXNET_SUBGRAPH_BACKEND']
+    del os.environ['ENABLE_MKLDNN_QUANTIZATION_TEST']
+    del os.environ['MXNET_SUBGRAPH_BACKEND']
diff --git a/tests/python/quantization/test_quantization.py 
b/tests/python/quantization/test_quantization.py
index 294e107..a991417 100644
--- a/tests/python/quantization/test_quantization.py
+++ b/tests/python/quantization/test_quantization.py
@@ -21,6 +21,7 @@ Ref: 
http://images.nvidia.com/content/pdf/tesla/184457-Tesla-P4-Datasheet-NV-Fin
 import os
 import mxnet as mx
 import numpy as np
+from mxnet.gluon.model_zoo import vision
 from mxnet.test_utils import assert_almost_equal, assert_exception, 
rand_ndarray, rand_shape_nd, same, DummyIter
 from common import with_seed
 from mxnet.module import Module
@@ -898,6 +899,53 @@ def test_quantize_model_with_forward():
     for qdtype in ['int8', 'uint8']:
         check_quantize_model(qdtype)
 
+@with_seed()
+def test_quantize_gluon_with_forward():
+    def check_quantize_net(qdtype):
+        if is_test_for_native_cpu():
+            print('skipped testing test_quantize_model_with_forward for native 
cpu since it is not supported yet')
+            return
+        elif qdtype == 'uint8' and is_test_for_gpu():
+            print('skipped testing test_quantize_model_with_forward for gpu 
uint8 since it is not supported yet')
+            return
+
+        data_shape = (32, 3, 224, 224)
+        data_shapes = [mx.io.DataDesc(name='data', shape=data_shape)]
+        label_shape = (32, 1)
+        batch_size = 1
+        resnet18_v1 = vision.resnet18_v1(pretrained=True)
+        resnet18_v1.collect_params().reset_ctx(mx.current_context())
+        excluded_names_match = []
+        if mx.current_context() == mx.gpu():
+            excluded_names_match += ['activation', 'relu', 'conv0']
+        num_calib_examples = 5
+
+        random_data = mx.random.uniform(shape=data_shape)
+        random_label = mx.random.uniform(shape=label_shape)
+        dataset = mx.gluon.data.dataset.ArrayDataset(random_data, random_label)
+        calib_data = mx.gluon.data.DataLoader(dataset, batch_size=batch_size)
+
+        quantized_resnet18_v1 = mx.contrib.quant.quantize_net(resnet18_v1, 
quantized_dtype=qdtype,
+                                                              
exclude_layers=None,
+                                                              
exclude_layers_match=excluded_names_match,
+                                                              
calib_mode='none',
+                                                              
data_shapes=data_shapes,
+                                                              
ctx=mx.current_context())
+        quantized_resnet18_v1.hybridize(static_alloc=True, static_shape=True)
+        quantized_resnet18_v1(random_data)
+
+        quantized_resnet18_v1 = mx.contrib.quant.quantize_net(resnet18_v1, 
quantized_dtype=qdtype,
+                                                              
exclude_layers=None,
+                                                              
exclude_layers_match=excluded_names_match,
+                                                              
calib_data=calib_data,
+                                                              
calib_mode='naive',
+                                                              
num_calib_examples=num_calib_examples,
+                                                              
ctx=mx.current_context())
+        quantized_resnet18_v1.hybridize(static_alloc=True, static_shape=True)
+        quantized_resnet18_v1(random_data)
+
+    for qdtype in ['int8', 'uint8']:
+        check_quantize_net(qdtype)
 
 @with_seed()
 def test_quantize_sym_with_calib():

Reply via email to