This is an automated email from the ASF dual-hosted git repository.

haibin pushed a change to branch fit-api
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git.


 discard 588730c  [Fit-API] Adress PR comments (#14885)
    omit 33e8845  [Fit API] update estimator (#14849)
    omit 0748b47  [MXNET-1396][Fit-API] Update default handler logic (#14765)
    omit 7e10355  [Fit API] improve event handlers (#14685)
    omit d4bf975  move to gluon contrib (#14635)
    omit ffad48a  move estimator to contrib (#14633)
    omit d76234b  [MXNET-1344, 1346][FIT API] Retrieve Batch size and Logging 
verbose support for Gluon fit() API (#14587)
    omit 213ee5f  [MXNet-1343][Fit API]Add CNN integration test for fit() API 
(#14405)
    omit 3629611  [MXNet-1375][Fit API]Added RNN integration test for fit() API 
(#14547)
    omit 42a7061  [MXNet-1340][Fit API]Update train stats (#14494)
    omit 8c8d015  [MXNet-1349][Fit API]Add validation support and unit tests 
for fit() API (#14442)
    omit 7f06bfa  Fixed issue where the estimator was printing beyond the 
dataset size … (#14464)
    omit f250e1e  [MXNet-1334][Fit API]base class for estimator and 
eventhandler (#14346)
     add b68f18c  Fix dockerized GPU builds in dev_menu (#14603)
     add dab4ffc  replace std::random_shuffle to std::shuffle (#14523)
     add 3781816  Add exception handling support for waitall (#14397)
     add daabe5c  split_and_load can now handle num_ctx > num_data. Issue 
#13909 (#14607)
     add 1dafa1a  Comment out test_unix_python3_tensorrt_gpu step (#14642)
     add 9dda62f  The folder python-howto was removed in an earlier commit. The 
reference to that folder was not removed. Making a PR to remove the reference 
to this folder to keep documents consistent (#14573)
     add 4530ad8  Fix aspect ratio sampling for RandomResizedCrop (#14585)
     add 74e71e9  [MXNET-400] support string type for kvstore key in 
cpp-package (#10792)
     add 733e54c  Added repeats for github status updates (#14530)
     add 9443ae6  Fix warning on macro expansion using defined. (#14598)
     add c6516cc  [MXNET-1359] Adds a multiclass-MCC metric derived from 
Pearson (#14461)
     add d596b59  Support SSD f32/int8 evaluation on COCO dataset (#14646)
     add fde4963  [WIP][Dependency Update] Upgrade the libtiff to 4.0.10 
(#14623)
     add 8f0878c  [MXNET-1287] Miscellaneous Scala warning fixes (#14658)
     add 2c5d7f7  Optimize transpose operator with MKL-DNN (#14545)
     add 52e2e8e  Fix scalastyle (#14669)
     add e701e71  Avoid secondary deployment of package to local (#14647)
     add 26b14bc  [MXNET-1287] Up scala comp (#14667)
     add 596ef3a  Add nd.power and sym.pow (#14606)
     add 6a93bda  Fix scaladoc scalastyle violations in Infer package (#14671)
     add 0284236  [Clojure] enhance draw bounding box (#14567)
     add 800590e  Add MXEnginePushAsync and MXEnginePushSync C APIs (#14615)
     add c788804  Updates tolerances for test_layer_bidirectional (#14682)
     add 30d479f  Tweak the copy for the cudnn autotuning warning. (#14680)
     add 5fc5c27  Fix profiler check (#14677)
     add 1c49e40  Change RNN OP to stateful (#14476)
     add c437d5b  use mkl sparse matrix to improve performance (#14492)
     add 273ebc7  [DEP] upgrade dmlc-core (#14510)
     add a5db391  [Clojure] Add methods based on NDArrayAPI/SymbolAPI (#14195)
     add c2ba51b  [Clojure] Clojure BERT QA example (#14691)
     add b3b952f  fp16 safe norm operator (#14616)
     add f90d1c0  Use ubuntu_rat container for rat check (#14678)
     add 413fe97  Avoid uneccesary vector copies in imperative_utils.cc (#14665)
     add 1f84682  Properly handling custom op exception by modify engine 
(#14693)
     add 52a3553  [docstring] improve docstring and indentation in `module.clj` 
(#14705)
     add 3f3ba92  [numpy] Support zero-dim and zero-size tensors in MXNet 
(#14661)
     add 51d3291  Updated docs for R-package installation (#14269)
     add 8e04b88  Update inception_inference.cpp (#14674)
     add ff04de0  Add vim-nox to ci/docker/install/ubuntu_core.sh (#14632)
     add a26ad37  Disable USE_GPERFTOOLS (#14711)
     add 3e2f752  Reference engine from chunk via weak pointer (#14591)
     add 42e929b  Fix spelling in threaded_engine_test (#14709)
     add 3b23c2d  Fix documentation for bilinear upsampling and add unit test 
(#14035)
     add a083a61  [MKLDNN]Improve quantizeV2 and dequantize latency (#14641)
     add 0da4b67  [MKLDNN]Add quantized relu (#14604)
     add 18d4051  Add publish test of PyPi cu100mkl (#14637)
     add 93238a2  [contrib][op] fix MultiBoxPrior confusing results if first 
ratio is not 1.0 (#13763)
     add 391a1be  Set idx2name for Optimizer object (#14703)
     add 818be02  [MXNET-1377] Add static-dependencies licenses (#14726)
     add 5b6e25b  [MXNET-1287] Feat dep (#14668)
     add dc48cd2  License Googletest and Appendix (#14687)
     add 153d2f4  [MXNET-1385] Improved Scala Init and Macros warning messages 
(#14656)
     add 5331933  Reenable TensorRT step (#14654)
     add dd1004b  fix pi instructions (#14746)
     add 100586a  Change size_t to int within for loop to fix windows build 
error (#14740)
     add 68efc15  fix custom op fork test (#14753)
     add 494c29e  [BUGFIX] fix ELU function will appear nan when calculating 
the gradient (#14673)
     add da7fff7  fix min max on zero-sized ndarray (#14745)
     add a1b0a3a  [Clojure] Better api docstrings by replacing newlines (#14752)
     add 0f63659  add a compiler flag to use int64 as tensor size (#14570)
     add 3b39c56  fix shape index bug (#14518)
     add 2fd4720  fix acc_type_switch macro with extra tests (#14773)
     add 014ca13  [DOC] Update ubuntu install instructions from source (#14534)
     add 8604c3c  [Mxnet-1397] Support symbolic api for requantize and 
dequantize (#14749)
     add 8cae72e  julia/ndarray: fix flaky test cases for `clamp` (#14776)
     add 587d480  Use DEFAULT macro in C APIs (#14767)
     add 22377ed  Fix GELU backward possible NaN (#14782)
     add acf53fd  Mention additional language bindings and add links (#14798)
     add 3d3803e  Updates python setup.py for recent license changes (#14778)
     add 97e09f2  Improve CMake handling of sse2 and sse3 (#14757)
     add 6aeb97e  change mxnet_option behavior (#14743)
     add 680bade  Use correct stash name when running nightly tests (#14809)
     add 5dd9fa2  [clojure][generator] ndarray/symbol api random merged (#14800)
     add 369b66d  Improve cached_op performance for static mode (#14785)
     add 40e3d7c  data preparation file moved in example (#14781)
     add 6cbc273  Scala/Java Predict API fix #14756 (#14804)
     add 003800c  clean up submodule (#14645)
     add 6c60025  [MKLDNN]Refactor requantize to speed up execution (#14608)
     add c18381d  [MXNET-1398] Enable zero-copy from numpy to MXNet NDArray 
(#14733)
     add 64287dd  Speed up SequenceReverse (#14627)
     add 1af29e9  Fixes for wine detection tutorial (#13886)
     add 3a46980  Use USE_SIGNAL_HANDLER by default set to ON in CMakeLists.txt 
(#14599)
     add 07aef13  Add unpooled gpu memory type (#14716)
     add 1238aa0  Revert "use mkl sparse matrix to improve performance 
(#14492)" (#14806)
     add c7577e5  added extraction/generation of diagonal and triangonal 
matrices to linalg (#14501)
     add 5fda0a5  add clojure tutorials to index (#14814)
     add 488fad2  Fix iterator over symbol when multiple children have the same 
name (#14597)
     add cdd7087  Fix Clojure BERT example's context argument (#14843)
     add 84c1635  [MKLDNN] add quantized sum (#14614)
     add bde1b84  [int8] Add MobileNetV2_1.0 & ResNet18 Quantization (#14823)
     add 1c874cf  reformat trt to use subgraph API, add fp16 support (#14040)
     add 5e5a59e  Upgrade Pylint version to 2.3.1 (#14807)
     add 977e558  Make docblocks for Gluon BatchNorm and SyncBatchNorm 
consistent with the code (#14840)
     add 381a9da  Print reproduction command on CI failure (#14815)
     add e17b7e2  [MXNET-13578] Fix cmake installation failed (#14692)
     add 36c3306  Update base CUDA image for CI to v10.0 cuDNN 7.3.1 (#14513)
     add 1540a84  [Clojure] Remove unneeded test files (#14813)
     add 372f531  [DEV] update code owner (#14862)
     add 204f3f2  Revert "Improve cached_op performance for static mode 
(#14785)" (#14868)
     add d09f68a  Update lstm_crf.py (#14865)
     add 5ba285b  Fix sample_multinomial number of outputs bug (#14873)
     add 4d7bae1  Add the Gluon Implementation of Deformable Convolution 
(#14810)
     add 621b391  Refactor ImageRecordIter (#14824)
     add 25ba1d1   Prevent crashes for opencv exception and std::exception 
(#14433)
     add b30949f  [MXNet-1211] Factor and "Like" modes in BilinearResize2D 
operator (#13226)
     add 2113cb7  [Clojure] Add Fine Tuning Sentence Pair Classification BERT 
Example (#14769)
     add f6ef206  [Bugfix] Fix layer norm for large input shape (#14870)
     add 42ede50  rewrite test_custom_op_exc (#14878)
     add a722db4  [Dependency Update] Upgrade openssl to 1.1.1b (#14837)
     add 0255dd6  [Dependency Update] Upgrade cuDNN & NCCL (#14884)
     add fdd45cf  Add mkldnn_version.h to pip package (#14899)
     add 5bda980  fix add_n bug: when input mem overlap with output mem, 
results is wrong (#14889)
     add 08895b7  Fix the return type of sparse.clip operator (#14856)
     add 527573e  Add support for fast variable-length LSTM (#14208)
     add 0ddef13  Revert "[Dependency Update] Upgrade cuDNN & NCCL (#14884)" 
(#14910)
     add 2e03e9f  Adds additional CUDA build environments (#14909)
     add 8dddac0  [MXNET-1400] adding tests cases to verify large tensor 
support for depth_to_space and space_to_depth (#14797)
     add da02488  upgrade the libpng to 1.6.35 (#14620)
     add 13f81a0  Improve dev_menu virtualenv handling (#14788)
     add 874fb89  Add API documentation for upsampling operator with examples 
(#14919)
     add f4598e7  Pins version of scikit-learn for python2 due to drop in 
support (#14928)
     add 4796851  Deprecate NDArrayCollector and instead use ResourceScope 
(#14780)
     add b22ee95  [MXNET-857] Add initial NVTX profiler implementation (#12328)
     add 8a4ad9f  upgrade the version to 2.0.2 (#14621)
     add 669ab2c  Updates to cudnn package installation (#14923)
     add d577b6f  [MXNET-1352] Allow dynamic shape in while_loop and if 
conditionals (#14393)
     add f67d067  Add numpy linspace (#14927)
     add 13d6ee6  Fix reshape to add in-place back (#14903)
     add 31225485 use mx.context.num_gpus instead of mx.test_utils.list_gpus in 
MF recommender example (#14926)
     add 1eba37a  Re-enable static cached_op optimization (#14931)
     add 99f5f66  Disables TensorRT build step (#14958)
     add f7b7163  Fixed and re-enables TensorRT steps (#14960)
     add 8b7e374  Fixes call to build ubuntu gpu in nightly tests (#14964)
     add 1b4e604  [MXNet-1334][Fit API]base class for estimator and 
eventhandler (#14346)
     add 5b1eb20  Fixed issue where the estimator was printing beyond the 
dataset size … (#14464)
     add 02e7c9b  [MXNet-1349][Fit API]Add validation support and unit tests 
for fit() API (#14442)
     add 92c3c21  [MXNet-1340][Fit API]Update train stats (#14494)
     add b11114a  [MXNet-1375][Fit API]Added RNN integration test for fit() API 
(#14547)
     add ca2d884  [MXNet-1343][Fit API]Add CNN integration test for fit() API 
(#14405)
     add fcee290  [MXNET-1344, 1346][FIT API] Retrieve Batch size and Logging 
verbose support for Gluon fit() API (#14587)
     add 768470e  move estimator to contrib (#14633)
     add 6c455ef  move to gluon contrib (#14635)
     add 900b449  [Fit API] improve event handlers (#14685)
     add 5ac7751  [MXNET-1396][Fit-API] Update default handler logic (#14765)
     add d57a712  [Fit API] update estimator (#14849)
     add 3b17837  [Fit-API] Adress PR comments (#14885)

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (588730c)
            \
             N -- N -- N   refs/heads/fit-api (3b17837)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

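No new revisions were added by this update: every revision listed
above as "add" already existed in the repository under another
reference, so only the branch pointer changed.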

Summary of changes:
 .gitignore                                         |    2 +-
 3rdparty/dmlc-core                                 |    2 +-
 3rdparty/googletest                                |    2 +-
 3rdparty/mshadow                                   |    2 +-
 3rdparty/onnx-tensorrt                             |    2 +-
 CMakeLists.txt                                     |   50 +-
 CODEOWNERS                                         |   25 +-
 CONTRIBUTORS.md                                    |    4 +
 LICENSE                                            |   42 +-
 Makefile                                           |   12 +-
 R-package/src/ndarray.cc                           |    6 +-
 R-package/src/symbol.cc                            |   20 +-
 README.md                                          |    2 +-
 amalgamation/amalgamation.py                       |   27 +-
 ci/Jenkinsfile_utils.groovy                        |   33 +-
 ci/build.py                                        |    1 +
 ci/docker/Dockerfile.build.centos7_gpu             |    6 +-
 ci/docker/Dockerfile.build.ubuntu_base_gpu         |    7 +-
 ci/docker/Dockerfile.build.ubuntu_build_cuda       |    6 +-
 ...buntu_gpu => Dockerfile.build.ubuntu_gpu_cu100} |    6 +-
 ...ubuntu_gpu => Dockerfile.build.ubuntu_gpu_cu80} |    2 +-
 ...ubuntu_gpu => Dockerfile.build.ubuntu_gpu_cu90} |    6 +-
 ...ubuntu_gpu => Dockerfile.build.ubuntu_gpu_cu92} |    6 +-
 ci/docker/Dockerfile.build.ubuntu_gpu_tensorrt     |    7 +-
 ci/docker/Dockerfile.build.ubuntu_nightly_gpu      |    7 +-
 ci/docker/install/centos7_cudnn.sh                 |   59 +
 ci/docker/install/tensorrt.sh                      |    3 +-
 ci/docker/install/ubuntu_core.sh                   |    1 +
 .../install/{ubuntu_python.sh => ubuntu_cudnn.sh}  |   36 +-
 ci/docker/install/ubuntu_nvidia.sh                 |    2 +-
 ci/docker/install/ubuntu_publish.sh                |    4 +-
 ci/docker/install/ubuntu_python.sh                 |    4 +-
 ci/docker/install/ubuntu_tutorials.sh              |    4 +-
 ci/docker/runtime_functions.sh                     |   68 +-
 ci/jenkins/Jenkins_steps.groovy                    |   83 +-
 ci/jenkins/Jenkinsfile_unix_cpu                    |    3 +-
 ci/jenkins/Jenkinsfile_unix_gpu                    |    4 +-
 .../Modules/FindNVTX.cmake                         |   27 +-
 cmake/Utils.cmake                                  |    2 +-
 contrib/clojure-package/.gitignore                 |    6 +
 contrib/clojure-package/examples/bert/.gitignore   |   18 +
 contrib/clojure-package/examples/bert/README.md    |  156 +++
 .../examples/bert/fine-tune-bert.ipynb             |  510 +++++++
 .../examples/bert/fine-tune-bert.md                |  371 +++++
 .../clojure-package/examples/bert/get_bert_data.sh |   32 +
 .../{infer/objectdetector => bert}/project.clj     |   24 +-
 .../examples/bert/squad-samples.edn                |   39 +
 .../bert/src/bert/bert_sentence_classification.clj |  160 +++
 .../examples/bert/src/bert/infer.clj               |  129 ++
 .../examples/bert/src/bert/util.clj                |   52 +
 .../bert/bert_sentence_classification_test.clj     |   86 ++
 .../examples/bert/test/bert/infer_test.clj         |   43 +
 .../examples/infer/objectdetector/project.clj      |    1 -
 .../infer/objectdetector/src/infer/draw.clj        |   44 -
 .../src/infer/objectdetector_example.clj           |   59 +-
 .../test/infer/objectdetector_example_test.clj     |    6 +-
 contrib/clojure-package/integration-tests.sh       |    2 +-
 contrib/clojure-package/src/dev/generator.clj      |  592 ++++++--
 .../src/org/apache/clojure_mxnet/callback.clj      |    9 +-
 .../src/org/apache/clojure_mxnet/image.clj         |   26 +-
 .../src/org/apache/clojure_mxnet/module.clj        |  544 +++++---
 .../{callback.clj => ndarray_api.clj}              |   29 +-
 .../{callback.clj => ndarray_random_api.clj}       |   25 +-
 .../src/org/apache/clojure_mxnet/symbol_api.clj    |   32 +
 .../org/apache/clojure_mxnet/symbol_random_api.clj |   32 +
 .../src/org/apache/clojure_mxnet/util.clj          |    8 +-
 .../clojure-package/test/dev/generator_test.clj    |  183 ++-
 .../clojure-package/test/good-test-ndarray-api.clj |  170 +++
 .../test/good-test-ndarray-random-api.clj          |   95 ++
 .../clojure-package/test/good-test-symbol-api.clj  |  192 +++
 .../test/good-test-symbol-random-api.clj           |  118 ++
 .../test/org/apache/clojure_mxnet/conv_test.clj    |   24 +-
 .../test/org/apache/clojure_mxnet/image_test.clj   |   16 +-
 .../org/apache/clojure_mxnet/ndarray_api_test.clj  |  415 ++++++
 .../org/apache/clojure_mxnet/symbol_api_test.clj   |   61 +
 cpp-package/example/charRNN.cpp                    |    5 +-
 .../example/inference/inception_inference.cpp      |    2 +-
 cpp-package/example/resnet.cpp                     |    2 +-
 cpp-package/example/test_kvstore.cpp               |  201 +++
 cpp-package/include/mxnet-cpp/kvstore.h            |   13 +-
 cpp-package/include/mxnet-cpp/kvstore.hpp          |   78 +-
 cpp-package/include/mxnet-cpp/ndarray.hpp          |   14 +-
 cpp-package/include/mxnet-cpp/symbol.hpp           |   32 +-
 cpp-package/tests/ci_test.sh                       |    7 +-
 cpp-package/tests/travis/setup.sh                  |    2 +-
 dev_menu.py                                        |   33 +-
 docs/api/python/gluon/contrib.md                   |   15 +
 docs/api/python/ndarray/linalg.md                  |    6 +-
 docs/api/python/profiler/profiler.md               |    4 +-
 docs/api/python/symbol/linalg.md                   |    6 +-
 docs/architecture/exception_handling.md            |    3 -
 docs/architecture/note_data_loading.md             |    8 +-
 docs/faq/env_var.md                                |    9 +-
 docs/install/index.md                              |   38 +-
 docs/install/requirements.txt                      |    2 +-
 docs/install/ubuntu_setup.md                       |   97 +-
 docs/tutorials/embedded/wine_detector.md           |   51 +-
 docs/tutorials/index.md                            |   21 +-
 docs/tutorials/python/profiler.md                  |   25 +-
 docs/tutorials/python/profiler_nvprof.png          |  Bin 0 -> 235747 bytes
 docs/tutorials/python/profiler_nvprof_zoomed.png   |  Bin 0 -> 254663 bytes
 docs/tutorials/python/profiler_winograd.png        |  Bin 0 -> 75450 bytes
 example/README.md                                  |    1 -
 example/gluon/{super_resolution => }/data.py       |    0
 example/gluon/lstm_crf/lstm_crf.py                 |    2 +-
 example/quantization/README.md                     |   70 +-
 example/quantization/imagenet_gen_qsym_mkldnn.py   |   20 +-
 example/quantization/imagenet_inference.py         |   54 +-
 example/recommenders/demo1-MF.ipynb                |    2 +-
 example/ssd/README.md                              |   56 +-
 example/ssd/dataset/mscoco.py                      |   10 +-
 example/ssd/dataset/names/mscoco.names             |   12 +-
 example/ssd/symbol/legacy_vgg16_ssd_300.py         |    3 +-
 example/ssd/symbol/legacy_vgg16_ssd_512.py         |    3 +-
 example/ssd/symbol/symbol_builder.py               |    3 +-
 example/ssd/train.py                               |    5 +-
 example/ssd/train/metric.py                        |   11 +
 example/ssd/train/train_net.py                     |    8 +-
 include/mxnet/c_api.h                              |  285 +++-
 include/mxnet/c_api_error.h                        |   25 +-
 include/mxnet/engine.h                             |    6 +-
 include/mxnet/imperative.h                         |   16 +
 include/mxnet/libinfo.h                            |    6 +-
 include/mxnet/ndarray.h                            |   37 +-
 include/mxnet/tensor_blob.h                        |   16 +-
 include/mxnet/tuple.h                              |  176 ++-
 julia/test/unittest/ndarray.jl                     |   22 +-
 make/config.mk                                     |   12 +-
 make/crosscompile.jetson.mk                        |    9 +-
 make/osx.mk                                        |    6 +
 perl-package/AI-MXNet/lib/AI/MXNet/Executor.pm     |    2 +-
 perl-package/AI-MXNet/lib/AI/MXNet/NDArray.pm      |    4 +-
 perl-package/AI-MXNet/lib/AI/MXNet/Symbol.pm       |    4 +-
 perl-package/AI-MXNetCAPI/mxnet.i                  |  172 +--
 perl-package/AI-MXNetCAPI/mxnet_typemaps.i         |   22 +-
 python/mxnet/__init__.py                           |    2 +-
 python/mxnet/base.py                               |  153 +-
 .../mxnet/contrib/onnx/mx2onnx/_export_helper.py   |    2 +-
 .../mxnet/contrib/onnx/mx2onnx/_op_translations.py |    2 +-
 .../contrib/onnx/onnx2mx/_translation_utils.py     |    2 +-
 python/mxnet/contrib/quantization.py               |    1 +
 python/mxnet/contrib/tensorrt.py                   |  119 +-
 python/mxnet/contrib/text/vocab.py                 |    2 +-
 python/mxnet/executor.py                           |   50 +-
 python/mxnet/gluon/contrib/__init__.py             |    2 +
 python/mxnet/gluon/contrib/{ => cnn}/__init__.py   |   10 +-
 python/mxnet/gluon/contrib/cnn/conv_layers.py      |  221 +++
 python/mxnet/gluon/contrib/nn/basic_layers.py      |    8 +-
 python/mxnet/gluon/nn/activations.py               |    3 +-
 python/mxnet/gluon/nn/basic_layers.py              |    8 +-
 python/mxnet/gluon/parameter.py                    |    2 +-
 python/mxnet/gluon/rnn/rnn_layer.py                |   41 +-
 python/mxnet/gluon/trainer.py                      |    4 +-
 python/mxnet/gluon/utils.py                        |   12 +-
 python/mxnet/image/detection.py                    |    2 +-
 python/mxnet/image/image.py                        |    8 +-
 python/mxnet/io/io.py                              |    2 +
 python/mxnet/metric.py                             |  132 +-
 python/mxnet/model.py                              |    4 +-
 python/mxnet/module/base_module.py                 |    2 +-
 python/mxnet/module/module.py                      |   16 +-
 python/mxnet/module/python_module.py               |    2 +-
 python/mxnet/ndarray/_internal.py                  |    2 -
 python/mxnet/ndarray/contrib.py                    |    5 +-
 python/mxnet/ndarray/ndarray.py                    |  188 ++-
 python/mxnet/ndarray/register.py                   |    7 +-
 python/mxnet/ndarray/sparse.py                     |    4 +-
 python/mxnet/ndarray_doc.py                        |    2 +-
 python/mxnet/operator.py                           |   28 +-
 python/mxnet/optimizer/optimizer.py                |    2 +-
 python/mxnet/recordio.py                           |    4 +
 python/mxnet/rnn/rnn_cell.py                       |    4 +-
 python/mxnet/symbol/_internal.py                   |    2 -
 python/mxnet/symbol/register.py                    |    7 +-
 python/mxnet/symbol/symbol.py                      |  196 ++-
 python/mxnet/test_utils.py                         |   14 +-
 python/mxnet/visualization.py                      |    4 +
 .../assembly/src/main/assembly/assembly.xml        |   13 +-
 .../assembly/src/main/assembly/javadoc.xml         |    2 +-
 .../assembly/src/main/assembly/source.xml          |    2 +-
 scala-package/core/pom.xml                         |    5 +-
 .../src/main/scala/org/apache/mxnet/Executor.scala |  143 +-
 .../scala/org/apache/mxnet/ExecutorManager.scala   |   27 +-
 .../main/scala/org/apache/mxnet/FeedForward.scala  |   27 +-
 .../core/src/main/scala/org/apache/mxnet/IO.scala  |   35 +-
 .../src/main/scala/org/apache/mxnet/Image.scala    |    8 +
 .../src/main/scala/org/apache/mxnet/LibInfo.scala  |   32 +-
 .../scala/org/apache/mxnet/MX_PRIMITIVES.scala     |    2 +
 .../src/main/scala/org/apache/mxnet/NDArray.scala  |   29 +-
 .../scala/org/apache/mxnet/NDArrayCollector.scala  |    6 +
 .../main/scala/org/apache/mxnet/NumpyScope.scala   |   63 +
 .../scala/org/apache/mxnet/ResourceScope.scala     |   46 +-
 .../src/main/scala/org/apache/mxnet/Symbol.scala   |   93 +-
 .../scala/org/apache/mxnet/io/MXDataIter.scala     |    3 +-
 .../scala/org/apache/mxnet/io/NDArrayIter.scala    |    5 +-
 .../org/apache/mxnet/io/PrefetchingIter.scala      |   87 +-
 .../scala/org/apache/mxnet/javaapi/Image.scala     |   25 +
 .../scala/org/apache/mxnet/module/BaseModule.scala |   20 +-
 .../org/apache/mxnet/module/BucketingModule.scala  |   60 +-
 .../scala/org/apache/mxnet/module/Module.scala     |  157 ++-
 .../org/apache/mxnet/module/SequentialModule.scala |  151 +-
 .../org/apache/mxnet/util/OptionConversion.scala   |    2 +
 .../java/org/apache/mxnet/javaapi/NDArrayTest.java |    4 +-
 .../src/test/scala/org/apache/mxnet/IOSuite.scala  |   29 +-
 .../test/scala/org/apache/mxnet/ImageSuite.scala   |   10 +-
 .../test/scala/org/apache/mxnet/KVStoreSuite.scala |    3 -
 .../org/apache/mxnet/ModelParallelSuite.scala      |    5 -
 .../test/scala/org/apache/mxnet/ModuleSuite.scala  |   32 +-
 .../scala/org/apache/mxnet/NumpyScopeSuite.scala}  |   26 +-
 .../scala/org/apache/mxnet/OperatorSuite.scala     |    6 -
 .../test/scala/org/apache/mxnet/SymbolSuite.scala  |   14 -
 .../scala/org/apache/mxnet/train/ConvSuite.scala   |    1 +
 scala-package/deploy/pom.xml                       |    3 +
 scala-package/deploy/src/main/deploy/deploy.xml    |    2 +-
 scala-package/examples/pom.xml                     |    4 -
 .../mxnetexamples/javaapi/infer/bert/BertQA.java   |    6 +-
 .../benchmark/ScalaInferenceBenchmark.scala        |    8 +-
 .../CNNTextClassification.scala                    |    8 +-
 .../mxnetexamples/customop/ExampleCustomOp.scala   |    6 +-
 .../customop/ExampleCustomOpWithRtc.scala          |    2 +-
 .../org/apache/mxnetexamples/gan/GanMnist.scala    |    6 +-
 .../datasets/SyntheticDataIter.scala               |    2 +-
 .../imageclassifier/ImageClassifierExample.scala   |    4 +-
 .../objectdetector/SSDClassifierExample.scala      |    4 +-
 .../org/apache/mxnetexamples/module/MnistMlp.scala |    2 +-
 .../mxnetexamples/module/SequentialModuleEx.scala  |    2 +-
 .../mxnetexamples/multitask/ExampleMultiTask.scala |   15 +-
 .../mxnetexamples/neuralstyle/NeuralStyle.scala    |    6 +-
 .../neuralstyle/end2end/BoostInference.scala       |    6 +-
 .../neuralstyle/end2end/BoostTrain.scala           |    6 +-
 .../neuralstyle/end2end/DataProcessing.scala       |    4 +-
 .../org/apache/mxnetexamples/rnn/BucketIo.scala    |   12 +-
 .../apache/mxnetexamples/rnn/LstmBucketing.scala   |    2 +-
 .../org/apache/mxnetexamples/rnn/TestCharRnn.scala |    2 +-
 .../apache/mxnetexamples/rnn/TrainCharRnn.scala    |    9 +-
 .../benchmark/ScalaInferenceBenchmarkSuite.scala   |    1 +
 .../CNNClassifierExampleSuite.scala                |    3 +-
 .../customop/CustomOpExampleSuite.scala            |    1 +
 .../apache/mxnetexamples/gan/GanExampleSuite.scala |    3 +-
 .../IMClassificationExampleSuite.scala             |    1 +
 .../ImageClassifierExampleSuite.scala              |    1 +
 .../ObjectDetectorExampleSuite.scala               |    1 +
 .../neuralstyle/NeuralStyleSuite.scala             |    3 +-
 .../apache/mxnetexamples/rnn/ExampleRNNSuite.scala |    3 +-
 scala-package/infer/pom.xml                        |    4 -
 .../scala/org/apache/mxnet/infer/Classifier.scala  |   17 +
 .../org/apache/mxnet/infer/ImageClassifier.scala   |   36 +-
 .../org/apache/mxnet/infer/MXNetHandler.scala      |   14 +
 .../org/apache/mxnet/infer/ObjectDetector.scala    |   19 +
 .../scala/org/apache/mxnet/infer/Predictor.scala   |   38 +-
 .../mxnet/infer/javaapi/ObjectDetector.scala       |   80 +-
 .../org/apache/mxnet/infer/javaapi/Predictor.scala |   52 +-
 .../org/apache/mxnet/infer/ClassifierSuite.scala   |    4 +-
 .../main/scala/org/apache/mxnet/init/Base.scala    |   27 +-
 .../main/scala/org/apache/mxnet/init/LibInfo.scala |   30 +
 scala-package/macros/pom.xml                       |    4 -
 .../scala/org/apache/mxnet/APIDocGenerator.scala   |  101 +-
 .../scala/org/apache/mxnet/GeneratorBase.scala     |   81 +-
 .../main/scala/org/apache/mxnet/NDArrayMacro.scala |   50 +-
 .../main/scala/org/apache/mxnet/SymbolMacro.scala  |   47 +-
 .../apache/mxnet/javaapi/JavaNDArrayMacro.scala    |   17 +-
 .../org/apache/mxnet/utils/CToScalaUtils.scala     |   15 +-
 .../src/main/java/mxnet/NDArrayCreation.java       |    2 +-
 .../src/main/java/mxnet/NDArrayOperation.java      |    2 +-
 .../main/native/org_apache_mxnet_native_c_api.cc   |  219 ++-
 .../main/native/org_apache_mxnet_native_c_api.h    |   32 +
 scala-package/pom.xml                              |   50 +-
 scala-package/spark/pom.xml                        |    5 +
 .../scala/org/apache/mxnet/spark/MXNDArray.scala   |    2 +-
 .../scala/org/apache/mxnet/spark/MXNetModel.scala  |    2 +-
 .../apache/mxnet/spark/io/LabeledPointIter.scala   |    4 +-
 .../mxnet/spark/io/LongLivingDataBatch.scala       |    3 +-
 .../org/apache/mxnet/spark/io/PointIter.scala      |    4 +-
 .../org/apache/mxnet/spark/utils/Network.scala     |   22 +-
 .../org/apache/mxnet/spark/MXNetGeneralSuite.scala |    1 +
 .../apache/mxnet/spark/SharedSparkContext.scala    |   12 +-
 src/c_api/c_api.cc                                 |  129 +-
 src/c_api/c_api_common.h                           |   31 +
 src/c_api/c_api_executor.cc                        |  482 ++++++-
 src/c_api/c_api_ndarray.cc                         |   12 +
 src/c_api/c_api_symbolic.cc                        |  107 +-
 src/c_api/c_predict_api.cc                         |    1 +
 src/common/exec_utils.h                            |    4 +-
 src/common/serialization.h                         |  318 -----
 src/common/utils.h                                 |   60 +-
 src/engine/naive_engine.cc                         |    3 +
 src/engine/threaded_engine.cc                      |   25 +
 src/engine/threaded_engine.h                       |   43 +-
 src/executor/attach_op_execs_pass.cc               |    8 +-
 src/executor/exec_pass.h                           |   21 +-
 src/executor/graph_executor.cc                     |  226 ++-
 src/executor/graph_executor.h                      |    3 +
 src/executor/infer_graph_attr_pass.cc              |   20 +-
 src/executor/tensorrt_pass.cc                      |  596 --------
 src/executor/trt_graph_executor.cc                 |  443 ------
 src/executor/trt_graph_executor.h                  |  111 --
 src/imperative/cached_op.cc                        |   10 +-
 src/imperative/cached_op.h                         |    8 +-
 src/imperative/imperative.cc                       |    4 +-
 src/imperative/imperative_utils.cc                 |   61 +-
 src/imperative/imperative_utils.h                  |   56 +-
 src/initialize.cc                                  |    4 +-
 src/io/image_aug_default.cc                        |    2 +-
 src/io/image_det_aug_default.cc                    |    4 +-
 src/io/image_io.cc                                 |    6 +-
 src/io/image_iter_common.h                         |   10 +
 src/io/iter_batchloader.h                          |    2 +-
 src/io/iter_image_recordio_2.cc                    |  119 +-
 src/io/iter_sparse_batchloader.h                   |    2 +-
 src/kvstore/gradient_compression.cc                |   10 +-
 src/libinfo.cc                                     |    3 +
 src/ndarray/ndarray.cc                             |   55 +-
 src/ndarray/ndarray_function.cc                    |   12 +-
 src/ndarray/ndarray_function.h                     |    2 +-
 src/nnvm/plan_memory.cc                            |    6 +-
 src/operator/batch_norm_v1-inl.h                   |    2 +-
 src/operator/bilinear_sampler-inl.h                |    4 +-
 src/operator/channel_op_common.h                   |    4 +
 src/operator/contrib/adamw-inl.h                   |    5 +-
 src/operator/contrib/adaptive_avg_pooling-inl.h    |    6 +-
 src/operator/contrib/bilinear_resize-inl.h         |  175 ++-
 src/operator/contrib/bilinear_resize.cc            |   52 +-
 src/operator/contrib/bilinear_resize.cu            |   37 +-
 src/operator/contrib/boolean_mask.cc               |    2 +-
 src/operator/contrib/bounding_box-inl.h            |   11 +-
 src/operator/contrib/count_sketch-inl.h            |    2 +-
 src/operator/contrib/deformable_convolution-inl.h  |   14 +-
 src/operator/contrib/dgl_graph.cc                  |   68 +-
 src/operator/contrib/fft-inl.h                     |    2 +-
 src/operator/contrib/ifft-inl.h                    |    2 +-
 src/operator/contrib/index_copy-inl.h              |    5 +-
 src/operator/contrib/multi_proposal-inl.h          |   18 +-
 src/operator/contrib/multibox_detection-inl.h      |    4 +-
 src/operator/contrib/multibox_detection.cc         |    2 +-
 src/operator/contrib/multibox_detection.cu         |    2 +-
 src/operator/contrib/multibox_prior-inl.h          |   12 +-
 src/operator/contrib/multibox_prior.cc             |    7 +-
 src/operator/contrib/multibox_prior.cu             |    5 +-
 src/operator/contrib/multibox_target-inl.h         |    2 +-
 src/operator/contrib/multibox_target.cc            |    2 +-
 src/operator/contrib/multibox_target.cu            |    2 +-
 src/operator/contrib/optimizer_op.cc               |    2 +-
 src/operator/contrib/proposal-inl.h                |   18 +-
 src/operator/contrib/quadratic_op-inl.h            |    2 +-
 src/operator/contrib/sync_batch_norm-inl.h         |    2 +-
 src/operator/contrib/tensorrt-inl.h                |   79 --
 src/operator/contrib/tensorrt.cc                   |  181 ---
 src/operator/contrib/transformer-inl.h             |    4 +-
 src/operator/control_flow.cc                       |  328 ++---
 src/operator/convolution_v1-inl.h                  |   10 +-
 src/operator/cudnn_rnn-inl.h                       |  863 ------------
 src/operator/custom/custom-inl.h                   |   45 +-
 src/operator/custom/custom.cc                      |   18 +-
 src/operator/image/image_random-inl.h              |   18 +-
 src/operator/image/resize-inl.h                    |    4 +-
 src/operator/leaky_relu-inl.h                      |    6 +-
 src/operator/loss_binary_op-inl.h                  |    2 +-
 src/operator/mshadow_op.h                          |   97 +-
 src/operator/mxnet_op.h                            |   89 +-
 src/operator/nn/batch_norm.cc                      |    2 +-
 src/operator/nn/concat.cc                          |   51 +-
 src/operator/nn/convolution-inl.h                  |   18 +-
 src/operator/nn/convolution.cc                     |   85 +-
 src/operator/nn/ctc_loss-inl.h                     |    2 +-
 src/operator/nn/cudnn/cudnn_algoreg-inl.h          |    2 +-
 src/operator/nn/cudnn/cudnn_batch_norm.cc          |    2 +-
 src/operator/nn/cudnn/cudnn_convolution-inl.h      |    6 +-
 src/operator/nn/cudnn/cudnn_deconvolution-inl.h    |    6 +-
 src/operator/nn/deconvolution-inl.h                |   30 +-
 src/operator/nn/deconvolution.cc                   |   50 +-
 src/operator/nn/dropout-inl.h                      |    7 +-
 src/operator/nn/dropout.cc                         |    4 +-
 src/operator/nn/fully_connected.cc                 |    4 +-
 src/operator/nn/im2col.h                           |    4 +-
 src/operator/nn/layer_norm-inl.h                   |   10 +-
 src/operator/nn/layer_norm.cc                      |    6 +-
 src/operator/nn/lrn.cc                             |    2 +-
 src/operator/nn/mkldnn/mkldnn_act-inl.h            |   74 +
 src/operator/nn/mkldnn/mkldnn_act.cc               |   91 +-
 src/operator/nn/mkldnn/mkldnn_base-inl.h           |    4 +-
 src/operator/nn/mkldnn/mkldnn_base.cc              |    1 +
 src/operator/nn/mkldnn/mkldnn_concat.cc            |   12 +-
 src/operator/nn/mkldnn/mkldnn_ops-inl.h            |   12 +
 src/operator/nn/mkldnn/mkldnn_reshape.cc           |  194 +++
 src/operator/nn/mkldnn/mkldnn_slice.cc             |    6 +-
 src/operator/nn/mkldnn/mkldnn_transpose.cc         |  161 +++
 src/operator/nn/pooling-inl.h                      |   14 +-
 src/operator/nn/pooling.cc                         |    6 +-
 src/operator/nn/upsampling-inl.h                   |    4 +-
 src/operator/nn/upsampling.cc                      |   56 +-
 src/operator/operator_common.h                     |   19 +-
 src/operator/operator_util.cc                      |    2 +-
 src/operator/optimizer_op-inl.h                    |    8 +-
 src/operator/pad-inl.h                             |    2 +-
 src/operator/pooling_v1-inl.h                      |   35 +-
 src/operator/quantization/dequantize-inl.h         |   66 +-
 src/operator/quantization/dequantize.cc            |   25 +-
 src/operator/quantization/dequantize.cu            |    2 +-
 .../quantization/mkldnn/mkldnn_dequantize-inl.h    |  140 +-
 .../quantization/mkldnn/mkldnn_quantize_v2-inl.h   |  202 +--
 .../quantization/mkldnn/mkldnn_quantized_act.cc    |   55 +
 .../quantization/mkldnn/mkldnn_quantized_conv.cc   |    2 +-
 .../mkldnn/mkldnn_quantized_elemwise_add.cc        |  206 +++
 .../mkldnn/mkldnn_quantized_fully_connected.cc     |    4 +-
 .../quantization/mkldnn/mkldnn_requantize-inl.h    |   91 +-
 src/operator/quantization/quantization_utils.h     |   79 +-
 src/operator/quantization/quantize-inl.h           |    4 +-
 src/operator/quantization/quantize_graph_pass.cc   |   32 +-
 src/operator/quantization/quantize_v2-inl.h        |  216 ++-
 src/operator/quantization/quantize_v2.cc           |   21 +-
 src/operator/quantization/quantize_v2.cu           |    2 +-
 src/operator/quantization/quantized_activation.cc  |  138 ++
 src/operator/quantization/quantized_concat.cc      |   18 +-
 src/operator/quantization/quantized_conv.cc        |    4 +-
 src/operator/quantization/quantized_conv.cu        |    2 +-
 .../quantization/quantized_elemwise_add-inl.h      |   58 +
 .../quantization/quantized_elemwise_add.cc         |  141 ++
 src/operator/quantization/quantized_flatten-inl.h  |    6 +-
 .../quantization/quantized_fully_connected.cc      |    8 +-
 .../quantization/quantized_fully_connected.cu      |    2 +-
 src/operator/quantization/quantized_pooling.cc     |    4 +-
 src/operator/quantization/requantize-inl.h         |   25 +-
 src/operator/quantization/requantize.cc            |    4 +
 src/operator/random/multisample_op.h               |    2 +-
 src/operator/random/sample_multinomial_op.h        |   19 +-
 src/operator/random/unique_sample_op.h             |    2 +-
 src/operator/regression_output-inl.h               |    2 +-
 src/operator/rnn-inl.h                             | 1466 +++++++++++++++-----
 src/operator/rnn.cc                                |  230 ++-
 src/operator/rnn.cu                                |   21 +-
 src/operator/sequence_last-inl.h                   |    2 +-
 src/operator/sequence_reverse-inl.h                |   57 +-
 src/operator/slice_channel-inl.h                   |   17 +-
 src/operator/softmax_output-inl.h                  |   12 +-
 src/operator/softmax_output.cc                     |   12 +-
 src/operator/spatial_transformer-inl.h             |    4 +-
 src/operator/subgraph/build_subgraph.cc            |   16 +-
 src/operator/subgraph/mkldnn/mkldnn_conv.cc        |   64 +-
 src/operator/subgraph/mkldnn/mkldnn_fc.cc          |    4 +-
 ..._property.h => mkldnn_post_quantize_property.h} |   63 +-
 .../subgraph/mkldnn/mkldnn_subgraph_property.cc    |    4 +-
 .../tensorrt}/nnvm_to_onnx-inl.h                   |  131 +-
 .../{contrib => subgraph/tensorrt}/nnvm_to_onnx.cc |  284 ++--
 .../subgraph/tensorrt}/onnx_to_tensorrt.cc         |   27 +-
 .../subgraph/tensorrt}/onnx_to_tensorrt.h          |   18 +-
 src/operator/subgraph/tensorrt/tensorrt-inl.h      |  240 ++++
 src/operator/subgraph/tensorrt/tensorrt.cc         |  336 +++++
 .../{contrib => subgraph/tensorrt}/tensorrt.cu     |   30 +-
 src/operator/subgraph_op_common.cc                 |    4 +-
 src/operator/subgraph_op_common.h                  |   12 +-
 src/operator/svm_output-inl.h                      |    6 +-
 src/operator/swapaxis-inl.h                        |   10 +-
 src/operator/tensor/broadcast_reduce-inl.cuh       |   74 +-
 src/operator/tensor/broadcast_reduce-inl.h         |   51 +-
 src/operator/tensor/broadcast_reduce_op.h          |  346 +++--
 src/operator/tensor/broadcast_reduce_op_value.cc   |    6 +-
 src/operator/tensor/diag_op-inl.h                  |   12 +-
 src/operator/tensor/dot-inl.h                      |   12 +-
 src/operator/tensor/elemwise_binary_broadcast_op.h |   53 +-
 src/operator/tensor/elemwise_binary_op_basic.cc    |    3 +
 src/operator/tensor/elemwise_unary_op_basic.cc     |   10 +-
 src/operator/tensor/histogram-inl.h                |   14 +-
 src/operator/tensor/indexing_op.h                  |   31 +-
 src/operator/tensor/init_op.cc                     |   11 +
 src/operator/tensor/init_op.cu                     |    3 +
 src/operator/tensor/init_op.h                      |   87 +-
 src/operator/tensor/la_op-inl.h                    |   94 ++
 src/operator/tensor/la_op.cc                       |  231 +++
 src/operator/tensor/la_op.cu                       |   24 +
 src/operator/tensor/la_op.h                        |   70 +-
 src/operator/tensor/matrix_op-inl.h                |  377 ++---
 src/operator/tensor/matrix_op.cc                   |   68 +-
 src/operator/tensor/ordering_op-inl.h              |    2 +-
 src/operator/tensor/slice-inl.h                    |    6 +-
 src/operator/tensor/sparse_retain-inl.h            |    2 +-
 src/operator/tensor/square_sum-inl.h               |    9 +-
 .../dequantize.cu => profiler/nvtx.cc}             |   15 +-
 .../quantize_v2.cu => profiler/nvtx.h}             |   45 +-
 src/profiler/profiler.h                            |   26 +
 src/profiler/storage_profiler.h                    |    6 +-
 src/resource.cc                                    |   14 +-
 src/storage/storage.cc                             |   10 +-
 .../cpp/engine/engine_shutdown_test.cc             |   31 +-
 tests/cpp/engine/threaded_engine_test.cc           |   94 +-
 tests/cpp/include/test_mkldnn.h                    |   18 +-
 tests/cpp/include/test_util.h                      |    4 +-
 tests/cpp/misc/serialization.cc                    |   68 -
 tests/cpp/operator/batchnorm_test.cc               |    4 +-
 tests/cpp/operator/mkldnn_operator_test.cc         |    6 +-
 tests/nightly/JenkinsfileForBinaries               |   38 +-
 .../nightly/apache_rat_license_check/rat-excludes  |    1 +
 tests/nightly/test_large_array.py                  |   58 +-
 tests/python/gpu/test_gluon_contrib_gpu.py         |   63 +
 tests/python/gpu/test_gluon_gpu.py                 |   55 +
 tests/python/gpu/test_gluon_transforms.py          |    4 +-
 tests/python/gpu/test_operator_gpu.py              |   23 +-
 tests/python/mkl/test_mkldnn.py                    |   56 +-
 .../python/profiling/simple_forward.py             |   35 +-
 tests/python/profiling/test_nvtx.py                |   52 +
 tests/python/quantization/test_quantization.py     |  209 ++-
 tests/python/tensorrt/lenet5_train.py              |    2 +
 tests/python/tensorrt/test_cvnets.py               |   34 +-
 tests/python/tensorrt/test_cycle.py                |   69 -
 tests/python/tensorrt/test_resnet18.py             |   42 +-
 tests/python/tensorrt/test_tensorrt_lenet5.py      |   91 +-
 tests/python/tensorrt/test_training_warning.py     |   70 -
 tests/python/train/test_dtype.py                   |   66 +
 tests/python/unittest/test_contrib_control_flow.py |   30 +-
 tests/python/unittest/test_contrib_operator.py     |   14 +-
 tests/python/unittest/test_exc_handling.py         |  134 +-
 tests/python/unittest/test_gluon.py                |    2 +-
 tests/python/unittest/test_gluon_rnn.py            |    6 +-
 tests/python/unittest/test_image.py                |   13 +
 tests/python/unittest/test_infer_shape.py          |   16 +
 tests/python/unittest/test_metric.py               |   82 ++
 tests/python/unittest/test_module.py               |   28 +
 tests/python/unittest/test_ndarray.py              |   54 +-
 tests/python/unittest/test_operator.py             |  655 +++++++--
 tests/python/unittest/test_random.py               |   12 +
 tests/python/unittest/test_sparse_ndarray.py       |    1 +
 tests/python/unittest/test_symbol.py               |    5 +
 tools/caffe_converter/compare_layers.py            |    5 -
 tools/caffe_converter/test_converter.py            |    2 -
 tools/dependencies/LICENSE.binary.dependencies     |  289 ++++
 tools/dependencies/eigen.sh                        |    1 +
 tools/dependencies/libpng.sh                       |    2 +-
 tools/dependencies/libtiff.sh                      |    6 +-
 tools/dependencies/libturbojpeg.sh                 |    2 +-
 tools/dependencies/openssl.sh                      |    2 +-
 tools/pip/setup.py                                 |   19 +-
 tools/staticbuild/build.sh                         |    8 +
 tools/staticbuild/build_lib.sh                     |   10 +-
 532 files changed, 17774 insertions(+), 7643 deletions(-)
 copy ci/docker/{Dockerfile.build.ubuntu_gpu => 
Dockerfile.build.ubuntu_gpu_cu100} (94%)
 copy ci/docker/{Dockerfile.build.ubuntu_gpu => 
Dockerfile.build.ubuntu_gpu_cu80} (97%)
 copy ci/docker/{Dockerfile.build.ubuntu_gpu => 
Dockerfile.build.ubuntu_gpu_cu90} (94%)
 rename ci/docker/{Dockerfile.build.ubuntu_gpu => 
Dockerfile.build.ubuntu_gpu_cu92} (94%)
 create mode 100755 ci/docker/install/centos7_cudnn.sh
 copy ci/docker/install/{ubuntu_python.sh => ubuntu_cudnn.sh} (56%)
 copy ci/docker/install/ubuntu_nvidia.sh => cmake/Modules/FindNVTX.cmake (53%)
 mode change 100755 => 100644
 create mode 100644 contrib/clojure-package/examples/bert/.gitignore
 create mode 100644 contrib/clojure-package/examples/bert/README.md
 create mode 100644 contrib/clojure-package/examples/bert/fine-tune-bert.ipynb
 create mode 100644 contrib/clojure-package/examples/bert/fine-tune-bert.md
 create mode 100755 contrib/clojure-package/examples/bert/get_bert_data.sh
 copy contrib/clojure-package/examples/{infer/objectdetector => 
bert}/project.clj (58%)
 create mode 100644 contrib/clojure-package/examples/bert/squad-samples.edn
 create mode 100644 
contrib/clojure-package/examples/bert/src/bert/bert_sentence_classification.clj
 create mode 100644 contrib/clojure-package/examples/bert/src/bert/infer.clj
 create mode 100644 contrib/clojure-package/examples/bert/src/bert/util.clj
 create mode 100644 
contrib/clojure-package/examples/bert/test/bert/bert_sentence_classification_test.clj
 create mode 100644 
contrib/clojure-package/examples/bert/test/bert/infer_test.clj
 delete mode 100644 
contrib/clojure-package/examples/infer/objectdetector/src/infer/draw.clj
 copy contrib/clojure-package/src/org/apache/clojure_mxnet/{callback.clj => 
ndarray_api.clj} (53%)
 copy contrib/clojure-package/src/org/apache/clojure_mxnet/{callback.clj => 
ndarray_random_api.clj} (59%)
 create mode 100644 
contrib/clojure-package/src/org/apache/clojure_mxnet/symbol_api.clj
 create mode 100644 
contrib/clojure-package/src/org/apache/clojure_mxnet/symbol_random_api.clj
 create mode 100644 contrib/clojure-package/test/good-test-ndarray-api.clj
 create mode 100644 
contrib/clojure-package/test/good-test-ndarray-random-api.clj
 create mode 100644 contrib/clojure-package/test/good-test-symbol-api.clj
 create mode 100644 contrib/clojure-package/test/good-test-symbol-random-api.clj
 create mode 100644 
contrib/clojure-package/test/org/apache/clojure_mxnet/ndarray_api_test.clj
 create mode 100644 
contrib/clojure-package/test/org/apache/clojure_mxnet/symbol_api_test.clj
 create mode 100644 cpp-package/example/test_kvstore.cpp
 create mode 100644 docs/tutorials/python/profiler_nvprof.png
 create mode 100644 docs/tutorials/python/profiler_nvprof_zoomed.png
 create mode 100644 docs/tutorials/python/profiler_winograd.png
 rename example/gluon/{super_resolution => }/data.py (100%)
 copy python/mxnet/gluon/contrib/{ => cnn}/__init__.py (82%)
 create mode 100644 python/mxnet/gluon/contrib/cnn/conv_layers.py
 create mode 100644 
scala-package/core/src/main/scala/org/apache/mxnet/NumpyScope.scala
 rename 
scala-package/{macros/src/main/scala/org/apache/mxnet/utils/OperatorBuildUtils.scala
 => core/src/test/scala/org/apache/mxnet/NumpyScopeSuite.scala} (58%)
 delete mode 100644 src/common/serialization.h
 delete mode 100644 src/executor/tensorrt_pass.cc
 delete mode 100644 src/executor/trt_graph_executor.cc
 delete mode 100644 src/executor/trt_graph_executor.h
 delete mode 100644 src/operator/contrib/tensorrt-inl.h
 delete mode 100644 src/operator/contrib/tensorrt.cc
 delete mode 100644 src/operator/cudnn_rnn-inl.h
 create mode 100644 src/operator/nn/mkldnn/mkldnn_act-inl.h
 create mode 100644 src/operator/nn/mkldnn/mkldnn_reshape.cc
 create mode 100644 src/operator/nn/mkldnn/mkldnn_transpose.cc
 create mode 100644 src/operator/quantization/mkldnn/mkldnn_quantized_act.cc
 create mode 100644 
src/operator/quantization/mkldnn/mkldnn_quantized_elemwise_add.cc
 create mode 100644 src/operator/quantization/quantized_activation.cc
 create mode 100644 src/operator/quantization/quantized_elemwise_add-inl.h
 create mode 100644 src/operator/quantization/quantized_elemwise_add.cc
 rename src/operator/subgraph/mkldnn/{mkldnn_conv_post_quantize_property.h => 
mkldnn_post_quantize_property.h} (68%)
 rename src/operator/{contrib => subgraph/tensorrt}/nnvm_to_onnx-inl.h (61%)
 rename src/operator/{contrib => subgraph/tensorrt}/nnvm_to_onnx.cc (68%)
 rename src/{executor => operator/subgraph/tensorrt}/onnx_to_tensorrt.cc (89%)
 rename src/{executor => operator/subgraph/tensorrt}/onnx_to_tensorrt.h (88%)
 create mode 100644 src/operator/subgraph/tensorrt/tensorrt-inl.h
 create mode 100644 src/operator/subgraph/tensorrt/tensorrt.cc
 rename src/operator/{contrib => subgraph/tensorrt}/tensorrt.cu (69%)
 copy src/{operator/quantization/dequantize.cu => profiler/nvtx.cc} (73%)
 copy src/{operator/quantization/quantize_v2.cu => profiler/nvtx.h} (58%)
 copy src/operator/quantization/dequantize.cu => 
tests/cpp/engine/engine_shutdown_test.cc (54%)
 delete mode 100644 tests/cpp/misc/serialization.cc
 create mode 100644 tests/python/gpu/test_gluon_contrib_gpu.py
 copy ci/docker/Dockerfile.build.ubuntu_base_gpu => 
tests/python/profiling/simple_forward.py (54%)
 create mode 100644 tests/python/profiling/test_nvtx.py
 delete mode 100644 tests/python/tensorrt/test_cycle.py
 delete mode 100644 tests/python/tensorrt/test_training_warning.py
 create mode 100644 tools/dependencies/LICENSE.binary.dependencies

Reply via email to