This is an automated email from the ASF dual-hosted git repository.
haibin pushed a change to branch fit-api
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git.
discard 588730c [Fit-API] Adress PR comments (#14885)
omit 33e8845 [Fit API] update estimator (#14849)
omit 0748b47 [MXNET-1396][Fit-API] Update default handler logic (#14765)
omit 7e10355 [Fit API] improve event handlers (#14685)
omit d4bf975 move to gluon contrib (#14635)
omit ffad48a move estimator to contrib (#14633)
omit d76234b [MXNET-1344, 1346][FIT API] Retrieve Batch size and Logging
verbose support for Gluon fit() API (#14587)
omit 213ee5f [MXNet-1343][Fit API]Add CNN integration test for fit() API
(#14405)
omit 3629611 [MXNet-1375][Fit API]Added RNN integration test for fit() API
(#14547)
omit 42a7061 [MXNet-1340][Fit API]Update train stats (#14494)
omit 8c8d015 [MXNet-1349][Fit API]Add validation support and unit tests
for fit() API (#14442)
omit 7f06bfa Fixed issue where the estimator was printing beyond the
dataset size … (#14464)
omit f250e1e [MXNet-1334][Fit API]base class for estimator and
eventhandler (#14346)
add b68f18c Fix dockerized GPU builds in dev_menu (#14603)
add dab4ffc replace std::random_shuffle to std::shuffle (#14523)
add 3781816 Add exception handling support for waitall (#14397)
add daabe5c split_and_load can now handle num_ctx > num_data. Issue
#13909 (#14607)
add 1dafa1a Comment out test_unix_python3_tensorrt_gpu step (#14642)
add 9dda62f The folder python-howto was removed in an earlier commit. The
reference to that folder was not removed. Making a PR to remove the reference
to this folder to keep documents consistent (#14573)
add 4530ad8 Fix aspect ratio sampling for RandomResizedCrop (#14585)
add 74e71e9 [MXNET-400] support string type for kvstore key in
cpp-package (#10792)
add 733e54c Added repeats for github status updates (#14530)
add 9443ae6 Fix warning on macro expansion using defined. (#14598)
add c6516cc [MXNET-1359] Adds a multiclass-MCC metric derived from
Pearson (#14461)
add d596b59 Support SSD f32/int8 evaluation on COCO dataset (#14646)
add fde4963 [WIP][Dependency Update] Upgrade the libtiff to 4.0.10
(#14623)
add 8f0878c [MXNET-1287] Miscellaneous Scala warning fixes (#14658)
add 2c5d7f7 Optimize transpose operator with MKL-DNN (#14545)
add 52e2e8e Fix scalastyle (#14669)
add e701e71 Avoid secondary deployment of package to local (#14647)
add 26b14bc [MXNET-1287] Up scala comp (#14667)
add 596ef3a Add nd.power and sym.pow (#14606)
add 6a93bda Fix scaladoc scalastyle violations in Infer package (#14671)
add 0284236 [Clojure] enhance draw bounding box (#14567)
add 800590e Add MXEnginePushAsync and MXEnginePushSync C APIs (#14615)
add c788804 Updates tolerances for test_layer_bidirectional (#14682)
add 30d479f Tweak the copy for the cudnn autotuning warning. (#14680)
add 5fc5c27 Fix profiler check (#14677)
add 1c49e40 Change RNN OP to stateful (#14476)
add c437d5b use mkl sparse matrix to improve performance (#14492)
add 273ebc7 [DEP] upgrade dmlc-core (#14510)
add a5db391 [Clojure] Add methods based on NDArrayAPI/SymbolAPI (#14195)
add c2ba51b [Clojure] Clojure BERT QA example (#14691)
add b3b952f fp16 safe norm operator (#14616)
add f90d1c0 Use ubuntu_rat container for rat check (#14678)
add 413fe97 Avoid uneccesary vector copies in imperative_utils.cc (#14665)
add 1f84682 Properly handling custom op exception by modify engine
(#14693)
add 52a3553 [docstring] improve docstring and indentation in `module.clj`
(#14705)
add 3f3ba92 [numpy] Support zero-dim and zero-size tensors in MXNet
(#14661)
add 51d3291 Updated docs for R-package installation (#14269)
add 8e04b88 Update inception_inference.cpp (#14674)
add ff04de0 Add vim-nox to ci/docker/install/ubuntu_core.sh (#14632)
add a26ad37 Disable USE_GPERFTOOLS (#14711)
add 3e2f752 Reference engine from chunk via weak pointer (#14591)
add 42e929b Fix spelling in threaded_engine_test (#14709)
add 3b23c2d Fix documentation for bilinear upsampling and add unit test
(#14035)
add a083a61 [MKLDNN]Improve quantizeV2 and dequantize latency (#14641)
add 0da4b67 [MKLDNN]Add quantized relu (#14604)
add 18d4051 Add publish test of PyPi cu100mkl (#14637)
add 93238a2 [contrib][op] fix MultiBoxPrior confusing results if first
ratio is not 1.0 (#13763)
add 391a1be Set idx2name for Optimizer object (#14703)
add 818be02 [MXNET-1377] Add static-dependencies licenses (#14726)
add 5b6e25b [MXNET-1287] Feat dep (#14668)
add dc48cd2 License Googletest and Appendix (#14687)
add 153d2f4 [MXNET-1385] Improved Scala Init and Macros warning messages
(#14656)
add 5331933 Reenable TensorRT step (#14654)
add dd1004b fix pi instructions (#14746)
add 100586a Change size_t to int within for loop to fix windows build
error (#14740)
add 68efc15 fix custom op fork test (#14753)
add 494c29e [BUGFIX] fix ELU function will appear nan when calculating
the gradient (#14673)
add da7fff7 fix min max on zero-sized ndarray (#14745)
add a1b0a3a [Clojure] Better api docstrings by replacing newlines (#14752)
add 0f63659 add a compiler flag to use int64 as tensor size (#14570)
add 3b39c56 fix shape index bug (#14518)
add 2fd4720 fix acc_type_switch macro with extra tests (#14773)
add 014ca13 [DOC] Update ubuntu install instructions from source (#14534)
add 8604c3c [Mxnet-1397] Support symbolic api for requantize and
dequantize (#14749)
add 8cae72e julia/ndarray: fix flaky test cases for `clamp` (#14776)
add 587d480 Use DEFAULT macro in C APIs (#14767)
add 22377ed Fix GELU backward possible NaN (#14782)
add acf53fd Mention additional language bindings and add links (#14798)
add 3d3803e Updates python setup.py for recent license changes (#14778)
add 97e09f2 Improve CMake handling of sse2 and sse3 (#14757)
add 6aeb97e change mxnet_option behavior (#14743)
add 680bade Use correct stash name when running nightly tests (#14809)
add 5dd9fa2 [clojure][generator] ndarray/symbol api random merged (#14800)
add 369b66d Improve cached_op performance for static mode (#14785)
add 40e3d7c data preparation file moved in example (#14781)
add 6cbc273 Scala/Java Predict API fix #14756 (#14804)
add 003800c clean up submodule (#14645)
add 6c60025 [MKLDNN]Refactor requantize to speed up execution (#14608)
add c18381d [MXNET-1398] Enable zero-copy from numpy to MXNet NDArray
(#14733)
add 64287dd Speed up SequenceReverse (#14627)
add 1af29e9 Fixes for wine detection tutorial (#13886)
add 3a46980 Use USE_SIGNAL_HANDLER by default set to ON in CMakeLists.txt
(#14599)
add 07aef13 Add unpooled gpu memory type (#14716)
add 1238aa0 Revert "use mkl sparse matrix to improve performance
(#14492)" (#14806)
add c7577e5 added extraction/generation of diagonal and triangonal
matrices to linalg (#14501)
add 5fda0a5 add clojure tutorials to index (#14814)
add 488fad2 Fix iterator over symbol when multiple children have the same
name (#14597)
add cdd7087 Fix Clojure BERT example's context argument (#14843)
add 84c1635 [MKLDNN] add quantized sum (#14614)
add bde1b84 [int8] Add MobileNetV2_1.0 & ResNet18 Quantization (#14823)
add 1c874cf reformat trt to use subgraph API, add fp16 support (#14040)
add 5e5a59e Upgrade Pylint version to 2.3.1 (#14807)
add 977e558 Make docblocks for Gluon BatchNorm and SyncBatchNorm
consistent with the code (#14840)
add 381a9da Print reproduction command on CI failure (#14815)
add e17b7e2 [MXNET-13578] Fix cmake installation failed (#14692)
add 36c3306 Update base CUDA image for CI to v10.0 cuDNN 7.3.1 (#14513)
add 1540a84 [Clojure] Remove unneeded test files (#14813)
add 372f531 [DEV] update code owner (#14862)
add 204f3f2 Revert "Improve cached_op performance for static mode
(#14785)" (#14868)
add d09f68a Update lstm_crf.py (#14865)
add 5ba285b Fix sample_multinomial number of outputs bug (#14873)
add 4d7bae1 Add the Gluon Implementation of Deformable Convolution
(#14810)
add 621b391 Refactor ImageRecordIter (#14824)
add 25ba1d1 Prevent crashes for opencv exception and std::exception
(#14433)
add b30949f [MXNet-1211] Factor and "Like" modes in BilinearResize2D
operator (#13226)
add 2113cb7 [Clojure] Add Fine Tuning Sentence Pair Classification BERT
Example (#14769)
add f6ef206 [Bugfix] Fix layer norm for large input shape (#14870)
add 42ede50 rewrite test_custom_op_exc (#14878)
add a722db4 [Dependency Update] Upgrade openssl to 1.1.1b (#14837)
add 0255dd6 [Dependency Update] Upgrade cuDNN & NCCL (#14884)
add fdd45cf Add mkldnn_version.h to pip package (#14899)
add 5bda980 fix add_n bug: when input mem overlap with output mem,
results is wrong (#14889)
add 08895b7 Fix the return type of sparse.clip operator (#14856)
add 527573e Add support for fast variable-length LSTM (#14208)
add 0ddef13 Revert "[Dependency Update] Upgrade cuDNN & NCCL (#14884)"
(#14910)
add 2e03e9f Adds additional CUDA build environments (#14909)
add 8dddac0 [MXNET-1400] adding tests cases to verify large tensor
support for depth_to_space and space_to_depth (#14797)
add da02488 upgrade the libpng to 1.6.35 (#14620)
add 13f81a0 Improve dev_menu virtualenv handling (#14788)
add 874fb89 Add API documentation for upsampling operator with examples
(#14919)
add f4598e7 Pins version of scikit-learn for python2 due to drop in
support (#14928)
add 4796851 Deprecate NDArrayCollector and instead use ResourceScope
(#14780)
add b22ee95 [MXNET-857] Add initial NVTX profiler implementation (#12328)
add 8a4ad9f upgrade the version to 2.0.2 (#14621)
add 669ab2c Updates to cudnn package installation (#14923)
add d577b6f [MXNET-1352] Allow dynamic shape in while_loop and if
conditionals (#14393)
add f67d067 Add numpy linspace (#14927)
add 13d6ee6 Fix reshape to add in-place back (#14903)
add 31225485 use mx.context.num_gpus instead of mx.test_utils.list_gpus in
MF recommender example (#14926)
add 1eba37a Re-enable static cached_op optimization (#14931)
add 99f5f66 Disables TensorRT build step (#14958)
add f7b7163 Fixed and re-enables TensorRT steps (#14960)
add 8b7e374 Fixes call to build ubuntu gpu in nightly tests (#14964)
add 1b4e604 [MXNet-1334][Fit API]base class for estimator and
eventhandler (#14346)
add 5b1eb20 Fixed issue where the estimator was printing beyond the
dataset size … (#14464)
add 02e7c9b [MXNet-1349][Fit API]Add validation support and unit tests
for fit() API (#14442)
add 92c3c21 [MXNet-1340][Fit API]Update train stats (#14494)
add b11114a [MXNet-1375][Fit API]Added RNN integration test for fit() API
(#14547)
add ca2d884 [MXNet-1343][Fit API]Add CNN integration test for fit() API
(#14405)
add fcee290 [MXNET-1344, 1346][FIT API] Retrieve Batch size and Logging
verbose support for Gluon fit() API (#14587)
add 768470e move estimator to contrib (#14633)
add 6c455ef move to gluon contrib (#14635)
add 900b449 [Fit API] improve event handlers (#14685)
add 5ac7751 [MXNET-1396][Fit-API] Update default handler logic (#14765)
add d57a712 [Fit API] update estimator (#14849)
add 3b17837 [Fit-API] Adress PR comments (#14885)
This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version. This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:
 * -- * -- B -- O -- O -- O   (588730c)
            \
             N -- N -- N   refs/heads/fit-api (3b17837)
You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.
Any revisions marked "omit" are not gone; other references still
refer to them. Any revisions marked "discard" are gone forever.
No new revisions were added by this update.
Summary of changes:
.gitignore | 2 +-
3rdparty/dmlc-core | 2 +-
3rdparty/googletest | 2 +-
3rdparty/mshadow | 2 +-
3rdparty/onnx-tensorrt | 2 +-
CMakeLists.txt | 50 +-
CODEOWNERS | 25 +-
CONTRIBUTORS.md | 4 +
LICENSE | 42 +-
Makefile | 12 +-
R-package/src/ndarray.cc | 6 +-
R-package/src/symbol.cc | 20 +-
README.md | 2 +-
amalgamation/amalgamation.py | 27 +-
ci/Jenkinsfile_utils.groovy | 33 +-
ci/build.py | 1 +
ci/docker/Dockerfile.build.centos7_gpu | 6 +-
ci/docker/Dockerfile.build.ubuntu_base_gpu | 7 +-
ci/docker/Dockerfile.build.ubuntu_build_cuda | 6 +-
...buntu_gpu => Dockerfile.build.ubuntu_gpu_cu100} | 6 +-
...ubuntu_gpu => Dockerfile.build.ubuntu_gpu_cu80} | 2 +-
...ubuntu_gpu => Dockerfile.build.ubuntu_gpu_cu90} | 6 +-
...ubuntu_gpu => Dockerfile.build.ubuntu_gpu_cu92} | 6 +-
ci/docker/Dockerfile.build.ubuntu_gpu_tensorrt | 7 +-
ci/docker/Dockerfile.build.ubuntu_nightly_gpu | 7 +-
ci/docker/install/centos7_cudnn.sh | 59 +
ci/docker/install/tensorrt.sh | 3 +-
ci/docker/install/ubuntu_core.sh | 1 +
.../install/{ubuntu_python.sh => ubuntu_cudnn.sh} | 36 +-
ci/docker/install/ubuntu_nvidia.sh | 2 +-
ci/docker/install/ubuntu_publish.sh | 4 +-
ci/docker/install/ubuntu_python.sh | 4 +-
ci/docker/install/ubuntu_tutorials.sh | 4 +-
ci/docker/runtime_functions.sh | 68 +-
ci/jenkins/Jenkins_steps.groovy | 83 +-
ci/jenkins/Jenkinsfile_unix_cpu | 3 +-
ci/jenkins/Jenkinsfile_unix_gpu | 4 +-
.../Modules/FindNVTX.cmake | 27 +-
cmake/Utils.cmake | 2 +-
contrib/clojure-package/.gitignore | 6 +
contrib/clojure-package/examples/bert/.gitignore | 18 +
contrib/clojure-package/examples/bert/README.md | 156 +++
.../examples/bert/fine-tune-bert.ipynb | 510 +++++++
.../examples/bert/fine-tune-bert.md | 371 +++++
.../clojure-package/examples/bert/get_bert_data.sh | 32 +
.../{infer/objectdetector => bert}/project.clj | 24 +-
.../examples/bert/squad-samples.edn | 39 +
.../bert/src/bert/bert_sentence_classification.clj | 160 +++
.../examples/bert/src/bert/infer.clj | 129 ++
.../examples/bert/src/bert/util.clj | 52 +
.../bert/bert_sentence_classification_test.clj | 86 ++
.../examples/bert/test/bert/infer_test.clj | 43 +
.../examples/infer/objectdetector/project.clj | 1 -
.../infer/objectdetector/src/infer/draw.clj | 44 -
.../src/infer/objectdetector_example.clj | 59 +-
.../test/infer/objectdetector_example_test.clj | 6 +-
contrib/clojure-package/integration-tests.sh | 2 +-
contrib/clojure-package/src/dev/generator.clj | 592 ++++++--
.../src/org/apache/clojure_mxnet/callback.clj | 9 +-
.../src/org/apache/clojure_mxnet/image.clj | 26 +-
.../src/org/apache/clojure_mxnet/module.clj | 544 +++++---
.../{callback.clj => ndarray_api.clj} | 29 +-
.../{callback.clj => ndarray_random_api.clj} | 25 +-
.../src/org/apache/clojure_mxnet/symbol_api.clj | 32 +
.../org/apache/clojure_mxnet/symbol_random_api.clj | 32 +
.../src/org/apache/clojure_mxnet/util.clj | 8 +-
.../clojure-package/test/dev/generator_test.clj | 183 ++-
.../clojure-package/test/good-test-ndarray-api.clj | 170 +++
.../test/good-test-ndarray-random-api.clj | 95 ++
.../clojure-package/test/good-test-symbol-api.clj | 192 +++
.../test/good-test-symbol-random-api.clj | 118 ++
.../test/org/apache/clojure_mxnet/conv_test.clj | 24 +-
.../test/org/apache/clojure_mxnet/image_test.clj | 16 +-
.../org/apache/clojure_mxnet/ndarray_api_test.clj | 415 ++++++
.../org/apache/clojure_mxnet/symbol_api_test.clj | 61 +
cpp-package/example/charRNN.cpp | 5 +-
.../example/inference/inception_inference.cpp | 2 +-
cpp-package/example/resnet.cpp | 2 +-
cpp-package/example/test_kvstore.cpp | 201 +++
cpp-package/include/mxnet-cpp/kvstore.h | 13 +-
cpp-package/include/mxnet-cpp/kvstore.hpp | 78 +-
cpp-package/include/mxnet-cpp/ndarray.hpp | 14 +-
cpp-package/include/mxnet-cpp/symbol.hpp | 32 +-
cpp-package/tests/ci_test.sh | 7 +-
cpp-package/tests/travis/setup.sh | 2 +-
dev_menu.py | 33 +-
docs/api/python/gluon/contrib.md | 15 +
docs/api/python/ndarray/linalg.md | 6 +-
docs/api/python/profiler/profiler.md | 4 +-
docs/api/python/symbol/linalg.md | 6 +-
docs/architecture/exception_handling.md | 3 -
docs/architecture/note_data_loading.md | 8 +-
docs/faq/env_var.md | 9 +-
docs/install/index.md | 38 +-
docs/install/requirements.txt | 2 +-
docs/install/ubuntu_setup.md | 97 +-
docs/tutorials/embedded/wine_detector.md | 51 +-
docs/tutorials/index.md | 21 +-
docs/tutorials/python/profiler.md | 25 +-
docs/tutorials/python/profiler_nvprof.png | Bin 0 -> 235747 bytes
docs/tutorials/python/profiler_nvprof_zoomed.png | Bin 0 -> 254663 bytes
docs/tutorials/python/profiler_winograd.png | Bin 0 -> 75450 bytes
example/README.md | 1 -
example/gluon/{super_resolution => }/data.py | 0
example/gluon/lstm_crf/lstm_crf.py | 2 +-
example/quantization/README.md | 70 +-
example/quantization/imagenet_gen_qsym_mkldnn.py | 20 +-
example/quantization/imagenet_inference.py | 54 +-
example/recommenders/demo1-MF.ipynb | 2 +-
example/ssd/README.md | 56 +-
example/ssd/dataset/mscoco.py | 10 +-
example/ssd/dataset/names/mscoco.names | 12 +-
example/ssd/symbol/legacy_vgg16_ssd_300.py | 3 +-
example/ssd/symbol/legacy_vgg16_ssd_512.py | 3 +-
example/ssd/symbol/symbol_builder.py | 3 +-
example/ssd/train.py | 5 +-
example/ssd/train/metric.py | 11 +
example/ssd/train/train_net.py | 8 +-
include/mxnet/c_api.h | 285 +++-
include/mxnet/c_api_error.h | 25 +-
include/mxnet/engine.h | 6 +-
include/mxnet/imperative.h | 16 +
include/mxnet/libinfo.h | 6 +-
include/mxnet/ndarray.h | 37 +-
include/mxnet/tensor_blob.h | 16 +-
include/mxnet/tuple.h | 176 ++-
julia/test/unittest/ndarray.jl | 22 +-
make/config.mk | 12 +-
make/crosscompile.jetson.mk | 9 +-
make/osx.mk | 6 +
perl-package/AI-MXNet/lib/AI/MXNet/Executor.pm | 2 +-
perl-package/AI-MXNet/lib/AI/MXNet/NDArray.pm | 4 +-
perl-package/AI-MXNet/lib/AI/MXNet/Symbol.pm | 4 +-
perl-package/AI-MXNetCAPI/mxnet.i | 172 +--
perl-package/AI-MXNetCAPI/mxnet_typemaps.i | 22 +-
python/mxnet/__init__.py | 2 +-
python/mxnet/base.py | 153 +-
.../mxnet/contrib/onnx/mx2onnx/_export_helper.py | 2 +-
.../mxnet/contrib/onnx/mx2onnx/_op_translations.py | 2 +-
.../contrib/onnx/onnx2mx/_translation_utils.py | 2 +-
python/mxnet/contrib/quantization.py | 1 +
python/mxnet/contrib/tensorrt.py | 119 +-
python/mxnet/contrib/text/vocab.py | 2 +-
python/mxnet/executor.py | 50 +-
python/mxnet/gluon/contrib/__init__.py | 2 +
python/mxnet/gluon/contrib/{ => cnn}/__init__.py | 10 +-
python/mxnet/gluon/contrib/cnn/conv_layers.py | 221 +++
python/mxnet/gluon/contrib/nn/basic_layers.py | 8 +-
python/mxnet/gluon/nn/activations.py | 3 +-
python/mxnet/gluon/nn/basic_layers.py | 8 +-
python/mxnet/gluon/parameter.py | 2 +-
python/mxnet/gluon/rnn/rnn_layer.py | 41 +-
python/mxnet/gluon/trainer.py | 4 +-
python/mxnet/gluon/utils.py | 12 +-
python/mxnet/image/detection.py | 2 +-
python/mxnet/image/image.py | 8 +-
python/mxnet/io/io.py | 2 +
python/mxnet/metric.py | 132 +-
python/mxnet/model.py | 4 +-
python/mxnet/module/base_module.py | 2 +-
python/mxnet/module/module.py | 16 +-
python/mxnet/module/python_module.py | 2 +-
python/mxnet/ndarray/_internal.py | 2 -
python/mxnet/ndarray/contrib.py | 5 +-
python/mxnet/ndarray/ndarray.py | 188 ++-
python/mxnet/ndarray/register.py | 7 +-
python/mxnet/ndarray/sparse.py | 4 +-
python/mxnet/ndarray_doc.py | 2 +-
python/mxnet/operator.py | 28 +-
python/mxnet/optimizer/optimizer.py | 2 +-
python/mxnet/recordio.py | 4 +
python/mxnet/rnn/rnn_cell.py | 4 +-
python/mxnet/symbol/_internal.py | 2 -
python/mxnet/symbol/register.py | 7 +-
python/mxnet/symbol/symbol.py | 196 ++-
python/mxnet/test_utils.py | 14 +-
python/mxnet/visualization.py | 4 +
.../assembly/src/main/assembly/assembly.xml | 13 +-
.../assembly/src/main/assembly/javadoc.xml | 2 +-
.../assembly/src/main/assembly/source.xml | 2 +-
scala-package/core/pom.xml | 5 +-
.../src/main/scala/org/apache/mxnet/Executor.scala | 143 +-
.../scala/org/apache/mxnet/ExecutorManager.scala | 27 +-
.../main/scala/org/apache/mxnet/FeedForward.scala | 27 +-
.../core/src/main/scala/org/apache/mxnet/IO.scala | 35 +-
.../src/main/scala/org/apache/mxnet/Image.scala | 8 +
.../src/main/scala/org/apache/mxnet/LibInfo.scala | 32 +-
.../scala/org/apache/mxnet/MX_PRIMITIVES.scala | 2 +
.../src/main/scala/org/apache/mxnet/NDArray.scala | 29 +-
.../scala/org/apache/mxnet/NDArrayCollector.scala | 6 +
.../main/scala/org/apache/mxnet/NumpyScope.scala | 63 +
.../scala/org/apache/mxnet/ResourceScope.scala | 46 +-
.../src/main/scala/org/apache/mxnet/Symbol.scala | 93 +-
.../scala/org/apache/mxnet/io/MXDataIter.scala | 3 +-
.../scala/org/apache/mxnet/io/NDArrayIter.scala | 5 +-
.../org/apache/mxnet/io/PrefetchingIter.scala | 87 +-
.../scala/org/apache/mxnet/javaapi/Image.scala | 25 +
.../scala/org/apache/mxnet/module/BaseModule.scala | 20 +-
.../org/apache/mxnet/module/BucketingModule.scala | 60 +-
.../scala/org/apache/mxnet/module/Module.scala | 157 ++-
.../org/apache/mxnet/module/SequentialModule.scala | 151 +-
.../org/apache/mxnet/util/OptionConversion.scala | 2 +
.../java/org/apache/mxnet/javaapi/NDArrayTest.java | 4 +-
.../src/test/scala/org/apache/mxnet/IOSuite.scala | 29 +-
.../test/scala/org/apache/mxnet/ImageSuite.scala | 10 +-
.../test/scala/org/apache/mxnet/KVStoreSuite.scala | 3 -
.../org/apache/mxnet/ModelParallelSuite.scala | 5 -
.../test/scala/org/apache/mxnet/ModuleSuite.scala | 32 +-
.../scala/org/apache/mxnet/NumpyScopeSuite.scala} | 26 +-
.../scala/org/apache/mxnet/OperatorSuite.scala | 6 -
.../test/scala/org/apache/mxnet/SymbolSuite.scala | 14 -
.../scala/org/apache/mxnet/train/ConvSuite.scala | 1 +
scala-package/deploy/pom.xml | 3 +
scala-package/deploy/src/main/deploy/deploy.xml | 2 +-
scala-package/examples/pom.xml | 4 -
.../mxnetexamples/javaapi/infer/bert/BertQA.java | 6 +-
.../benchmark/ScalaInferenceBenchmark.scala | 8 +-
.../CNNTextClassification.scala | 8 +-
.../mxnetexamples/customop/ExampleCustomOp.scala | 6 +-
.../customop/ExampleCustomOpWithRtc.scala | 2 +-
.../org/apache/mxnetexamples/gan/GanMnist.scala | 6 +-
.../datasets/SyntheticDataIter.scala | 2 +-
.../imageclassifier/ImageClassifierExample.scala | 4 +-
.../objectdetector/SSDClassifierExample.scala | 4 +-
.../org/apache/mxnetexamples/module/MnistMlp.scala | 2 +-
.../mxnetexamples/module/SequentialModuleEx.scala | 2 +-
.../mxnetexamples/multitask/ExampleMultiTask.scala | 15 +-
.../mxnetexamples/neuralstyle/NeuralStyle.scala | 6 +-
.../neuralstyle/end2end/BoostInference.scala | 6 +-
.../neuralstyle/end2end/BoostTrain.scala | 6 +-
.../neuralstyle/end2end/DataProcessing.scala | 4 +-
.../org/apache/mxnetexamples/rnn/BucketIo.scala | 12 +-
.../apache/mxnetexamples/rnn/LstmBucketing.scala | 2 +-
.../org/apache/mxnetexamples/rnn/TestCharRnn.scala | 2 +-
.../apache/mxnetexamples/rnn/TrainCharRnn.scala | 9 +-
.../benchmark/ScalaInferenceBenchmarkSuite.scala | 1 +
.../CNNClassifierExampleSuite.scala | 3 +-
.../customop/CustomOpExampleSuite.scala | 1 +
.../apache/mxnetexamples/gan/GanExampleSuite.scala | 3 +-
.../IMClassificationExampleSuite.scala | 1 +
.../ImageClassifierExampleSuite.scala | 1 +
.../ObjectDetectorExampleSuite.scala | 1 +
.../neuralstyle/NeuralStyleSuite.scala | 3 +-
.../apache/mxnetexamples/rnn/ExampleRNNSuite.scala | 3 +-
scala-package/infer/pom.xml | 4 -
.../scala/org/apache/mxnet/infer/Classifier.scala | 17 +
.../org/apache/mxnet/infer/ImageClassifier.scala | 36 +-
.../org/apache/mxnet/infer/MXNetHandler.scala | 14 +
.../org/apache/mxnet/infer/ObjectDetector.scala | 19 +
.../scala/org/apache/mxnet/infer/Predictor.scala | 38 +-
.../mxnet/infer/javaapi/ObjectDetector.scala | 80 +-
.../org/apache/mxnet/infer/javaapi/Predictor.scala | 52 +-
.../org/apache/mxnet/infer/ClassifierSuite.scala | 4 +-
.../main/scala/org/apache/mxnet/init/Base.scala | 27 +-
.../main/scala/org/apache/mxnet/init/LibInfo.scala | 30 +
scala-package/macros/pom.xml | 4 -
.../scala/org/apache/mxnet/APIDocGenerator.scala | 101 +-
.../scala/org/apache/mxnet/GeneratorBase.scala | 81 +-
.../main/scala/org/apache/mxnet/NDArrayMacro.scala | 50 +-
.../main/scala/org/apache/mxnet/SymbolMacro.scala | 47 +-
.../apache/mxnet/javaapi/JavaNDArrayMacro.scala | 17 +-
.../org/apache/mxnet/utils/CToScalaUtils.scala | 15 +-
.../src/main/java/mxnet/NDArrayCreation.java | 2 +-
.../src/main/java/mxnet/NDArrayOperation.java | 2 +-
.../main/native/org_apache_mxnet_native_c_api.cc | 219 ++-
.../main/native/org_apache_mxnet_native_c_api.h | 32 +
scala-package/pom.xml | 50 +-
scala-package/spark/pom.xml | 5 +
.../scala/org/apache/mxnet/spark/MXNDArray.scala | 2 +-
.../scala/org/apache/mxnet/spark/MXNetModel.scala | 2 +-
.../apache/mxnet/spark/io/LabeledPointIter.scala | 4 +-
.../mxnet/spark/io/LongLivingDataBatch.scala | 3 +-
.../org/apache/mxnet/spark/io/PointIter.scala | 4 +-
.../org/apache/mxnet/spark/utils/Network.scala | 22 +-
.../org/apache/mxnet/spark/MXNetGeneralSuite.scala | 1 +
.../apache/mxnet/spark/SharedSparkContext.scala | 12 +-
src/c_api/c_api.cc | 129 +-
src/c_api/c_api_common.h | 31 +
src/c_api/c_api_executor.cc | 482 ++++++-
src/c_api/c_api_ndarray.cc | 12 +
src/c_api/c_api_symbolic.cc | 107 +-
src/c_api/c_predict_api.cc | 1 +
src/common/exec_utils.h | 4 +-
src/common/serialization.h | 318 -----
src/common/utils.h | 60 +-
src/engine/naive_engine.cc | 3 +
src/engine/threaded_engine.cc | 25 +
src/engine/threaded_engine.h | 43 +-
src/executor/attach_op_execs_pass.cc | 8 +-
src/executor/exec_pass.h | 21 +-
src/executor/graph_executor.cc | 226 ++-
src/executor/graph_executor.h | 3 +
src/executor/infer_graph_attr_pass.cc | 20 +-
src/executor/tensorrt_pass.cc | 596 --------
src/executor/trt_graph_executor.cc | 443 ------
src/executor/trt_graph_executor.h | 111 --
src/imperative/cached_op.cc | 10 +-
src/imperative/cached_op.h | 8 +-
src/imperative/imperative.cc | 4 +-
src/imperative/imperative_utils.cc | 61 +-
src/imperative/imperative_utils.h | 56 +-
src/initialize.cc | 4 +-
src/io/image_aug_default.cc | 2 +-
src/io/image_det_aug_default.cc | 4 +-
src/io/image_io.cc | 6 +-
src/io/image_iter_common.h | 10 +
src/io/iter_batchloader.h | 2 +-
src/io/iter_image_recordio_2.cc | 119 +-
src/io/iter_sparse_batchloader.h | 2 +-
src/kvstore/gradient_compression.cc | 10 +-
src/libinfo.cc | 3 +
src/ndarray/ndarray.cc | 55 +-
src/ndarray/ndarray_function.cc | 12 +-
src/ndarray/ndarray_function.h | 2 +-
src/nnvm/plan_memory.cc | 6 +-
src/operator/batch_norm_v1-inl.h | 2 +-
src/operator/bilinear_sampler-inl.h | 4 +-
src/operator/channel_op_common.h | 4 +
src/operator/contrib/adamw-inl.h | 5 +-
src/operator/contrib/adaptive_avg_pooling-inl.h | 6 +-
src/operator/contrib/bilinear_resize-inl.h | 175 ++-
src/operator/contrib/bilinear_resize.cc | 52 +-
src/operator/contrib/bilinear_resize.cu | 37 +-
src/operator/contrib/boolean_mask.cc | 2 +-
src/operator/contrib/bounding_box-inl.h | 11 +-
src/operator/contrib/count_sketch-inl.h | 2 +-
src/operator/contrib/deformable_convolution-inl.h | 14 +-
src/operator/contrib/dgl_graph.cc | 68 +-
src/operator/contrib/fft-inl.h | 2 +-
src/operator/contrib/ifft-inl.h | 2 +-
src/operator/contrib/index_copy-inl.h | 5 +-
src/operator/contrib/multi_proposal-inl.h | 18 +-
src/operator/contrib/multibox_detection-inl.h | 4 +-
src/operator/contrib/multibox_detection.cc | 2 +-
src/operator/contrib/multibox_detection.cu | 2 +-
src/operator/contrib/multibox_prior-inl.h | 12 +-
src/operator/contrib/multibox_prior.cc | 7 +-
src/operator/contrib/multibox_prior.cu | 5 +-
src/operator/contrib/multibox_target-inl.h | 2 +-
src/operator/contrib/multibox_target.cc | 2 +-
src/operator/contrib/multibox_target.cu | 2 +-
src/operator/contrib/optimizer_op.cc | 2 +-
src/operator/contrib/proposal-inl.h | 18 +-
src/operator/contrib/quadratic_op-inl.h | 2 +-
src/operator/contrib/sync_batch_norm-inl.h | 2 +-
src/operator/contrib/tensorrt-inl.h | 79 --
src/operator/contrib/tensorrt.cc | 181 ---
src/operator/contrib/transformer-inl.h | 4 +-
src/operator/control_flow.cc | 328 ++---
src/operator/convolution_v1-inl.h | 10 +-
src/operator/cudnn_rnn-inl.h | 863 ------------
src/operator/custom/custom-inl.h | 45 +-
src/operator/custom/custom.cc | 18 +-
src/operator/image/image_random-inl.h | 18 +-
src/operator/image/resize-inl.h | 4 +-
src/operator/leaky_relu-inl.h | 6 +-
src/operator/loss_binary_op-inl.h | 2 +-
src/operator/mshadow_op.h | 97 +-
src/operator/mxnet_op.h | 89 +-
src/operator/nn/batch_norm.cc | 2 +-
src/operator/nn/concat.cc | 51 +-
src/operator/nn/convolution-inl.h | 18 +-
src/operator/nn/convolution.cc | 85 +-
src/operator/nn/ctc_loss-inl.h | 2 +-
src/operator/nn/cudnn/cudnn_algoreg-inl.h | 2 +-
src/operator/nn/cudnn/cudnn_batch_norm.cc | 2 +-
src/operator/nn/cudnn/cudnn_convolution-inl.h | 6 +-
src/operator/nn/cudnn/cudnn_deconvolution-inl.h | 6 +-
src/operator/nn/deconvolution-inl.h | 30 +-
src/operator/nn/deconvolution.cc | 50 +-
src/operator/nn/dropout-inl.h | 7 +-
src/operator/nn/dropout.cc | 4 +-
src/operator/nn/fully_connected.cc | 4 +-
src/operator/nn/im2col.h | 4 +-
src/operator/nn/layer_norm-inl.h | 10 +-
src/operator/nn/layer_norm.cc | 6 +-
src/operator/nn/lrn.cc | 2 +-
src/operator/nn/mkldnn/mkldnn_act-inl.h | 74 +
src/operator/nn/mkldnn/mkldnn_act.cc | 91 +-
src/operator/nn/mkldnn/mkldnn_base-inl.h | 4 +-
src/operator/nn/mkldnn/mkldnn_base.cc | 1 +
src/operator/nn/mkldnn/mkldnn_concat.cc | 12 +-
src/operator/nn/mkldnn/mkldnn_ops-inl.h | 12 +
src/operator/nn/mkldnn/mkldnn_reshape.cc | 194 +++
src/operator/nn/mkldnn/mkldnn_slice.cc | 6 +-
src/operator/nn/mkldnn/mkldnn_transpose.cc | 161 +++
src/operator/nn/pooling-inl.h | 14 +-
src/operator/nn/pooling.cc | 6 +-
src/operator/nn/upsampling-inl.h | 4 +-
src/operator/nn/upsampling.cc | 56 +-
src/operator/operator_common.h | 19 +-
src/operator/operator_util.cc | 2 +-
src/operator/optimizer_op-inl.h | 8 +-
src/operator/pad-inl.h | 2 +-
src/operator/pooling_v1-inl.h | 35 +-
src/operator/quantization/dequantize-inl.h | 66 +-
src/operator/quantization/dequantize.cc | 25 +-
src/operator/quantization/dequantize.cu | 2 +-
.../quantization/mkldnn/mkldnn_dequantize-inl.h | 140 +-
.../quantization/mkldnn/mkldnn_quantize_v2-inl.h | 202 +--
.../quantization/mkldnn/mkldnn_quantized_act.cc | 55 +
.../quantization/mkldnn/mkldnn_quantized_conv.cc | 2 +-
.../mkldnn/mkldnn_quantized_elemwise_add.cc | 206 +++
.../mkldnn/mkldnn_quantized_fully_connected.cc | 4 +-
.../quantization/mkldnn/mkldnn_requantize-inl.h | 91 +-
src/operator/quantization/quantization_utils.h | 79 +-
src/operator/quantization/quantize-inl.h | 4 +-
src/operator/quantization/quantize_graph_pass.cc | 32 +-
src/operator/quantization/quantize_v2-inl.h | 216 ++-
src/operator/quantization/quantize_v2.cc | 21 +-
src/operator/quantization/quantize_v2.cu | 2 +-
src/operator/quantization/quantized_activation.cc | 138 ++
src/operator/quantization/quantized_concat.cc | 18 +-
src/operator/quantization/quantized_conv.cc | 4 +-
src/operator/quantization/quantized_conv.cu | 2 +-
.../quantization/quantized_elemwise_add-inl.h | 58 +
.../quantization/quantized_elemwise_add.cc | 141 ++
src/operator/quantization/quantized_flatten-inl.h | 6 +-
.../quantization/quantized_fully_connected.cc | 8 +-
.../quantization/quantized_fully_connected.cu | 2 +-
src/operator/quantization/quantized_pooling.cc | 4 +-
src/operator/quantization/requantize-inl.h | 25 +-
src/operator/quantization/requantize.cc | 4 +
src/operator/random/multisample_op.h | 2 +-
src/operator/random/sample_multinomial_op.h | 19 +-
src/operator/random/unique_sample_op.h | 2 +-
src/operator/regression_output-inl.h | 2 +-
src/operator/rnn-inl.h | 1466 +++++++++++++++-----
src/operator/rnn.cc | 230 ++-
src/operator/rnn.cu | 21 +-
src/operator/sequence_last-inl.h | 2 +-
src/operator/sequence_reverse-inl.h | 57 +-
src/operator/slice_channel-inl.h | 17 +-
src/operator/softmax_output-inl.h | 12 +-
src/operator/softmax_output.cc | 12 +-
src/operator/spatial_transformer-inl.h | 4 +-
src/operator/subgraph/build_subgraph.cc | 16 +-
src/operator/subgraph/mkldnn/mkldnn_conv.cc | 64 +-
src/operator/subgraph/mkldnn/mkldnn_fc.cc | 4 +-
..._property.h => mkldnn_post_quantize_property.h} | 63 +-
.../subgraph/mkldnn/mkldnn_subgraph_property.cc | 4 +-
.../tensorrt}/nnvm_to_onnx-inl.h | 131 +-
.../{contrib => subgraph/tensorrt}/nnvm_to_onnx.cc | 284 ++--
.../subgraph/tensorrt}/onnx_to_tensorrt.cc | 27 +-
.../subgraph/tensorrt}/onnx_to_tensorrt.h | 18 +-
src/operator/subgraph/tensorrt/tensorrt-inl.h | 240 ++++
src/operator/subgraph/tensorrt/tensorrt.cc | 336 +++++
.../{contrib => subgraph/tensorrt}/tensorrt.cu | 30 +-
src/operator/subgraph_op_common.cc | 4 +-
src/operator/subgraph_op_common.h | 12 +-
src/operator/svm_output-inl.h | 6 +-
src/operator/swapaxis-inl.h | 10 +-
src/operator/tensor/broadcast_reduce-inl.cuh | 74 +-
src/operator/tensor/broadcast_reduce-inl.h | 51 +-
src/operator/tensor/broadcast_reduce_op.h | 346 +++--
src/operator/tensor/broadcast_reduce_op_value.cc | 6 +-
src/operator/tensor/diag_op-inl.h | 12 +-
src/operator/tensor/dot-inl.h | 12 +-
src/operator/tensor/elemwise_binary_broadcast_op.h | 53 +-
src/operator/tensor/elemwise_binary_op_basic.cc | 3 +
src/operator/tensor/elemwise_unary_op_basic.cc | 10 +-
src/operator/tensor/histogram-inl.h | 14 +-
src/operator/tensor/indexing_op.h | 31 +-
src/operator/tensor/init_op.cc | 11 +
src/operator/tensor/init_op.cu | 3 +
src/operator/tensor/init_op.h | 87 +-
src/operator/tensor/la_op-inl.h | 94 ++
src/operator/tensor/la_op.cc | 231 +++
src/operator/tensor/la_op.cu | 24 +
src/operator/tensor/la_op.h | 70 +-
src/operator/tensor/matrix_op-inl.h | 377 ++---
src/operator/tensor/matrix_op.cc | 68 +-
src/operator/tensor/ordering_op-inl.h | 2 +-
src/operator/tensor/slice-inl.h | 6 +-
src/operator/tensor/sparse_retain-inl.h | 2 +-
src/operator/tensor/square_sum-inl.h | 9 +-
.../dequantize.cu => profiler/nvtx.cc} | 15 +-
.../quantize_v2.cu => profiler/nvtx.h} | 45 +-
src/profiler/profiler.h | 26 +
src/profiler/storage_profiler.h | 6 +-
src/resource.cc | 14 +-
src/storage/storage.cc | 10 +-
.../cpp/engine/engine_shutdown_test.cc | 31 +-
tests/cpp/engine/threaded_engine_test.cc | 94 +-
tests/cpp/include/test_mkldnn.h | 18 +-
tests/cpp/include/test_util.h | 4 +-
tests/cpp/misc/serialization.cc | 68 -
tests/cpp/operator/batchnorm_test.cc | 4 +-
tests/cpp/operator/mkldnn_operator_test.cc | 6 +-
tests/nightly/JenkinsfileForBinaries | 38 +-
.../nightly/apache_rat_license_check/rat-excludes | 1 +
tests/nightly/test_large_array.py | 58 +-
tests/python/gpu/test_gluon_contrib_gpu.py | 63 +
tests/python/gpu/test_gluon_gpu.py | 55 +
tests/python/gpu/test_gluon_transforms.py | 4 +-
tests/python/gpu/test_operator_gpu.py | 23 +-
tests/python/mkl/test_mkldnn.py | 56 +-
.../python/profiling/simple_forward.py | 35 +-
tests/python/profiling/test_nvtx.py | 52 +
tests/python/quantization/test_quantization.py | 209 ++-
tests/python/tensorrt/lenet5_train.py | 2 +
tests/python/tensorrt/test_cvnets.py | 34 +-
tests/python/tensorrt/test_cycle.py | 69 -
tests/python/tensorrt/test_resnet18.py | 42 +-
tests/python/tensorrt/test_tensorrt_lenet5.py | 91 +-
tests/python/tensorrt/test_training_warning.py | 70 -
tests/python/train/test_dtype.py | 66 +
tests/python/unittest/test_contrib_control_flow.py | 30 +-
tests/python/unittest/test_contrib_operator.py | 14 +-
tests/python/unittest/test_exc_handling.py | 134 +-
tests/python/unittest/test_gluon.py | 2 +-
tests/python/unittest/test_gluon_rnn.py | 6 +-
tests/python/unittest/test_image.py | 13 +
tests/python/unittest/test_infer_shape.py | 16 +
tests/python/unittest/test_metric.py | 82 ++
tests/python/unittest/test_module.py | 28 +
tests/python/unittest/test_ndarray.py | 54 +-
tests/python/unittest/test_operator.py | 655 +++++++--
tests/python/unittest/test_random.py | 12 +
tests/python/unittest/test_sparse_ndarray.py | 1 +
tests/python/unittest/test_symbol.py | 5 +
tools/caffe_converter/compare_layers.py | 5 -
tools/caffe_converter/test_converter.py | 2 -
tools/dependencies/LICENSE.binary.dependencies | 289 ++++
tools/dependencies/eigen.sh | 1 +
tools/dependencies/libpng.sh | 2 +-
tools/dependencies/libtiff.sh | 6 +-
tools/dependencies/libturbojpeg.sh | 2 +-
tools/dependencies/openssl.sh | 2 +-
tools/pip/setup.py | 19 +-
tools/staticbuild/build.sh | 8 +
tools/staticbuild/build_lib.sh | 10 +-
532 files changed, 17774 insertions(+), 7643 deletions(-)
copy ci/docker/{Dockerfile.build.ubuntu_gpu =>
Dockerfile.build.ubuntu_gpu_cu100} (94%)
copy ci/docker/{Dockerfile.build.ubuntu_gpu =>
Dockerfile.build.ubuntu_gpu_cu80} (97%)
copy ci/docker/{Dockerfile.build.ubuntu_gpu =>
Dockerfile.build.ubuntu_gpu_cu90} (94%)
rename ci/docker/{Dockerfile.build.ubuntu_gpu =>
Dockerfile.build.ubuntu_gpu_cu92} (94%)
create mode 100755 ci/docker/install/centos7_cudnn.sh
copy ci/docker/install/{ubuntu_python.sh => ubuntu_cudnn.sh} (56%)
copy ci/docker/install/ubuntu_nvidia.sh => cmake/Modules/FindNVTX.cmake (53%)
mode change 100755 => 100644
create mode 100644 contrib/clojure-package/examples/bert/.gitignore
create mode 100644 contrib/clojure-package/examples/bert/README.md
create mode 100644 contrib/clojure-package/examples/bert/fine-tune-bert.ipynb
create mode 100644 contrib/clojure-package/examples/bert/fine-tune-bert.md
create mode 100755 contrib/clojure-package/examples/bert/get_bert_data.sh
copy contrib/clojure-package/examples/{infer/objectdetector =>
bert}/project.clj (58%)
create mode 100644 contrib/clojure-package/examples/bert/squad-samples.edn
create mode 100644
contrib/clojure-package/examples/bert/src/bert/bert_sentence_classification.clj
create mode 100644 contrib/clojure-package/examples/bert/src/bert/infer.clj
create mode 100644 contrib/clojure-package/examples/bert/src/bert/util.clj
create mode 100644
contrib/clojure-package/examples/bert/test/bert/bert_sentence_classification_test.clj
create mode 100644
contrib/clojure-package/examples/bert/test/bert/infer_test.clj
delete mode 100644
contrib/clojure-package/examples/infer/objectdetector/src/infer/draw.clj
copy contrib/clojure-package/src/org/apache/clojure_mxnet/{callback.clj =>
ndarray_api.clj} (53%)
copy contrib/clojure-package/src/org/apache/clojure_mxnet/{callback.clj =>
ndarray_random_api.clj} (59%)
create mode 100644
contrib/clojure-package/src/org/apache/clojure_mxnet/symbol_api.clj
create mode 100644
contrib/clojure-package/src/org/apache/clojure_mxnet/symbol_random_api.clj
create mode 100644 contrib/clojure-package/test/good-test-ndarray-api.clj
create mode 100644
contrib/clojure-package/test/good-test-ndarray-random-api.clj
create mode 100644 contrib/clojure-package/test/good-test-symbol-api.clj
create mode 100644 contrib/clojure-package/test/good-test-symbol-random-api.clj
create mode 100644
contrib/clojure-package/test/org/apache/clojure_mxnet/ndarray_api_test.clj
create mode 100644
contrib/clojure-package/test/org/apache/clojure_mxnet/symbol_api_test.clj
create mode 100644 cpp-package/example/test_kvstore.cpp
create mode 100644 docs/tutorials/python/profiler_nvprof.png
create mode 100644 docs/tutorials/python/profiler_nvprof_zoomed.png
create mode 100644 docs/tutorials/python/profiler_winograd.png
rename example/gluon/{super_resolution => }/data.py (100%)
copy python/mxnet/gluon/contrib/{ => cnn}/__init__.py (82%)
create mode 100644 python/mxnet/gluon/contrib/cnn/conv_layers.py
create mode 100644
scala-package/core/src/main/scala/org/apache/mxnet/NumpyScope.scala
rename
scala-package/{macros/src/main/scala/org/apache/mxnet/utils/OperatorBuildUtils.scala
=> core/src/test/scala/org/apache/mxnet/NumpyScopeSuite.scala} (58%)
delete mode 100644 src/common/serialization.h
delete mode 100644 src/executor/tensorrt_pass.cc
delete mode 100644 src/executor/trt_graph_executor.cc
delete mode 100644 src/executor/trt_graph_executor.h
delete mode 100644 src/operator/contrib/tensorrt-inl.h
delete mode 100644 src/operator/contrib/tensorrt.cc
delete mode 100644 src/operator/cudnn_rnn-inl.h
create mode 100644 src/operator/nn/mkldnn/mkldnn_act-inl.h
create mode 100644 src/operator/nn/mkldnn/mkldnn_reshape.cc
create mode 100644 src/operator/nn/mkldnn/mkldnn_transpose.cc
create mode 100644 src/operator/quantization/mkldnn/mkldnn_quantized_act.cc
create mode 100644
src/operator/quantization/mkldnn/mkldnn_quantized_elemwise_add.cc
create mode 100644 src/operator/quantization/quantized_activation.cc
create mode 100644 src/operator/quantization/quantized_elemwise_add-inl.h
create mode 100644 src/operator/quantization/quantized_elemwise_add.cc
rename src/operator/subgraph/mkldnn/{mkldnn_conv_post_quantize_property.h =>
mkldnn_post_quantize_property.h} (68%)
rename src/operator/{contrib => subgraph/tensorrt}/nnvm_to_onnx-inl.h (61%)
rename src/operator/{contrib => subgraph/tensorrt}/nnvm_to_onnx.cc (68%)
rename src/{executor => operator/subgraph/tensorrt}/onnx_to_tensorrt.cc (89%)
rename src/{executor => operator/subgraph/tensorrt}/onnx_to_tensorrt.h (88%)
create mode 100644 src/operator/subgraph/tensorrt/tensorrt-inl.h
create mode 100644 src/operator/subgraph/tensorrt/tensorrt.cc
rename src/operator/{contrib => subgraph/tensorrt}/tensorrt.cu (69%)
copy src/{operator/quantization/dequantize.cu => profiler/nvtx.cc} (73%)
copy src/{operator/quantization/quantize_v2.cu => profiler/nvtx.h} (58%)
copy src/operator/quantization/dequantize.cu =>
tests/cpp/engine/engine_shutdown_test.cc (54%)
delete mode 100644 tests/cpp/misc/serialization.cc
create mode 100644 tests/python/gpu/test_gluon_contrib_gpu.py
copy ci/docker/Dockerfile.build.ubuntu_base_gpu =>
tests/python/profiling/simple_forward.py (54%)
create mode 100644 tests/python/profiling/test_nvtx.py
delete mode 100644 tests/python/tensorrt/test_cycle.py
delete mode 100644 tests/python/tensorrt/test_training_warning.py
create mode 100644 tools/dependencies/LICENSE.binary.dependencies