[GitHub] eric-haibin-lin commented on a change in pull request #11357: [MXNET-331] Single machine All Reduce Topology-aware Communication
eric-haibin-lin commented on a change in pull request #11357: [MXNET-331] Single machine All Reduce Topology-aware Communication URL: https://github.com/apache/incubator-mxnet/pull/11357#discussion_r199057710 ## File path: src/kvstore/gpu_topology.h ## @@ -0,0 +1,1043 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +/** + * Copyright (c) 2015 by Contributors + */ +#ifndef MXNET_KVSTORE_GPU_TOPOLOGY_H_ +#define MXNET_KVSTORE_GPU_TOPOLOGY_H_ +#if MXNET_USE_CUDA + #include + #include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MXNET_KVSTORE_MAXDEPTH 16 + +namespace mxnet { +namespace kvstore { + +template +inline void PrintVector(const std::string& str, const std::vector& vec) { + std::cout << str << ":\n"; + for (unsigned i = 0; i < vec.size(); ++i) +std::cout << vec[i] << " "; + std::cout << std::endl; +} + +template +inline void PrintMatrix(const std::string& str, const std::vector& matrix, +int num_rows, int num_cols) { + std::cout << str << ":\n"; + int count = 0; + for (int row = 0; row < num_rows; ++row) { +for (int col = 0; col < num_cols; ++col) { + std::cout << matrix[count++] << " "; +} +std::cout << std::endl; + } +} + +inline void PrintTopo(const std::string& str, const std::vector& topo_row, +std::vector scan_row) { + PrintVector("Topo vector", topo_row); + PrintVector("Scan vector", scan_row); + std::cout << str << ":\n"; + int depth = scan_row.size()-1; + for (int row = 0; row < depth; ++row) { +int start = scan_row[row]; +int end = scan_row[row+1]; +for (; start < end; start++) { + for (int i = 0; i < (2 << (depth-row-2))+1; ++i) { +std::cout << " "; + } + std::cout << topo_row[start]; +} +std::cout << std::endl; + } +} + +// Generate adjacency matrix with row/col numbering from 0, 1, ..., n_gpu +// @input: devs is a vector of GPU contexts +// @output: matrix is adjacency matrix of link topology graph +// where edge weight represents relative performance of NVIDIA GPUs +//0: Self-connection +//1: PCI-E +//2: 1 NVLink connection +//3: 2 NVLink connections +template +inline void GetP2PWeight(const std::vector& devs, + std::vector* matrix) { + int num_gpus = devs.size(); + int count= 0; + std::vector zero_dev_id(num_gpus, -1); + for (auto d : devs) { +zero_dev_id[count] = d.dev_id; 
+count++; + } + +#if MXNET_USE_CUDA + cudaDeviceP2PAttr attr; + attr = cudaDevP2PAttrPerformanceRank; + std::vector max(num_gpus, 0); + + for (int row = 0; row < num_gpus; ++row) { +for (int col = 0; col < num_gpus; ++col) { + if (row == col) { +(*matrix)[row*num_gpus+col] = 0; + } else { +int value; +int row_gpu = zero_dev_id[row]; +int col_gpu = zero_dev_id[col]; +cudaDeviceGetP2PAttribute(&value, attr, row_gpu, col_gpu); +if (value > max[row]) + max[row] = value; +(*matrix)[row*num_gpus+col] = static_cast(value)+1; + } +} + } + + // Check that all GPUs have at least 1 NVLink connection + int max_value = 0; + for (unsigned int i = 0; i < max.size(); ++i) { +if (max[i] > max_value) + max_value = max[i]; + } + + // If all GPUs have at least 1 NVLink connection, then we can use NVLink only + // to communicate instead of going over PCI-E + if (max_value > 0) { +for (auto& matrix_value : *matrix) { + matrix_value = (matrix_value == 1) ? 0 : matrix_value; +} + } + PrintMatrix("Weight W", *matrix, num_gpus, num_gpus); +#else + LOG(WARNING) << "GPU required for link topology"; +#endif +} + +// Dense matrix-vector multiplication +// Assume: matrix is square +// y = A*x (no accumulate) +template +inline void gemv(const std::vector& A, + const std::vector& x, + std::vector* y) { + int nrows = x.size(); + int count = 0; + for (int row=0; row < nrows; ++row) { +(*y)[row] = 0; +for (int col=0; col < nrows; ++col) { + (*y)[row] += A[count]*static_cast(x[col]); + count++; +} + } +} +
[GitHub] eric-haibin-lin commented on a change in pull request #11357: [MXNET-331] Single machine All Reduce Topology-aware Communication
eric-haibin-lin commented on a change in pull request #11357: [MXNET-331] Single machine All Reduce Topology-aware Communication URL: https://github.com/apache/incubator-mxnet/pull/11357#discussion_r199058554 ## File path: tests/python/gpu/test_kvstore_gpu.py ## @@ -88,34 +89,48 @@ def check_rsp_pull(kv, count, ctxs, is_same_rowid=False, use_slice=False): # test fails intermittently. temporarily disabled till it gets fixed. tracked at https://github.com/apache/incubator-mxnet/issues/9384 # check_rsp_push_pull('local') +os.environ["MXNET_KVSTORE_USETREE"] = "" +check_rsp_push_pull('device') +check_rsp_push_pull('device', is_push_cpu=False) +os.environ["MXNET_KVSTORE_USETREE"] = "1" +logging.info("Setting env to use tree reduce...") check_rsp_push_pull('device') check_rsp_push_pull('device', is_push_cpu=False) def test_row_sparse_pull_single_device(): -kvstore = mx.kv.create('device') -copy = mx.nd.random_normal(shape=(4,4), ctx=mx.gpu(0)) -grad = copy.tostype("row_sparse") +envs = ["","1"] Review comment: minor suggestion: we could add some util class like https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/autograd.py#L93-L119 which manages the scope of such env var. It sets the env var to some var when entering the scope, and reset the env var when exiting the scope. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] eric-haibin-lin commented on a change in pull request #11357: [MXNET-331] Single machine All Reduce Topology-aware Communication
eric-haibin-lin commented on a change in pull request #11357: [MXNET-331] Single machine All Reduce Topology-aware Communication URL: https://github.com/apache/incubator-mxnet/pull/11357#discussion_r199057204 ## File path: src/kvstore/comm.h ## @@ -750,6 +762,8 @@ class CommDevice : public Comm { std::vector compressed_send_buf; /// \brief the small buffer for compressed data in receiver std::vector compressed_recv_buf; +/// \brief size of allocation in case we do not actually allocate merged +TShape merged_size; Review comment: Is this being used? This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] hxhxhx88 opened a new issue #11480: Image classification example has wrong accuracy metric.
hxhxhx88 opened a new issue #11480: Image classfication example has wrong accuracy metric. URL: https://github.com/apache/incubator-mxnet/issues/11480 In file [example/image-classification/common/fit.py](https://github.com/apache/incubator-mxnet/blob/master/example/image-classification/common/fit.py) at [line 295](https://github.com/apache/incubator-mxnet/blob/master/example/image-classification/common/fit.py#L295), the `mx.callback.Speedometer` is initialized with `auto_reset` unset, which will be its [default value](https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/callback.py#L142), i.e. `True`. This will make the logged **epoch accuracy** incorrect, since the module and the speedometer share the same metric. Following is the current log for 1 epoch: ``` INFO:root:Epoch[1] Batch [20]Speed: 1375.76 samples/sec accuracy=0.519345 INFO:root:Epoch[1] Batch [40]Speed: 1374.71 samples/sec accuracy=0.515625 INFO:root:Epoch[1] Batch [60]Speed: 1371.57 samples/sec accuracy=0.521094 INFO:root:Epoch[1] Batch [80]Speed: 1376.55 samples/sec accuracy=0.533203 INFO:root:Epoch[1] Batch [100] Speed: 1375.78 samples/sec accuracy=0.548828 INFO:root:Epoch[1] Batch [120] Speed: 1370.05 samples/sec accuracy=0.544141 INFO:root:Epoch[1] Batch [140] Speed: 1375.61 samples/sec accuracy=0.566797 INFO:root:Epoch[1] Batch [160] Speed: 1372.94 samples/sec accuracy=0.571094 INFO:root:Epoch[1] Batch [180] Speed: 1328.99 samples/sec accuracy=0.566016 INFO:root:Epoch[1] Batch [200] Speed: 1319.79 samples/sec accuracy=0.576562 INFO:root:Epoch[1] Batch [220] Speed: 1322.05 samples/sec accuracy=0.576172 INFO:root:Epoch[1] Batch [240] Speed: 1318.98 samples/sec accuracy=0.597656 INFO:root:Epoch[1] Batch [260] Speed: 1323.49 samples/sec accuracy=0.593359 INFO:root:Epoch[1] Batch [280] Speed: 1301.38 samples/sec accuracy=0.611719 INFO:root:Epoch[1] Batch [300] Speed: 1299.34 samples/sec accuracy=0.615234 INFO:root:Epoch[1] Batch [320] Speed: 1300.00 samples/sec accuracy=0.622656 
INFO:root:Epoch[1] Batch [340] Speed: 1302.01 samples/sec accuracy=0.641406 INFO:root:Epoch[1] Batch [360] Speed: 1300.59 samples/sec accuracy=0.632812 INFO:root:Epoch[1] Batch [380] Speed: 1301.89 samples/sec accuracy=0.623047 INFO:root:Epoch[1] Train-accuracy=0.642969 ``` Following is the log with `auto_reset` set to `False`: ``` INFO:root:Epoch[1] Batch [20]Speed: 1366.77 samples/sec accuracy=0.518601 INFO:root:Epoch[1] Batch [40]Speed: 1373.52 samples/sec accuracy=0.518293 INFO:root:Epoch[1] Batch [60]Speed: 1369.00 samples/sec accuracy=0.518315 INFO:root:Epoch[1] Batch [80]Speed: 1372.24 samples/sec accuracy=0.521991 INFO:root:Epoch[1] Batch [100] Speed: 1377.56 samples/sec accuracy=0.526609 INFO:root:Epoch[1] Batch [120] Speed: 1373.20 samples/sec accuracy=0.530475 INFO:root:Epoch[1] Batch [140] Speed: 1371.03 samples/sec accuracy=0.536070 INFO:root:Epoch[1] Batch [160] Speed: 1367.66 samples/sec accuracy=0.541295 INFO:root:Epoch[1] Batch [180] Speed: 1371.29 samples/sec accuracy=0.543854 INFO:root:Epoch[1] Batch [200] Speed: 1364.62 samples/sec accuracy=0.547069 INFO:root:Epoch[1] Batch [220] Speed: 1370.96 samples/sec accuracy=0.549456 INFO:root:Epoch[1] Batch [240] Speed: 1363.94 samples/sec accuracy=0.553488 INFO:root:Epoch[1] Batch [260] Speed: 1371.96 samples/sec accuracy=0.556454 INFO:root:Epoch[1] Batch [280] Speed: 1368.86 samples/sec accuracy=0.560582 INFO:root:Epoch[1] Batch [300] Speed: 1360.55 samples/sec accuracy=0.564654 INFO:root:Epoch[1] Batch [320] Speed: 1366.20 samples/sec accuracy=0.567465 INFO:root:Epoch[1] Batch [340] Speed: 1366.99 samples/sec accuracy=0.571527 INFO:root:Epoch[1] Batch [360] Speed: 1367.97 samples/sec accuracy=0.575160 INFO:root:Epoch[1] Batch [380] Speed: 1367.14 samples/sec accuracy=0.578043 INFO:root:Epoch[1] Train-accuracy=0.579803 ``` Note that when `auto_reset=True`, the batch-wise accuracy is correct, but epoch-wise is wrong. When `auto_reset=False`, the batch-wise one is incorrect, but epoch-wise is correct. 
I believe there are two fixes: 1. Manually set `auto_reset=False` to log accumulated accuracy for each batch. 2. Set an independent metric for the speedometer. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL
[GitHub] zheng-da commented on issue #10921: [MXNET-500]Test cases improvement for MKLDNN on Gluon
zheng-da commented on issue #10921: [MXNET-500]Test cases improvement for MKLDNN on Gluon URL: https://github.com/apache/incubator-mxnet/pull/10921#issuecomment-401248185 can you try rebase and see if it works? This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] leezu commented on issue #11314: Embedding Backward (AddTakeGradLargeBatchCaller) non-deterministic nan values
leezu commented on issue #11314: Embedding Backward (AddTakeGradLargeBatchCaller) non-deterministic nan values URL: https://github.com/apache/incubator-mxnet/issues/11314#issuecomment-401248199 There is some evidence that `nan` values can still occur with above workaround applied, though the root cause is still not determined. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] Hui-Li commented on issue #9267: compiling from source failed
Hui-Li commented on issue #9267: compiling from source failed URL: https://github.com/apache/incubator-mxnet/issues/9267#issuecomment-401245124 In case you are using Mac OS, it is probably that you use GCC as the compiler. Change to clang in **config.mk**: ``` export CC = clang export CXX = clang++ export NVCC = nvcc ``` This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] szha commented on issue #11287: [MXNET-548] fixed path for auto_module_index.js
szha commented on issue #11287: [MXNET-548] fixed path for auto_module_index.js URL: https://github.com/apache/incubator-mxnet/pull/11287#issuecomment-401239592 @aaronmarkham could you do a rebase? some tests are already fixed recently. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] szha commented on issue #11152: updating Scala IntelliJ tutorial & installation instructions
szha commented on issue #11152: updating Scala IntelliJ tutorial & installation instructions URL: https://github.com/apache/incubator-mxnet/pull/11152#issuecomment-401239498 @aaronmarkham lol. you can use interactive rebase or squash to squash/fix-up all commits into one before rebasing, so that if there's any conflict you only need to resolve it once. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] szha commented on issue #11442: No module named 'dmlc_tracker'
szha commented on issue #11442: No module named 'dmlc_tracker' URL: https://github.com/apache/incubator-mxnet/issues/11442#issuecomment-401239133 dmlc_tracker will be included in nightly builds starting from tonight. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] zhulihuacmss commented on issue #7146: How to compile Amalgamation for android?
zhulihuacmss commented on issue #7146: How to compile Amalgamation for android? URL: https://github.com/apache/incubator-mxnet/issues/7146#issuecomment-401237164 @Corea i have met the same problem 'can not find openblas.so', how did you solve it? thanks This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] marcoabreu closed pull request #11452: [Nightly Tests] Disable some nightly tests for installation guide
marcoabreu closed pull request #11452: [Nightly Tests] Disable some nightly tests for installation guide URL: https://github.com/apache/incubator-mxnet/pull/11452 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/tests/nightly/Jenkinsfile b/tests/nightly/Jenkinsfile index 4b7a0943701..fb9fbfd0521 100755 --- a/tests/nightly/Jenkinsfile +++ b/tests/nightly/Jenkinsfile @@ -89,11 +89,13 @@ try { node('mxnetlinux-cpu') { ws('workspace/nt-Installation-cpu') { init_git() + //Some install guide tests are currently diabled and tracked here: + //1. https://github.com/apache/incubator-mxnet/issues/11369 + //2. https://github.com/apache/incubator-mxnet/issues/11288 docker_run('ubuntu_base_cpu', 'nightly_test_installation ubuntu_python_cpu_virtualenv', false) - docker_run('ubuntu_base_cpu', 'nightly_test_installation ubuntu_python_cpu_pip', false) - //Docker Install Test is currently disabled and tracked here: https://github.com/apache/incubator-mxnet/issues/11288 + //docker_run('ubuntu_base_cpu', 'nightly_test_installation ubuntu_python_cpu_pip', false) //docker_run('ubuntu_base_cpu', 'nightly_test_installation ubuntu_python_cpu_docker', false) - docker_run('ubuntu_base_cpu', 'nightly_test_installation ubuntu_python_cpu_source', false) + //docker_run('ubuntu_base_cpu', 'nightly_test_installation ubuntu_python_cpu_source', false) } } }, @@ -101,9 +103,11 @@ try { node('mxnetlinux-gpu') { ws('workspace/nt-Installation-gpu') { init_git() + //Some install guide tests are currently diabled and tracked here: + //1. https://github.com/apache/incubator-mxnet/issues/11369 + //2. 
https://github.com/apache/incubator-mxnet/issues/11288 docker_run('ubuntu_base_gpu', 'nightly_test_installation ubuntu_python_gpu_virtualenv', true) - docker_run('ubuntu_base_gpu', 'nightly_test_installation ubuntu_python_gpu_pip', true) - //Docker Install Test is currently disabled and tracked here: https://github.com/apache/incubator-mxnet/issues/11288 + //docker_run('ubuntu_base_gpu', 'nightly_test_installation ubuntu_python_gpu_pip', true) //docker_run('ubuntu_base_gpu', 'nightly_test_installation ubuntu_python_gpu_docker', true) docker_run('ubuntu_base_gpu', 'nightly_test_installation ubuntu_python_gpu_source', true) } This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[incubator-mxnet] branch master updated: [Nightly Tests] Disable some nightly tests for installation guide (#11452)
This is an automated email from the ASF dual-hosted git repository. marcoabreu pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git The following commit(s) were added to refs/heads/master by this push: new a0669d0 [Nightly Tests] Disable some nightly tests for installation guide (#11452) a0669d0 is described below commit a0669d07d12f7dcad47e8db8befd1e0a99a8203f Author: mbaijal <30911248+mbai...@users.noreply.github.com> AuthorDate: Thu Jun 28 20:27:14 2018 -0700 [Nightly Tests] Disable some nightly tests for installation guide (#11452) * Disable nightly install guide test for pip and build from source on cpu * add link to github issue --- tests/nightly/Jenkinsfile | 14 +- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tests/nightly/Jenkinsfile b/tests/nightly/Jenkinsfile index 4b7a094..fb9fbfd 100755 --- a/tests/nightly/Jenkinsfile +++ b/tests/nightly/Jenkinsfile @@ -89,11 +89,13 @@ try { node('mxnetlinux-cpu') { ws('workspace/nt-Installation-cpu') { init_git() + //Some install guide tests are currently diabled and tracked here: + //1. https://github.com/apache/incubator-mxnet/issues/11369 + //2. 
https://github.com/apache/incubator-mxnet/issues/11288 docker_run('ubuntu_base_cpu', 'nightly_test_installation ubuntu_python_cpu_virtualenv', false) - docker_run('ubuntu_base_cpu', 'nightly_test_installation ubuntu_python_cpu_pip', false) - //Docker Install Test is currently disabled and tracked here: https://github.com/apache/incubator-mxnet/issues/11288 + //docker_run('ubuntu_base_cpu', 'nightly_test_installation ubuntu_python_cpu_pip', false) //docker_run('ubuntu_base_cpu', 'nightly_test_installation ubuntu_python_cpu_docker', false) - docker_run('ubuntu_base_cpu', 'nightly_test_installation ubuntu_python_cpu_source', false) + //docker_run('ubuntu_base_cpu', 'nightly_test_installation ubuntu_python_cpu_source', false) } } }, @@ -101,9 +103,11 @@ try { node('mxnetlinux-gpu') { ws('workspace/nt-Installation-gpu') { init_git() + //Some install guide tests are currently diabled and tracked here: + //1. https://github.com/apache/incubator-mxnet/issues/11369 + //2. https://github.com/apache/incubator-mxnet/issues/11288 docker_run('ubuntu_base_gpu', 'nightly_test_installation ubuntu_python_gpu_virtualenv', true) - docker_run('ubuntu_base_gpu', 'nightly_test_installation ubuntu_python_gpu_pip', true) - //Docker Install Test is currently disabled and tracked here: https://github.com/apache/incubator-mxnet/issues/11288 + //docker_run('ubuntu_base_gpu', 'nightly_test_installation ubuntu_python_gpu_pip', true) //docker_run('ubuntu_base_gpu', 'nightly_test_installation ubuntu_python_gpu_docker', true) docker_run('ubuntu_base_gpu', 'nightly_test_installation ubuntu_python_gpu_source', true) }
[incubator-mxnet] branch master updated: [MXNET-607] Fix the broken links reported by the new BLC (#11465)
This is an automated email from the ASF dual-hosted git repository. marcoabreu pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git The following commit(s) were added to refs/heads/master by this push: new a4054cd [MXNET-607] Fix the broken reported by the new BLC (#11465) a4054cd is described below commit a4054cd5b20ebc12409effa398b1a32329bb91bf Author: kpmurali <37911926+kpmur...@users.noreply.github.com> AuthorDate: Thu Jun 28 20:25:14 2018 -0700 [MXNET-607] Fix the broken reported by the new BLC (#11465) * Fixing the broken for the moved directories in ap/python and scala imageclassifier and SSDClassifier * Fixing the broken for the moved directories in ap/python and scala imageclassifier and SSDClassifier --- docs/tutorials/gluon/mnist.md| 12 ++-- .../infer/imageclassifier/ImageClassifierExample.scala | 6 +++--- .../infer/objectdetector/SSDClassifierExample.scala | 6 +++--- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/tutorials/gluon/mnist.md b/docs/tutorials/gluon/mnist.md index 3a2a2cb..5b8a98a 100644 --- a/docs/tutorials/gluon/mnist.md +++ b/docs/tutorials/gluon/mnist.md @@ -77,7 +77,7 @@ In an MLP, the outputs of most FC layers are fed into an activation function, wh The following code declares three fully connected layers with 128, 64 and 10 neurons each. The last fully connected layer often has its hidden size equal to the number of output classes in the dataset. Furthermore, these FC layers uses ReLU activation for performing an element-wise ReLU transformation on the FC layer output. -To do this, we will use [Sequential layer](http://mxnet.io/api/python/gluon.html#mxnet.gluon.nn.Sequential) type. This is simply a linear stack of neural network layers. `nn.Dense` layers are nothing but the fully connected layers we discussed above. +To do this, we will use [Sequential layer](http://mxnet.io/api/python/gluon/gluon.html#mxnet.gluon.nn.Sequential) type. 
This is simply a linear stack of neural network layers. `nn.Dense` layers are nothing but the fully connected layers we discussed above. ```python # define network @@ -90,13 +90,13 @@ with net.name_scope(): Initialize parameters and optimizer -The following source code initializes all parameters received from parameter dict using [Xavier](http://mxnet.io/api/python/optimization.html#mxnet.initializer.Xavier) initializer +The following source code initializes all parameters received from parameter dict using [Xavier](http://mxnet.io/api/python/optimization/optimization.html#mxnet.initializer.Xavier) initializer to train the MLP network we defined above. For our training, we will make use of the stochastic gradient descent (SGD) optimizer. In particular, we'll be using mini-batch SGD. Standard SGD processes train data one example at a time. In practice, this is very slow and one can speed up the process by processing examples in small batches. In this case, our batch size will be 100, which is a reasonable choice. Another parameter we select here is the learning rate, which controls the step size the optimizer takes in search of a soluti [...] -We will use [Trainer](http://mxnet.io/api/python/gluon.html#trainer) class to apply the -[SGD optimizer](http://mxnet.io/api/python/optimization.html#mxnet.optimizer.SGD) on the +We will use [Trainer](http://mxnet.io/api/python/gluon/gluon.html#trainer) class to apply the +[SGD optimizer](http://mxnet.io/api/python/optimization/optimization.html#mxnet.optimizer.SGD) on the initialized parameters. ```python @@ -112,7 +112,7 @@ Typically, one runs the training until convergence, which means that we have lea We will take following steps for training: -- Define [Accuracy evaluation metric](http://mxnet.io/api/python/metric.html#mxnet.metric.Accuracy) over training data. +- Define [Accuracy evaluation metric](http://mxnet.io/api/python/metric/metric.html#mxnet.metric.Accuracy) over training data. - Loop over inputs for every epoch. 
- Forward input through network to get output. - Compute loss with output and label inside record scope. @@ -121,7 +121,7 @@ We will take following steps for training: Loss function takes (output, label) pairs and computes a scalar loss for each sample in the mini-batch. The scalars measure how far each output is from the label. There are many predefined loss functions in gluon.loss. Here we use -[softmax_cross_entropy_loss](http://mxnet.io/api/python/gluon.html#mxnet.gluon.loss.softmax_cross_entropy_loss) for digit classification. We will compute loss and do backward propagation inside +[softmax_cross_entropy_loss](http://mxnet.io/api/python/gluon/gluon.html#mxnet.gluon.loss.softmax_cross_entropy_loss) for digit classification. We will compute loss and do backward propagation inside training scope which is defined by `autograd.record()`.
[GitHub] marcoabreu closed pull request #11465: [MXNET-607] Fix the broken links reported by the new BLC
marcoabreu closed pull request #11465: [MXNET-607] Fix the broken reported by the new BLC URL: https://github.com/apache/incubator-mxnet/pull/11465 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/docs/tutorials/gluon/mnist.md b/docs/tutorials/gluon/mnist.md index 3a2a2cbe01b..5b8a98a3d66 100644 --- a/docs/tutorials/gluon/mnist.md +++ b/docs/tutorials/gluon/mnist.md @@ -77,7 +77,7 @@ In an MLP, the outputs of most FC layers are fed into an activation function, wh The following code declares three fully connected layers with 128, 64 and 10 neurons each. The last fully connected layer often has its hidden size equal to the number of output classes in the dataset. Furthermore, these FC layers uses ReLU activation for performing an element-wise ReLU transformation on the FC layer output. -To do this, we will use [Sequential layer](http://mxnet.io/api/python/gluon.html#mxnet.gluon.nn.Sequential) type. This is simply a linear stack of neural network layers. `nn.Dense` layers are nothing but the fully connected layers we discussed above. +To do this, we will use [Sequential layer](http://mxnet.io/api/python/gluon/gluon.html#mxnet.gluon.nn.Sequential) type. This is simply a linear stack of neural network layers. `nn.Dense` layers are nothing but the fully connected layers we discussed above. 
```python # define network @@ -90,13 +90,13 @@ with net.name_scope(): Initialize parameters and optimizer -The following source code initializes all parameters received from parameter dict using [Xavier](http://mxnet.io/api/python/optimization.html#mxnet.initializer.Xavier) initializer +The following source code initializes all parameters received from parameter dict using [Xavier](http://mxnet.io/api/python/optimization/optimization.html#mxnet.initializer.Xavier) initializer to train the MLP network we defined above. For our training, we will make use of the stochastic gradient descent (SGD) optimizer. In particular, we'll be using mini-batch SGD. Standard SGD processes train data one example at a time. In practice, this is very slow and one can speed up the process by processing examples in small batches. In this case, our batch size will be 100, which is a reasonable choice. Another parameter we select here is the learning rate, which controls the step size the optimizer takes in search of a solution. We'll pick a learning rate of 0.02, again a reasonable choice. Settings such as batch size and learning rate are what are usually referred to as hyper-parameters. What values we give them can have a great impact on training performance. -We will use [Trainer](http://mxnet.io/api/python/gluon.html#trainer) class to apply the -[SGD optimizer](http://mxnet.io/api/python/optimization.html#mxnet.optimizer.SGD) on the +We will use [Trainer](http://mxnet.io/api/python/gluon/gluon.html#trainer) class to apply the +[SGD optimizer](http://mxnet.io/api/python/optimization/optimization.html#mxnet.optimizer.SGD) on the initialized parameters. ```python @@ -112,7 +112,7 @@ Typically, one runs the training until convergence, which means that we have lea We will take following steps for training: -- Define [Accuracy evaluation metric](http://mxnet.io/api/python/metric.html#mxnet.metric.Accuracy) over training data. 
+- Define [Accuracy evaluation metric](http://mxnet.io/api/python/metric/metric.html#mxnet.metric.Accuracy) over training data. - Loop over inputs for every epoch. - Forward input through network to get output. - Compute loss with output and label inside record scope. @@ -121,7 +121,7 @@ We will take following steps for training: Loss function takes (output, label) pairs and computes a scalar loss for each sample in the mini-batch. The scalars measure how far each output is from the label. There are many predefined loss functions in gluon.loss. Here we use -[softmax_cross_entropy_loss](http://mxnet.io/api/python/gluon.html#mxnet.gluon.loss.softmax_cross_entropy_loss) for digit classification. We will compute loss and do backward propagation inside +[softmax_cross_entropy_loss](http://mxnet.io/api/python/gluon/gluon.html#mxnet.gluon.loss.softmax_cross_entropy_loss) for digit classification. We will compute loss and do backward propagation inside training scope which is defined by `autograd.record()`. ```python diff --git a/scala-package/examples/src/main/scala/org/apache/mxnetexamples/infer/imageclassifier/ImageClassifierExample.scala b/scala-package/examples/src/main/scala/org/apache/mxnetexamples/infer/imageclassifier/ImageClassifierExample.scala index 8a57527f355..e886b908ba2 100644 --- a/scala-package/examples/src/main/scala/org/apache/mxnetexamples/infer/imageclassifier/ImageClassifierExample.scala
[incubator-mxnet] branch master updated: fix regex which processes RAT check output (#11476)
This is an automated email from the ASF dual-hosted git repository. marcoabreu pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git The following commit(s) were added to refs/heads/master by this push: new f02e627 fix regex which processes RAT check output (#11476) f02e627 is described below commit f02e627109d54f1cee32ef20a14462d0b4878f49 Author: mbaijal <30911248+mbai...@users.noreply.github.com> AuthorDate: Thu Jun 28 20:23:03 2018 -0700 fix regex which processes RAT check output (#11476) --- tests/nightly/apache_rat_license_check/license_check.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/nightly/apache_rat_license_check/license_check.sh b/tests/nightly/apache_rat_license_check/license_check.sh index 84accf9..79e86c0 100755 --- a/tests/nightly/apache_rat_license_check/license_check.sh +++ b/tests/nightly/apache_rat_license_check/license_check.sh @@ -43,7 +43,7 @@ SOURCE="^0 Unknown Licenses" echo "---Process The Output---" -if [[ "$OUTPUT" =~ "$SOURCE" ]]; then +if [[ "$OUTPUT" =~ $SOURCE ]]; then echo "SUCCESS: There are no files with an Unknown License."; else echo "ERROR: RAT Check detected files with unknown licenses. Please fix and run test again!";
[GitHub] marcoabreu closed pull request #11476: [Nightly Tests] Fix regex which processes Apache RAT check output
marcoabreu closed pull request #11476: [Nightly Tests] Fix regex which processes Apache RAT check output URL: https://github.com/apache/incubator-mxnet/pull/11476 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/tests/nightly/apache_rat_license_check/license_check.sh b/tests/nightly/apache_rat_license_check/license_check.sh index 84accf93f70..79e86c03e59 100755 --- a/tests/nightly/apache_rat_license_check/license_check.sh +++ b/tests/nightly/apache_rat_license_check/license_check.sh @@ -43,7 +43,7 @@ SOURCE="^0 Unknown Licenses" echo "---Process The Output---" -if [[ "$OUTPUT" =~ "$SOURCE" ]]; then +if [[ "$OUTPUT" =~ $SOURCE ]]; then echo "SUCCESS: There are no files with an Unknown License."; else echo "ERROR: RAT Check detected files with unknown licenses. Please fix and run test again!"; This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] freshcoderman closed issue #11479: gamma require 2d
freshcoderman closed issue #11479: gamma require 2d URL: https://github.com/apache/incubator-mxnet/issues/11479 This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] freshcoderman opened a new issue #11479: gamma require 2d
freshcoderman opened a new issue #11479: gamma require 2d URL: https://github.com/apache/incubator-mxnet/issues/11479 Note: Providing complete information in the most concise form is the best way to get help. This issue template serves as the checklist for essential information to most of the technical issues and bug reports. For non-technical issues and feature requests, feel free to present the information in what you believe is the best form. For Q & A and discussion, please start a discussion thread at https://discuss.mxnet.io ## Description (Brief description of the problem in no more than 2 sentences.) ## Environment info (Required) ``` What to do: 1. Download the diagnosis script from https://raw.githubusercontent.com/apache/incubator-mxnet/master/tools/diagnose.py 2. Run the script using `python diagnose.py` and paste its output here. ``` Package used (Python/R/Scala/Julia): (I'm using ...) For Scala user, please provide: 1. Java version: (`java -version`) 2. Maven version: (`mvn -version`) 3. Scala runtime if applicable: (`scala -version`) For R user, please provide R `sessionInfo()`: ## Build info (Required if built from source) Compiler (gcc/clang/mingw/visual studio): MXNet commit hash: (Paste the output of `git rev-parse HEAD` here.) Build config: (Paste the content of config.mk, or the build command.) ## Error Message: (Paste the complete error message, including stack trace.) ## Minimum reproducible example (If you are using your own code, please provide a short script that reproduces the error. Otherwise, please provide link to the existing example.) ## Steps to reproduce (Paste the commands you ran that produced the error.) 1. 2. ## What have you tried to solve it? 1. 2. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] rahul003 closed issue #11469: Performance regression in augmentation
rahul003 closed issue #11469: Performance regression in augmentation URL: https://github.com/apache/incubator-mxnet/issues/11469 This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[incubator-mxnet] branch master updated: fix record io augmentation speed (#11474)
This is an automated email from the ASF dual-hosted git repository. zhasheng pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git The following commit(s) were added to refs/heads/master by this push: new b344d89 fix record io augmentation speed (#11474) b344d89 is described below commit b344d89a80f87fce01a964347ff0b5f54e62f374 Author: Tong He AuthorDate: Thu Jun 28 19:24:00 2018 -0700 fix record io augmentation speed (#11474) --- example/image-classification/common/data.py | 2 +- src/io/image_aug_default.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/example/image-classification/common/data.py b/example/image-classification/common/data.py index bfaadb3..c1dfcf5 100755 --- a/example/image-classification/common/data.py +++ b/example/image-classification/common/data.py @@ -86,7 +86,7 @@ def add_data_aug_args(parser): def set_resnet_aug(aug): # standard data augmentation setting for resnet training -aug.set_defaults(random_crop=1, random_resized_crop=1) +aug.set_defaults(random_crop=0, random_resized_crop=1) aug.set_defaults(min_random_area=0.08) aug.set_defaults(max_random_aspect_ratio=4./3., min_random_aspect_ratio=3./4.) aug.set_defaults(brightness=0.4, contrast=0.4, saturation=0.4, pca_noise=0.1) diff --git a/src/io/image_aug_default.cc b/src/io/image_aug_default.cc index ce9c79c..5b28aa1 100644 --- a/src/io/image_aug_default.cc +++ b/src/io/image_aug_default.cc @@ -293,7 +293,7 @@ class DefaultImageAugmenter : public ImageAugmenter { if (param_.max_rotate_angle > 0 || param_.max_shear_ratio > 0.0f || param_.rotate > 0 || rotate_list_.size() > 0 || param_.max_random_scale != 1.0f || param_.min_random_scale != 1.0 -|| min_aspect_ratio != 1.0f || max_aspect_ratio != 1.0f +|| (!param_.random_resized_crop && (min_aspect_ratio != 1.0f || max_aspect_ratio != 1.0f)) || param_.max_img_size != 1e10f || param_.min_img_size != 0.0f) { std::uniform_real_distribution rand_uniform(0, 1); // shear
[GitHub] szha closed pull request #11474: Fix record io augmentation speed
szha closed pull request #11474: Fix record io augmentation speed URL: https://github.com/apache/incubator-mxnet/pull/11474 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/example/image-classification/common/data.py b/example/image-classification/common/data.py index bfaadb3ff6b..c1dfcf56546 100755 --- a/example/image-classification/common/data.py +++ b/example/image-classification/common/data.py @@ -86,7 +86,7 @@ def add_data_aug_args(parser): def set_resnet_aug(aug): # standard data augmentation setting for resnet training -aug.set_defaults(random_crop=1, random_resized_crop=1) +aug.set_defaults(random_crop=0, random_resized_crop=1) aug.set_defaults(min_random_area=0.08) aug.set_defaults(max_random_aspect_ratio=4./3., min_random_aspect_ratio=3./4.) aug.set_defaults(brightness=0.4, contrast=0.4, saturation=0.4, pca_noise=0.1) diff --git a/src/io/image_aug_default.cc b/src/io/image_aug_default.cc index ce9c79c403a..5b28aa18913 100644 --- a/src/io/image_aug_default.cc +++ b/src/io/image_aug_default.cc @@ -293,7 +293,7 @@ class DefaultImageAugmenter : public ImageAugmenter { if (param_.max_rotate_angle > 0 || param_.max_shear_ratio > 0.0f || param_.rotate > 0 || rotate_list_.size() > 0 || param_.max_random_scale != 1.0f || param_.min_random_scale != 1.0 -|| min_aspect_ratio != 1.0f || max_aspect_ratio != 1.0f +|| (!param_.random_resized_crop && (min_aspect_ratio != 1.0f || max_aspect_ratio != 1.0f)) || param_.max_img_size != 1e10f || param_.min_img_size != 0.0f) { std::uniform_real_distribution rand_uniform(0, 1); // shear This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] szha closed pull request #11379: support shared parameter in summary
szha closed pull request #11379: support shared parameter in summary URL: https://github.com/apache/incubator-mxnet/pull/11379 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/python/mxnet/gluon/block.py b/python/mxnet/gluon/block.py index 0ef28496c20..731d5414973 100644 --- a/python/mxnet/gluon/block.py +++ b/python/mxnet/gluon/block.py @@ -563,6 +563,7 @@ def summary(self, *inputs): :class:`mxnet.ndarray.NDArray` is supported. """ summary = OrderedDict() +seen = set() hooks = [] def _get_shape_str(args): @@ -611,9 +612,14 @@ def _summary_hook(block, _, outputs): params = 0 summary[m_key]['trainable'] = 0 +summary[m_key]['shared'] = 0 for p in block._reg_params.values(): params += p.data().size summary[m_key]['trainable'] += 0 if p.grad_req == 'null' else p.data().size +if p in seen: +summary[m_key]['shared'] += p.data().size +else: +seen.add(p) summary[m_key]['n_params'] = params from .nn.basic_layers import Sequential, HybridSequential @@ -624,6 +630,7 @@ def _summary_hook(block, _, outputs): summary['Input']['output_shape'] = _get_shape_str(inputs) summary['Input']['n_params'] = 0 summary['Input']['trainable'] = 0 +summary['Input']['shared'] = 0 try: self.apply(_register_summary_hook) @@ -635,16 +642,19 @@ def _summary_hook(block, _, outputs): print('='*80) total_params = 0 trainable_params = 0 +shared_params = 0 for layer in summary: print(line_format.format(layer, str(summary[layer]['output_shape']), summary[layer]['n_params'])) total_params += summary[layer]['n_params'] trainable_params += summary[layer]['trainable'] +shared_params += summary[layer]['shared'] print('='*80) print('Total params: ' + str(total_params)) print('Trainable params: ' + str(trainable_params)) print('Non-trainable params: ' + str(total_params - 
trainable_params)) +print('Shared params: ' + str(shared_params)) print('-'*80) finally: for h in hooks: diff --git a/tests/python/unittest/test_gluon.py b/tests/python/unittest/test_gluon.py index 7fff6b8c1f5..d275cd61029 100644 --- a/tests/python/unittest/test_gluon.py +++ b/tests/python/unittest/test_gluon.py @@ -1264,9 +1264,9 @@ def test_summary(): net2 = nn.Sequential() with net2.name_scope(): -net2.add(nn.Embedding(10, 20)) +net2.add(nn.Embedding(40, 30)) net2.add(gluon.rnn.LSTM(30)) -net2.add(nn.Dense(40, flatten=False)) +net2.add(nn.Dense(40, flatten=False, params=net2[0].params)) net2.initialize() net2.summary(mx.nd.ones((80, 32))) This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[incubator-mxnet] branch master updated: support shared parameter in summary (#11379)
This is an automated email from the ASF dual-hosted git repository. zhasheng pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git The following commit(s) were added to refs/heads/master by this push: new 36f2aae support shared parameter in summary (#11379) 36f2aae is described below commit 36f2aae9fb97159d226f24c9b1f829eccc318c14 Author: Sheng Zha AuthorDate: Thu Jun 28 19:15:39 2018 -0700 support shared parameter in summary (#11379) --- python/mxnet/gluon/block.py | 10 ++ tests/python/unittest/test_gluon.py | 4 ++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/python/mxnet/gluon/block.py b/python/mxnet/gluon/block.py index 0ef2849..731d541 100644 --- a/python/mxnet/gluon/block.py +++ b/python/mxnet/gluon/block.py @@ -563,6 +563,7 @@ class Block(object): :class:`mxnet.ndarray.NDArray` is supported. """ summary = OrderedDict() +seen = set() hooks = [] def _get_shape_str(args): @@ -611,9 +612,14 @@ class Block(object): params = 0 summary[m_key]['trainable'] = 0 +summary[m_key]['shared'] = 0 for p in block._reg_params.values(): params += p.data().size summary[m_key]['trainable'] += 0 if p.grad_req == 'null' else p.data().size +if p in seen: +summary[m_key]['shared'] += p.data().size +else: +seen.add(p) summary[m_key]['n_params'] = params from .nn.basic_layers import Sequential, HybridSequential @@ -624,6 +630,7 @@ class Block(object): summary['Input']['output_shape'] = _get_shape_str(inputs) summary['Input']['n_params'] = 0 summary['Input']['trainable'] = 0 +summary['Input']['shared'] = 0 try: self.apply(_register_summary_hook) @@ -635,16 +642,19 @@ class Block(object): print('='*80) total_params = 0 trainable_params = 0 +shared_params = 0 for layer in summary: print(line_format.format(layer, str(summary[layer]['output_shape']), summary[layer]['n_params'])) total_params += summary[layer]['n_params'] trainable_params += summary[layer]['trainable'] +shared_params += summary[layer]['shared'] 
print('='*80) print('Total params: ' + str(total_params)) print('Trainable params: ' + str(trainable_params)) print('Non-trainable params: ' + str(total_params - trainable_params)) +print('Shared params: ' + str(shared_params)) print('-'*80) finally: for h in hooks: diff --git a/tests/python/unittest/test_gluon.py b/tests/python/unittest/test_gluon.py index 7fff6b8..d275cd6 100644 --- a/tests/python/unittest/test_gluon.py +++ b/tests/python/unittest/test_gluon.py @@ -1264,9 +1264,9 @@ def test_summary(): net2 = nn.Sequential() with net2.name_scope(): -net2.add(nn.Embedding(10, 20)) +net2.add(nn.Embedding(40, 30)) net2.add(gluon.rnn.LSTM(30)) -net2.add(nn.Dense(40, flatten=False)) +net2.add(nn.Dense(40, flatten=False, params=net2[0].params)) net2.initialize() net2.summary(mx.nd.ones((80, 32)))
[GitHub] szha commented on issue #11003: rnn_cell little bug fixed
szha commented on issue #11003: rnn_cell little bug fixed URL: https://github.com/apache/incubator-mxnet/pull/11003#issuecomment-401226598 suppose you have a git remote called "upstream" (which you can get by doing `git remote add upstream https://github.com/apache/incubator-mxnet`), you can do `git pull upstream master --rebase` and then do a force push. You don't need to reopen the PR This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] pengzhao-intel commented on issue #11448: fail to fall back when sparse arrays are passed to MKLDNN-enabled operators.
pengzhao-intel commented on issue #11448: fail to fall back when sparse arrays are passed to MKLDNN-enabled operators. URL: https://github.com/apache/incubator-mxnet/issues/11448#issuecomment-401225029 what else needs to be covered? @zouluobao can help This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] anirudh2290 commented on issue #11478: 1.2.1 release notes
anirudh2290 commented on issue #11478: 1.2.1 release notes URL: https://github.com/apache/incubator-mxnet/pull/11478#issuecomment-401224690 @aaronmarkham can you please take a look. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] haojin2 commented on a change in pull request #11229: [MXNET-379] L1 Normalization
haojin2 commented on a change in pull request #11229: [MXNET-379] L1 Normalization URL: https://github.com/apache/incubator-mxnet/pull/11229#discussion_r199034382 ## File path: tests/python/unittest/test_operator.py ## @@ -3009,6 +3009,49 @@ def npy_layer_norm(data, gamma, beta, axis=1, eps=1E-5): grad_nodes={'data': req, 'gamma': req, 'beta': req}, numeric_eps=1e-2, rtol=1e-2, atol=1e-2) +@with_seed() +def test_norm(): +def l1norm(input_data, axis=0, keepdims=True): +return np.sum(abs(input_data), axis=axis, keepdims=keepdims) +def l2norm(input_data, axis=0, keepdims=True): +return np.linalg.norm(input_data, axis=axis, keepdims=keepdims) + +ctx = default_context() +data = mx.symbol.Variable('data') +in_data_dim = random_sample([4,5,6], 1)[0] +in_shape = rand_shape_nd(in_data_dim) +for order in [1, 2]: +for dtype in [np.float16, np.float32, np.float64]: +in_data = np.random.uniform(-1, 1, in_shape).astype(dtype) +in_data[abs(in_data) < 1e-2] = 1e-2 +for i in range(in_data_dim): +norm_sym = mx.symbol.norm(data=data, ord=order, axis=i, keepdims=True) +npy_out = l1norm(in_data, i) if order==1 else l2norm(in_data, i) Review comment: Okay, so maybe you can go ahead and change all ```Python 1e-x if dtype is np.float16 else 1e-y ``` in this function to use `==`? This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[incubator-mxnet-site] branch asf-site updated: Bump the publish timestamp.
This is an automated email from the ASF dual-hosted git repository. zhasheng pushed a commit to branch asf-site in repository https://gitbox.apache.org/repos/asf/incubator-mxnet-site.git The following commit(s) were added to refs/heads/asf-site by this push: new 35b9788 Bump the publish timestamp. 35b9788 is described below commit 35b978860e88bc09017fdf583de8e2b7a4a35dec Author: mxnet-ci AuthorDate: Fri Jun 29 01:37:52 2018 + Bump the publish timestamp. --- date.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/date.txt b/date.txt new file mode 100644 index 000..b2fb3cb --- /dev/null +++ b/date.txt @@ -0,0 +1 @@ +Fri Jun 29 01:37:52 UTC 2018
[GitHub] aaronmarkham commented on issue #11404: [MXNET-595] Install page not loading properly in Internet Explorer
aaronmarkham commented on issue #11404: [MXNET-595] Install page not loading properly in Internet Explorer URL: https://github.com/apache/incubator-mxnet/pull/11404#issuecomment-401219131 @indhub this should be the fix for the issue you reported... can you merge? This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] anirudhacharya commented on a change in pull request #11229: [MXNET-379] L1 Normalization
anirudhacharya commented on a change in pull request #11229: [MXNET-379] L1 Normalization URL: https://github.com/apache/incubator-mxnet/pull/11229#discussion_r199029890 ## File path: src/operator/tensor/broadcast_reduce_op.h ## @@ -880,27 +880,24 @@ inline bool L2NormStorageType(const nnvm::NodeAttrs& attrs, int& out_stype = out_attrs->at(0); const NormParam& param = nnvm::get(attrs.parsed); bool dispatched = false; - // l2 norm on a particular axis only supports cpu - const bool invalid_ctx = dev_mask != mshadow::cpu::kDevMask; - const auto dispatch_ex = + if (param.ord == 2) { +// l2 norm on a particular axis only supports cpu +const bool invalid_ctx = dev_mask != mshadow::cpu::kDevMask; +const auto dispatch_ex = invalid_ctx ? DispatchMode::kFComputeFallback : DispatchMode::kFComputeEx; - if (!dispatched && in_stype == kDefaultStorage) { -// dns -> dns -dispatched = storage_type_assign(_stype, kDefaultStorage, dispatch_mode, - DispatchMode::kFCompute); - } - const TShape axis = param.axis.has_value() ? param.axis.value() : TShape(); - if (!dispatched && (in_stype == kRowSparseStorage || in_stype == kCSRStorage) && - axis.ndim() == 0 && param.ord == 2) { -// l2 norm: rsp/csr, axis = () -> dns -dispatched = storage_type_assign(_stype, kDefaultStorage, dispatch_mode, - DispatchMode::kFComputeEx); - } - if (!dispatched && in_stype == kCSRStorage && axis.ndim() == 1 && !param.keepdims && - (axis[0] == 0 || axis[0] == 1) && param.ord == 2) { -// l2 norm: csr, axis = 0/1 -> dns -dispatched = storage_type_assign(_stype, kDefaultStorage, dispatch_mode, - dispatch_ex); +const TShape axis = param.axis.has_value() ? 
param.axis.value() : TShape(); +if (!dispatched && (in_stype == kRowSparseStorage || in_stype == kCSRStorage) && +axis.ndim() == 0 && param.ord == 2) { + // l2 norm: rsp/csr, axis = () -> dns + dispatched = storage_type_assign(_stype, kDefaultStorage, dispatch_mode, + DispatchMode::kFComputeEx); +} +if (!dispatched && in_stype == kCSRStorage && axis.ndim() == 1 && !param.keepdims && +(axis[0] == 0 || axis[0] == 1) && param.ord == 2) { + // l2 norm: csr, axis = 0/1 -> dns + dispatched = storage_type_assign(_stype, kDefaultStorage, dispatch_mode, + dispatch_ex); +} } if (!dispatched) { dispatched = dispatch_fallback(out_attrs, dispatch_mode); Review comment: i will fix this This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] leezu commented on issue #11403: L2 norm for fp16 goes to nan, needs to be fixed
leezu commented on issue #11403: L2 norm for fp16 goes to nan, needs to be fixed URL: https://github.com/apache/incubator-mxnet/issues/11403#issuecomment-401217193 How about using a numerically stable L2 norm algorithm by scaling the elements? Ie. [BLAS nrm2](http://www.netlib.org/blas/snrm2.f) This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] chinakook commented on issue #11003: rnn_cell little bug fixed
chinakook commented on issue #11003: rnn_cell little bug fixed URL: https://github.com/apache/incubator-mxnet/pull/11003#issuecomment-401216200 Rebasing like this? I'm not very familiar with rebasing. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] haojin2 commented on a change in pull request #11229: [MXNET-379] L1 Normalization
haojin2 commented on a change in pull request #11229: [MXNET-379] L1 Normalization URL: https://github.com/apache/incubator-mxnet/pull/11229#discussion_r199028398 ## File path: tests/python/unittest/test_operator.py ## @@ -3009,6 +3009,49 @@ def npy_layer_norm(data, gamma, beta, axis=1, eps=1E-5): grad_nodes={'data': req, 'gamma': req, 'beta': req}, numeric_eps=1e-2, rtol=1e-2, atol=1e-2) +@with_seed() +def test_norm(): +def l1norm(input_data, axis=0, keepdims=True): +return np.sum(abs(input_data), axis=axis, keepdims=keepdims) +def l2norm(input_data, axis=0, keepdims=True): +return np.linalg.norm(input_data, axis=axis, keepdims=keepdims) + +ctx = default_context() +data = mx.symbol.Variable('data') +in_data_dim = random_sample([4,5,6], 1)[0] +in_shape = rand_shape_nd(in_data_dim) +for order in [1, 2]: +for dtype in [np.float16, np.float32, np.float64]: +in_data = np.random.uniform(-1, 1, in_shape).astype(dtype) +in_data[abs(in_data) < 1e-2] = 1e-2 +for i in range(in_data_dim): +norm_sym = mx.symbol.norm(data=data, ord=order, axis=i, keepdims=True) +npy_out = l1norm(in_data, i) if order==1 else l2norm(in_data, i) Review comment: ```python >>> a = 1 >>> a is 1 True ``` This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] anirudhacharya commented on a change in pull request #11229: [MXNET-379] L1 Normalization
anirudhacharya commented on a change in pull request #11229: [MXNET-379] L1 Normalization URL: https://github.com/apache/incubator-mxnet/pull/11229#discussion_r199028391 ## File path: src/operator/tensor/broadcast_reduce_op_value.cc ## @@ -284,10 +284,16 @@ array. Examples:: - x = [[1, 2], - [3, 4]] + x = [[[1,2], +[3,4]], + [[2,2], +[5,6]]] Review comment: sure i can do that This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] anirudhacharya commented on a change in pull request #11229: [MXNET-379] L1 Normalization
anirudhacharya commented on a change in pull request #11229: [MXNET-379] L1 Normalization URL: https://github.com/apache/incubator-mxnet/pull/11229#discussion_r199028119 ## File path: tests/python/unittest/test_operator.py ## @@ -3009,6 +3009,49 @@ def npy_layer_norm(data, gamma, beta, axis=1, eps=1E-5): grad_nodes={'data': req, 'gamma': req, 'beta': req}, numeric_eps=1e-2, rtol=1e-2, atol=1e-2) +@with_seed() +def test_norm(): +def l1norm(input_data, axis=0, keepdims=True): +return np.sum(abs(input_data), axis=axis, keepdims=keepdims) +def l2norm(input_data, axis=0, keepdims=True): +return np.linalg.norm(input_data, axis=axis, keepdims=keepdims) + +ctx = default_context() +data = mx.symbol.Variable('data') +in_data_dim = random_sample([4,5,6], 1)[0] +in_shape = rand_shape_nd(in_data_dim) +for order in [1, 2]: +for dtype in [np.float16, np.float32, np.float64]: +in_data = np.random.uniform(-1, 1, in_shape).astype(dtype) +in_data[abs(in_data) < 1e-2] = 1e-2 Review comment: sure, i will use 1e-3. that is the epsilon i am using in check_gradient This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] haojin2 commented on a change in pull request #11229: [MXNET-379] L1 Normalization
haojin2 commented on a change in pull request #11229: [MXNET-379] L1 Normalization URL: https://github.com/apache/incubator-mxnet/pull/11229#discussion_r199028037 ## File path: src/operator/tensor/broadcast_reduce_op_value.cc ## @@ -284,10 +284,16 @@ array. Examples:: - x = [[1, 2], - [3, 4]] + x = [[[1,2], +[3,4]], + [[2,2], +[5,6]]] Review comment: Then would you please fix those too? This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] anirudhacharya commented on a change in pull request #11229: [MXNET-379] L1 Normalization
anirudhacharya commented on a change in pull request #11229: [MXNET-379] L1 Normalization URL: https://github.com/apache/incubator-mxnet/pull/11229#discussion_r199027961 ## File path: tests/python/unittest/test_operator.py ## @@ -3009,6 +3009,49 @@ def npy_layer_norm(data, gamma, beta, axis=1, eps=1E-5): grad_nodes={'data': req, 'gamma': req, 'beta': req}, numeric_eps=1e-2, rtol=1e-2, atol=1e-2) +@with_seed() +def test_norm(): +def l1norm(input_data, axis=0, keepdims=True): +return np.sum(abs(input_data), axis=axis, keepdims=keepdims) +def l2norm(input_data, axis=0, keepdims=True): +return np.linalg.norm(input_data, axis=axis, keepdims=keepdims) + +ctx = default_context() +data = mx.symbol.Variable('data') +in_data_dim = random_sample([4,5,6], 1)[0] +in_shape = rand_shape_nd(in_data_dim) +for order in [1, 2]: +for dtype in [np.float16, np.float32, np.float64]: +in_data = np.random.uniform(-1, 1, in_shape).astype(dtype) +in_data[abs(in_data) < 1e-2] = 1e-2 +for i in range(in_data_dim): +norm_sym = mx.symbol.norm(data=data, ord=order, axis=i, keepdims=True) +npy_out = l1norm(in_data, i) if order==1 else l2norm(in_data, i) Review comment: python's ``is`` compares object reference whereas == compares for value. Here we want to compare the values and not object references. Using ``is`` might make the test fail. For example - ```python >>> 625 is 25**2 False >>> 625 == 25**2 True ``` This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] haojin2 commented on a change in pull request #11229: [MXNET-379] L1 Normalization
haojin2 commented on a change in pull request #11229: [MXNET-379] L1 Normalization URL: https://github.com/apache/incubator-mxnet/pull/11229#discussion_r199027684 ## File path: src/operator/tensor/broadcast_reduce_op_value.cc ## @@ -284,10 +284,16 @@ array. Examples:: - x = [[1, 2], - [3, 4]] + x = [[[1,2], +[3,4]], + [[2,2], +[5,6]]] - norm(x) = [5.47722578] + norm(x, ord=2, axis=1) = [[3.1622777 4.472136 ] +[5.3851647 6.3245554]] + + norm(x, ord=1, axis=1) = [[4., 6.], +[7., 8.]] rsp = x.cast_storage('row_sparse') Review comment: the lines below have examples for sparse, and you should update them with the latest changes to the behavior of this op, you should also add that only "ord=2" supports sparse ndarrays after your changes. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] haojin2 commented on a change in pull request #11229: [MXNET-379] L1 Normalization
haojin2 commented on a change in pull request #11229: [MXNET-379] L1 Normalization URL: https://github.com/apache/incubator-mxnet/pull/11229#discussion_r199027435 ## File path: src/operator/tensor/broadcast_reduce_op.h ## @@ -1021,8 +1023,38 @@ void L2NormGradCompute(const nnvm::NodeAttrs& attrs, } else { small = ReduceAxesShapeImpl(outputs[0].shape_, param.axis, true, false); } - ReduceAxesBackwardUseInOutImpl(ctx, small, inputs, + if (param.ord == 1) { +using namespace mshadow; +using namespace mshadow::expr; +TShape src_shape, dst_shape; +BroadcastReduceShapeCompact(outputs[0].shape_, small, _shape, _shape); +Stream *s = ctx.get_stream(); +MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, { + if (dst_shape.ndim() == 2) { +Tensor ograd = + inputs[0].get_with_shape(dst_shape.get<2>(), s); +Tensor igrad = + outputs[0].get_with_shape(src_shape.get<2>(), s); +Tensor data = + inputs[1].get_with_shape(src_shape.get<2>(), s); +ASSIGN_DISPATCH(igrad, req[0], + broadcast_to(ograd, src_shape)*F(data)); + } else { +const int ndim = MXNET_SPECIAL_MAX_NDIM; +Tensor igrad = + outputs[0].get_with_shape(src_shape.get(), s); +Tensor ograd = + inputs[0].get_with_shape(dst_shape.get(), s); +Tensor data = + inputs[1].get_with_shape(src_shape.get(), s); +ASSIGN_DISPATCH(igrad, req[0], + broadcast_to(ograd, src_shape)*F(data)); + } +}); + } else if (param.ord == 2) { +ReduceAxesBackwardUseInOutImpl(ctx, small, inputs, req, outputs); Review comment: No, you should have `req` aligned with `ctx` above. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] haojin2 commented on a change in pull request #11229: [MXNET-379] L1 Normalization
haojin2 commented on a change in pull request #11229: [MXNET-379] L1 Normalization URL: https://github.com/apache/incubator-mxnet/pull/11229#discussion_r199027220 ## File path: src/operator/tensor/broadcast_reduce_op.h ## @@ -880,27 +880,24 @@ inline bool L2NormStorageType(const nnvm::NodeAttrs& attrs, int& out_stype = out_attrs->at(0); const NormParam& param = nnvm::get(attrs.parsed); bool dispatched = false; - // l2 norm on a particular axis only supports cpu - const bool invalid_ctx = dev_mask != mshadow::cpu::kDevMask; - const auto dispatch_ex = + if (param.ord == 2) { +// l2 norm on a particular axis only supports cpu +const bool invalid_ctx = dev_mask != mshadow::cpu::kDevMask; +const auto dispatch_ex = invalid_ctx ? DispatchMode::kFComputeFallback : DispatchMode::kFComputeEx; - if (!dispatched && in_stype == kDefaultStorage) { -// dns -> dns -dispatched = storage_type_assign(_stype, kDefaultStorage, dispatch_mode, - DispatchMode::kFCompute); - } - const TShape axis = param.axis.has_value() ? param.axis.value() : TShape(); - if (!dispatched && (in_stype == kRowSparseStorage || in_stype == kCSRStorage) && - axis.ndim() == 0 && param.ord == 2) { -// l2 norm: rsp/csr, axis = () -> dns -dispatched = storage_type_assign(_stype, kDefaultStorage, dispatch_mode, - DispatchMode::kFComputeEx); - } - if (!dispatched && in_stype == kCSRStorage && axis.ndim() == 1 && !param.keepdims && - (axis[0] == 0 || axis[0] == 1) && param.ord == 2) { -// l2 norm: csr, axis = 0/1 -> dns -dispatched = storage_type_assign(_stype, kDefaultStorage, dispatch_mode, - dispatch_ex); +const TShape axis = param.axis.has_value() ? 
param.axis.value() : TShape(); +if (!dispatched && (in_stype == kRowSparseStorage || in_stype == kCSRStorage) && +axis.ndim() == 0 && param.ord == 2) { + // l2 norm: rsp/csr, axis = () -> dns + dispatched = storage_type_assign(_stype, kDefaultStorage, dispatch_mode, + DispatchMode::kFComputeEx); +} +if (!dispatched && in_stype == kCSRStorage && axis.ndim() == 1 && !param.keepdims && +(axis[0] == 0 || axis[0] == 1) && param.ord == 2) { + // l2 norm: csr, axis = 0/1 -> dns + dispatched = storage_type_assign(_stype, kDefaultStorage, dispatch_mode, + dispatch_ex); +} } if (!dispatched) { dispatched = dispatch_fallback(out_attrs, dispatch_mode); Review comment: @anirudhacharya I think you missed the default case when copying the previous code over. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] anirudhacharya commented on a change in pull request #11229: [MXNET-379] L1 Normalization
anirudhacharya commented on a change in pull request #11229: [MXNET-379] L1 Normalization URL: https://github.com/apache/incubator-mxnet/pull/11229#discussion_r199026763 ## File path: src/operator/tensor/broadcast_reduce_op_value.cc ## @@ -284,10 +284,16 @@ array. Examples:: - x = [[1, 2], - [3, 4]] + x = [[[1,2], +[3,4]], + [[2,2], +[5,6]]] - norm(x) = [5.47722578] + norm(x, ord=2, axis=1) = [[3.1622777 4.472136 ] +[5.3851647 6.3245554]] + + norm(x, ord=1, axis=1) = [[4., 6.], +[7., 8.]] rsp = x.cast_storage('row_sparse') Review comment: I do not understand what you mean by "specification for sparse-related cases" could you please elaborate? This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] anirudhacharya commented on a change in pull request #11229: [MXNET-379] L1 Normalization
anirudhacharya commented on a change in pull request #11229: [MXNET-379] L1 Normalization URL: https://github.com/apache/incubator-mxnet/pull/11229#discussion_r199026810 ## File path: src/operator/tensor/broadcast_reduce_op_value.cc ## @@ -322,7 +328,7 @@ NNVM_REGISTER_OP(_backward_norm) [](const NodeAttrs& attrs) { return std::vector{ResourceRequest::kTempSpace}; }) -.set_attr("FCompute", L2NormGradCompute); +.set_attr("FCompute", LpNormGradCompute); Review comment: this is done This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] haojin2 commented on a change in pull request #11229: [MXNET-379] L1 Normalization
haojin2 commented on a change in pull request #11229: [MXNET-379] L1 Normalization URL: https://github.com/apache/incubator-mxnet/pull/11229#discussion_r199026778 ## File path: tests/python/unittest/test_operator.py ## @@ -3009,6 +3009,49 @@ def npy_layer_norm(data, gamma, beta, axis=1, eps=1E-5): grad_nodes={'data': req, 'gamma': req, 'beta': req}, numeric_eps=1e-2, rtol=1e-2, atol=1e-2) +@with_seed() +def test_norm(): +def l1norm(input_data, axis=0, keepdims=True): +return np.sum(abs(input_data), axis=axis, keepdims=keepdims) +def l2norm(input_data, axis=0, keepdims=True): +return np.linalg.norm(input_data, axis=axis, keepdims=keepdims) + +ctx = default_context() +data = mx.symbol.Variable('data') +in_data_dim = random_sample([4,5,6], 1)[0] +in_shape = rand_shape_nd(in_data_dim) +for order in [1, 2]: +for dtype in [np.float16, np.float32, np.float64]: +in_data = np.random.uniform(-1, 1, in_shape).astype(dtype) +in_data[abs(in_data) < 1e-2] = 1e-2 +for i in range(in_data_dim): +norm_sym = mx.symbol.norm(data=data, ord=order, axis=i, keepdims=True) +npy_out = l1norm(in_data, i) if order==1 else l2norm(in_data, i) Review comment: maybe use `is` instead of `==` This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] eric-haibin-lin commented on a change in pull request #11229: [MXNET-379] L1 Normalization
eric-haibin-lin commented on a change in pull request #11229: [MXNET-379] L1 Normalization URL: https://github.com/apache/incubator-mxnet/pull/11229#discussion_r199026685 ## File path: src/operator/tensor/broadcast_reduce_op.h ## @@ -880,27 +880,24 @@ inline bool L2NormStorageType(const nnvm::NodeAttrs& attrs, int& out_stype = out_attrs->at(0); const NormParam& param = nnvm::get(attrs.parsed); bool dispatched = false; - // l2 norm on a particular axis only supports cpu - const bool invalid_ctx = dev_mask != mshadow::cpu::kDevMask; - const auto dispatch_ex = + if (param.ord == 2) { +// l2 norm on a particular axis only supports cpu +const bool invalid_ctx = dev_mask != mshadow::cpu::kDevMask; +const auto dispatch_ex = invalid_ctx ? DispatchMode::kFComputeFallback : DispatchMode::kFComputeEx; - if (!dispatched && in_stype == kDefaultStorage) { -// dns -> dns -dispatched = storage_type_assign(_stype, kDefaultStorage, dispatch_mode, - DispatchMode::kFCompute); - } - const TShape axis = param.axis.has_value() ? param.axis.value() : TShape(); - if (!dispatched && (in_stype == kRowSparseStorage || in_stype == kCSRStorage) && - axis.ndim() == 0 && param.ord == 2) { -// l2 norm: rsp/csr, axis = () -> dns -dispatched = storage_type_assign(_stype, kDefaultStorage, dispatch_mode, - DispatchMode::kFComputeEx); - } - if (!dispatched && in_stype == kCSRStorage && axis.ndim() == 1 && !param.keepdims && - (axis[0] == 0 || axis[0] == 1) && param.ord == 2) { -// l2 norm: csr, axis = 0/1 -> dns -dispatched = storage_type_assign(_stype, kDefaultStorage, dispatch_mode, - dispatch_ex); +const TShape axis = param.axis.has_value() ? 
param.axis.value() : TShape(); +if (!dispatched && (in_stype == kRowSparseStorage || in_stype == kCSRStorage) && +axis.ndim() == 0 && param.ord == 2) { + // l2 norm: rsp/csr, axis = () -> dns + dispatched = storage_type_assign(_stype, kDefaultStorage, dispatch_mode, + DispatchMode::kFComputeEx); +} +if (!dispatched && in_stype == kCSRStorage && axis.ndim() == 1 && !param.keepdims && +(axis[0] == 0 || axis[0] == 1) && param.ord == 2) { + // l2 norm: csr, axis = 0/1 -> dns + dispatched = storage_type_assign(_stype, kDefaultStorage, dispatch_mode, + dispatch_ex); +} } if (!dispatched) { dispatched = dispatch_fallback(out_attrs, dispatch_mode); Review comment: Do not dispatch to fallback if all your inputs are dense. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] andrewfayres commented on issue #11477: Copy profiler examples to new location for CI
andrewfayres commented on issue #11477: Copy profiler examples to new location for CI URL: https://github.com/apache/incubator-mxnet/pull/11477#issuecomment-401214009 Odd, I know I updated these earlier. I'll take a look when I get home. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] haojin2 commented on a change in pull request #11229: [MXNET-379] L1 Normalization
haojin2 commented on a change in pull request #11229: [MXNET-379] L1 Normalization URL: https://github.com/apache/incubator-mxnet/pull/11229#discussion_r199026636 ## File path: tests/python/unittest/test_operator.py ## @@ -3009,6 +3009,49 @@ def npy_layer_norm(data, gamma, beta, axis=1, eps=1E-5): grad_nodes={'data': req, 'gamma': req, 'beta': req}, numeric_eps=1e-2, rtol=1e-2, atol=1e-2) +@with_seed() +def test_norm(): +def l1norm(input_data, axis=0, keepdims=True): +return np.sum(abs(input_data), axis=axis, keepdims=keepdims) +def l2norm(input_data, axis=0, keepdims=True): +return np.linalg.norm(input_data, axis=axis, keepdims=keepdims) + +ctx = default_context() +data = mx.symbol.Variable('data') +in_data_dim = random_sample([4,5,6], 1)[0] +in_shape = rand_shape_nd(in_data_dim) +for order in [1, 2]: +for dtype in [np.float16, np.float32, np.float64]: +in_data = np.random.uniform(-1, 1, in_shape).astype(dtype) +in_data[abs(in_data) < 1e-2] = 1e-2 Review comment: Please only do this for abs(in_data) < eps, where eps is the eps you're using for check_numeric_gradient This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] threeleafzerg commented on issue #10696: [MXNET-366]Extend MXNet Distributed Training by AllReduce
threeleafzerg commented on issue #10696: [MXNET-366]Extend MXNet Distributed Training by AllReduce URL: https://github.com/apache/incubator-mxnet/pull/10696#issuecomment-401213560 Hi yang, I checked your error txt. Your build failure should be unrelated with my PR. e.g. build/src/operator/custom/native_op.o: In function `mxnet::op::NativeOpProp::CreateOperator(mxnet::Context) const [clone .localalias.447]': native_op.cc:(.text+0x410): undefined reference to `mxnet::Operator* mxnet::op::CreateOp(mxnet::op::NativeOpParam)' Can you have a clean git clone and rebuild it again? It should be caused by environment issue. This is the Jenkins result of Commit 1444a84: http://jenkins.mxnet-ci.amazon-ml.com/blue/organizations/jenkins/incubator-mxnet/detail/PR-10696/28/pipeline You can see that all build test pass and it failed at test cases unrelated with kvstore. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] haojin2 commented on a change in pull request #11229: [MXNET-379] L1 Normalization
haojin2 commented on a change in pull request #11229: [MXNET-379] L1 Normalization URL: https://github.com/apache/incubator-mxnet/pull/11229#discussion_r199025996 ## File path: src/operator/tensor/broadcast_reduce_op_value.cc ## @@ -284,10 +284,16 @@ array. Examples:: - x = [[1, 2], - [3, 4]] + x = [[[1,2], +[3,4]], + [[2,2], +[5,6]]] Review comment: Please insert spaces just like the format before. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] haojin2 commented on a change in pull request #11229: [MXNET-379] L1 Normalization
haojin2 commented on a change in pull request #11229: [MXNET-379] L1 Normalization URL: https://github.com/apache/incubator-mxnet/pull/11229#discussion_r199025950 ## File path: src/operator/tensor/broadcast_reduce_op_value.cc ## @@ -284,10 +284,16 @@ array. Examples:: - x = [[1, 2], - [3, 4]] + x = [[[1,2], +[3,4]], + [[2,2], +[5,6]]] - norm(x) = [5.47722578] + norm(x, ord=2, axis=1) = [[3.1622777 4.472136 ] +[5.3851647 6.3245554]] + + norm(x, ord=1, axis=1) = [[4., 6.], +[7., 8.]] rsp = x.cast_storage('row_sparse') Review comment: Please also add specification for sparse-related cases below. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] haojin2 commented on a change in pull request #11229: [MXNET-379] L1 Normalization
haojin2 commented on a change in pull request #11229: [MXNET-379] L1 Normalization URL: https://github.com/apache/incubator-mxnet/pull/11229#discussion_r199025732 ## File path: src/operator/tensor/broadcast_reduce_op.h ## @@ -1021,8 +1023,38 @@ void L2NormGradCompute(const nnvm::NodeAttrs& attrs, } else { small = ReduceAxesShapeImpl(outputs[0].shape_, param.axis, true, false); } - ReduceAxesBackwardUseInOutImpl(ctx, small, inputs, + if (param.ord == 1) { +using namespace mshadow; +using namespace mshadow::expr; +TShape src_shape, dst_shape; +BroadcastReduceShapeCompact(outputs[0].shape_, small, _shape, _shape); +Stream *s = ctx.get_stream(); +MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, { + if (dst_shape.ndim() == 2) { +Tensor ograd = + inputs[0].get_with_shape(dst_shape.get<2>(), s); +Tensor igrad = + outputs[0].get_with_shape(src_shape.get<2>(), s); +Tensor data = + inputs[1].get_with_shape(src_shape.get<2>(), s); +ASSIGN_DISPATCH(igrad, req[0], + broadcast_to(ograd, src_shape)*F(data)); + } else { +const int ndim = MXNET_SPECIAL_MAX_NDIM; +Tensor igrad = + outputs[0].get_with_shape(src_shape.get(), s); +Tensor ograd = + inputs[0].get_with_shape(dst_shape.get(), s); +Tensor data = + inputs[1].get_with_shape(src_shape.get(), s); +ASSIGN_DISPATCH(igrad, req[0], + broadcast_to(ograd, src_shape)*F(data)); + } +}); + } else if (param.ord == 2) { +ReduceAxesBackwardUseInOutImpl(ctx, small, inputs, req, outputs); Review comment: Fix the alignment here. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] haojin2 commented on a change in pull request #11229: [MXNET-379] L1 Normalization
haojin2 commented on a change in pull request #11229: [MXNET-379] L1 Normalization URL: https://github.com/apache/incubator-mxnet/pull/11229#discussion_r199025529 ## File path: src/operator/tensor/broadcast_reduce_op.h ## @@ -1021,8 +1023,38 @@ void L2NormGradCompute(const nnvm::NodeAttrs& attrs, } else { small = ReduceAxesShapeImpl(outputs[0].shape_, param.axis, true, false); } - ReduceAxesBackwardUseInOutImpl(ctx, small, inputs, + if (param.ord == 1) { +using namespace mshadow; +using namespace mshadow::expr; Review comment: These two `using namespace` lines are redundant. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] haojin2 commented on a change in pull request #11229: [MXNET-379] L1 Normalization
haojin2 commented on a change in pull request #11229: [MXNET-379] L1 Normalization URL: https://github.com/apache/incubator-mxnet/pull/11229#discussion_r199025529 ## File path: src/operator/tensor/broadcast_reduce_op.h ## @@ -1021,8 +1023,38 @@ void L2NormGradCompute(const nnvm::NodeAttrs& attrs, } else { small = ReduceAxesShapeImpl(outputs[0].shape_, param.axis, true, false); } - ReduceAxesBackwardUseInOutImpl(ctx, small, inputs, + if (param.ord == 1) { +using namespace mshadow; +using namespace mshadow::expr; Review comment: These two `using namespace` lines are redundant. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] anirudhacharya edited a comment on issue #11229: [MXNET-379] L1 Normalization
anirudhacharya edited a comment on issue #11229: [MXNET-379] L1 Normalization URL: https://github.com/apache/incubator-mxnet/pull/11229#issuecomment-400887360 @haojin2 @eric-haibin-lin @piiswrong ping for review. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] anirudhacharya commented on issue #11475: Amending ONNX importer/exporter #11213
anirudhacharya commented on issue #11475: Amending ONNX importer/exporter #11213 URL: https://github.com/apache/incubator-mxnet/issues/11475#issuecomment-401211603 Hi @zhreshold thanks for the feedback - I will make a PR to change ``python-pytest`` to ``onnx``. But the other changes could be hard to accommodate - - onnx2mx/mx2onnx, from the perspective of inside mxnet python package, it is more reasonable to use name from_onnx/to_onnx - This change was made in the recent [PR](https://github.com/apache/incubator-mxnet/pull/11213#discussion_r195585421) and I think it makes things more clear and unambiguous because ``onnx2mx`` says a lot more about what the module does compared to ``from_onnx``. But I am still open to discussion here. - Please be considerate about modules and their purposes before spreading to many files. For example, https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/contrib/onnx/mx2onnx/_export_helper.py don't necessarily need to be a separate module, a private function in export_onnx.py is more appropriate. - import_model/import_onnx/import_to_gluon.py are basically doing similar things, just put them in import_onnx.py. Same story for export_model/export_onnx. What is the advantage we derive by merging all these into one file now. Lets discuss this over a call/in-person - Name of highest level APIs, e.g., import_model, get_model_metadata, import_to_gluon, export_model are totally dummy and missing valid info. I suggest to use import_onnx, get_onnx_metadata, export_onnx, import_to_gluon can be deleted and merged as an option for example import_onnx(to_gluon=True) This is API breakage. the ``import_model`` API was already shipped as part of v1.2 breaking it now would not be very customer friendly. There are blog posts and tutorials published based on this. Also as an aside, I think all this is very valuable feedback and I thank you for that but my perception is that it just came a bit late. 
It is hard to incorporate them after actually shipping the product. We had sent out design proposal for this module on the mxnet dev list in early March and then again we asked for one more round of feedback in the month of April. We created confluence wiki with [API Design Document](https://cwiki.apache.org/confluence/display/MXNET/ONNX-MXNet+API+Design) and sent it out on the dev list for review. We also solicited reviews on all the PRs. Going ahead it would be great if we could get the feedback during design and review phase rather than after merging and shipping the code. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] ArmageddonKnight commented on a change in pull request #11364: [MXNET-490] Added OpenLSTMRNN together with benchmarks and Tensorboard callback routines.
ArmageddonKnight commented on a change in pull request #11364: [MXNET-490] Added OpenLSTMRNN together with benchmarks and Tensorboard callback routines. URL: https://github.com/apache/incubator-mxnet/pull/11364#discussion_r199024323 ## File path: example/rnn-backends/bucketing/dataset/download_ptb.sh ## @@ -0,0 +1,27 @@ +#!/bin/sh +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# This file downloads the PTB dataset. + +mkdir -p $(cd $(dirname $0) && pwd)/ptb + +cd $(cd $(dirname $0) && pwd)/ptb && \ Review comment: Thank you for your comments. I have made changes accordingly. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[incubator-mxnet] branch master updated: Document AdaGrad eps as initial history accumulator value (#11392)
This is an automated email from the ASF dual-hosted git repository. zhasheng pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git The following commit(s) were added to refs/heads/master by this push: new 2594fca Document AdaGrad eps as initial history accumulator value (#11392) 2594fca is described below commit 2594fcabdd63aaf26e112ad337dcb711c7783bc2 Author: Leonard Lausen AuthorDate: Fri Jun 29 00:20:37 2018 + Document AdaGrad eps as initial history accumulator value (#11392) --- python/mxnet/optimizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/mxnet/optimizer.py b/python/mxnet/optimizer.py index 267a402..e73a45f 100644 --- a/python/mxnet/optimizer.py +++ b/python/mxnet/optimizer.py @@ -1090,7 +1090,7 @@ class AdaGrad(Optimizer): Parameters -- eps: float, optional -Small value to avoid division by 0. +Initial value of the history accumulator. Avoids division by 0. """ def __init__(self, eps=1e-7, **kwargs):
[GitHub] szha closed pull request #11392: Document AdaGrad eps as initial history accumulator value
szha closed pull request #11392: Document AdaGrad eps as initial history accumulator value URL: https://github.com/apache/incubator-mxnet/pull/11392 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/python/mxnet/optimizer.py b/python/mxnet/optimizer.py index 267a402f246..e73a45f74b0 100644 --- a/python/mxnet/optimizer.py +++ b/python/mxnet/optimizer.py @@ -1090,7 +1090,7 @@ class AdaGrad(Optimizer): Parameters -- eps: float, optional -Small value to avoid division by 0. +Initial value of the history accumulator. Avoids division by 0. """ def __init__(self, eps=1e-7, **kwargs): This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] szha commented on issue #11003: rnn_cell little bug fixed
szha commented on issue #11003: rnn_cell little bug fixed URL: https://github.com/apache/incubator-mxnet/pull/11003#issuecomment-401210997 @chinakook would you mind doing a rebase? we have a couple of flaky tests that are already fixed in the upstream. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] azai91 commented on issue #11118: [MXNET-431] Get physical cores
azai91 commented on issue #11118: [MXNET-431] Get physical cores URL: https://github.com/apache/incubator-mxnet/pull/11118#issuecomment-401210597 just chatted with @szha and the plan is to by default use cmake to build this dependency in the makefile (similar to what we do with MKLDNN). if the developer does not have cmake, then we will instruct them to enable the flag to turn off this feature and we will default to what we had before. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] lanking520 commented on a change in pull request #11477: Copy profiler examples to new location for CI
lanking520 commented on a change in pull request #11477: Copy profiler examples to new location for CI URL: https://github.com/apache/incubator-mxnet/pull/11477#discussion_r199023063 ## File path: scala-package/examples/src/main/scala/org/apache/mxnet/examples/profiler/ProfilerNDArray.scala ## @@ -0,0 +1,252 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.mxnet.examples.profiler + +import org.kohsuke.args4j.{CmdLineParser, Option} +import org.slf4j.LoggerFactory +import scala.collection.JavaConverters._ +import java.io.File +import org.apache.mxnet.Profiler +import org.apache.mxnet.Random +import org.apache.mxnet.Shape +import org.apache.mxnet.NDArray +import org.apache.mxnet.Context + +/** + * @author Depeng Liang + */ +object ProfilerNDArray { + private val logger = LoggerFactory.getLogger(classOf[ProfilerNDArray]) + + def testBroadcast(): Unit = { +val sampleNum = 1000 +def testBroadcastTo(): Unit = { + for (i <- 0 until sampleNum) { +val nDim = scala.util.Random.nextInt(2) + 1 +val targetShape = Shape((0 until nDim).map(i => scala.util.Random.nextInt(10) + 1)) +val shape = targetShape.toArray.map { s => +if (scala.util.Random.nextInt(2) == 1) 1 +else s +} +val dat = NDArray.empty(shape: _*) +val randomRet = (0 until shape.product) + .map(r => scala.util.Random.nextFloat() - 0.5f).toArray +dat.set(randomRet) +val ndArrayRet = NDArray.broadcast_to(Map("shape" -> targetShape))(dat).get +require(ndArrayRet.shape == targetShape) +val err = { + // implementation of broadcast + val ret = { +(randomRet /: shape.zipWithIndex.reverse){ (acc, elem) => elem match { case (s, i) => + if (s != targetShape(i)) { +acc.grouped(shape.takeRight(shape.length - i).product).map {g => + (0 until targetShape(i)).map(x => g).flatten +}.flatten.toArray + } else acc +}} + } + val tmp = ndArrayRet.toArray.zip(ret).map{ case (l, r) => Math.pow(l - r, 2) } + tmp.sum / tmp.length +} +require(err < 1E-8) +ndArrayRet.dispose() +dat.dispose() + } +} +testBroadcastTo() + } + + def randomNDArray(dim: Int): NDArray = { +val tmp = Math.pow(1000, 1.0 / dim).toInt +val shape = Shape((0 until dim).map(d => scala.util.Random.nextInt(tmp) + 1)) +Random.uniform(-10f, 10f, shape) + } + + def testNDArraySaveload(): Unit = { +val maxDim = 5 +val nRepeat = 10 +val fileName = s"${System.getProperty("java.io.tmpdir")}/tmpList.bin" +for 
(repeat <- 0 until nRepeat) { + try { +val data = (0 until 10).map(i => randomNDArray(scala.util.Random.nextInt(4) + 1)) +NDArray.save(fileName, data) +val data2 = NDArray.load2Array(fileName) +require(data.length == data2.length) +for ((x, y) <- data.zip(data2)) { + val tmp = x - y + require(tmp.toArray.sum == 0) + tmp.dispose() +} +val dMap = data.zipWithIndex.map { case (arr, i) => + s"NDArray xx $i" -> arr +}.toMap +NDArray.save(fileName, dMap) + val dMap2 = NDArray.load2Map(fileName) + require(dMap.size == dMap2.size) + for ((k, x) <- dMap) { + val y = dMap2(k) + val tmp = x - y + require(tmp.toArray.sum == 0) + tmp.dispose() + } +data.foreach(_.dispose()) + } finally { +val file = new File(fileName) +file.delete() + } +} + } + + def testNDArrayCopy(): Unit = { +val c = Random.uniform(-10f, 10f, Shape(10, 10)) +val d = c.copyTo(Context.cpu(0)) +val tmp = c - d +require(tmp.toArray.map(Math.abs).sum == 0) +c.dispose() +d.dispose() + } + + def reldiff(a: NDArray, b: NDArray): Float = { +val diff = NDArray.sum(NDArray.abs(a - b)).toScalar +val norm = NDArray.sum(NDArray.abs(a)).toScalar Review comment: Same in here with `NDArray.api` This
[GitHub] lanking520 commented on a change in pull request #11477: Copy profiler examples to new location for CI
lanking520 commented on a change in pull request #11477: Copy profiler examples to new location for CI URL: https://github.com/apache/incubator-mxnet/pull/11477#discussion_r199022241 ## File path: scala-package/examples/src/main/scala/org/apache/mxnet/examples/profiler/ProfilerNDArray.scala ## @@ -0,0 +1,252 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.mxnet.examples.profiler + +import org.kohsuke.args4j.{CmdLineParser, Option} +import org.slf4j.LoggerFactory +import scala.collection.JavaConverters._ +import java.io.File +import org.apache.mxnet.Profiler +import org.apache.mxnet.Random +import org.apache.mxnet.Shape +import org.apache.mxnet.NDArray +import org.apache.mxnet.Context + +/** + * @author Depeng Liang + */ +object ProfilerNDArray { + private val logger = LoggerFactory.getLogger(classOf[ProfilerNDArray]) + + def testBroadcast(): Unit = { +val sampleNum = 1000 +def testBroadcastTo(): Unit = { + for (i <- 0 until sampleNum) { +val nDim = scala.util.Random.nextInt(2) + 1 +val targetShape = Shape((0 until nDim).map(i => scala.util.Random.nextInt(10) + 1)) +val shape = targetShape.toArray.map { s => +if (scala.util.Random.nextInt(2) == 1) 1 +else s +} +val dat = NDArray.empty(shape: _*) +val randomRet = (0 until shape.product) + .map(r => scala.util.Random.nextFloat() - 0.5f).toArray +dat.set(randomRet) +val ndArrayRet = NDArray.broadcast_to(Map("shape" -> targetShape))(dat).get Review comment: Can you try to use NDArray.api.broadcast_to in here? This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] lanking520 commented on a change in pull request #11477: Copy profiler examples to new location for CI
lanking520 commented on a change in pull request #11477: Copy profiler examples to new location for CI URL: https://github.com/apache/incubator-mxnet/pull/11477#discussion_r199023103 ## File path: scala-package/examples/src/main/scala/org/apache/mxnet/examples/profiler/ProfilerNDArray.scala ## @@ -0,0 +1,252 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.mxnet.examples.profiler + +import org.kohsuke.args4j.{CmdLineParser, Option} +import org.slf4j.LoggerFactory +import scala.collection.JavaConverters._ +import java.io.File +import org.apache.mxnet.Profiler +import org.apache.mxnet.Random +import org.apache.mxnet.Shape +import org.apache.mxnet.NDArray +import org.apache.mxnet.Context + +/** + * @author Depeng Liang + */ +object ProfilerNDArray { + private val logger = LoggerFactory.getLogger(classOf[ProfilerNDArray]) + + def testBroadcast(): Unit = { +val sampleNum = 1000 +def testBroadcastTo(): Unit = { + for (i <- 0 until sampleNum) { +val nDim = scala.util.Random.nextInt(2) + 1 +val targetShape = Shape((0 until nDim).map(i => scala.util.Random.nextInt(10) + 1)) +val shape = targetShape.toArray.map { s => +if (scala.util.Random.nextInt(2) == 1) 1 +else s +} +val dat = NDArray.empty(shape: _*) +val randomRet = (0 until shape.product) + .map(r => scala.util.Random.nextFloat() - 0.5f).toArray +dat.set(randomRet) +val ndArrayRet = NDArray.broadcast_to(Map("shape" -> targetShape))(dat).get +require(ndArrayRet.shape == targetShape) +val err = { + // implementation of broadcast + val ret = { +(randomRet /: shape.zipWithIndex.reverse){ (acc, elem) => elem match { case (s, i) => + if (s != targetShape(i)) { +acc.grouped(shape.takeRight(shape.length - i).product).map {g => + (0 until targetShape(i)).map(x => g).flatten +}.flatten.toArray + } else acc +}} + } + val tmp = ndArrayRet.toArray.zip(ret).map{ case (l, r) => Math.pow(l - r, 2) } + tmp.sum / tmp.length +} +require(err < 1E-8) +ndArrayRet.dispose() +dat.dispose() + } +} +testBroadcastTo() + } + + def randomNDArray(dim: Int): NDArray = { +val tmp = Math.pow(1000, 1.0 / dim).toInt +val shape = Shape((0 until dim).map(d => scala.util.Random.nextInt(tmp) + 1)) +Random.uniform(-10f, 10f, shape) + } + + def testNDArraySaveload(): Unit = { +val maxDim = 5 +val nRepeat = 10 +val fileName = s"${System.getProperty("java.io.tmpdir")}/tmpList.bin" +for 
(repeat <- 0 until nRepeat) { + try { +val data = (0 until 10).map(i => randomNDArray(scala.util.Random.nextInt(4) + 1)) +NDArray.save(fileName, data) +val data2 = NDArray.load2Array(fileName) +require(data.length == data2.length) +for ((x, y) <- data.zip(data2)) { + val tmp = x - y + require(tmp.toArray.sum == 0) + tmp.dispose() +} +val dMap = data.zipWithIndex.map { case (arr, i) => + s"NDArray xx $i" -> arr +}.toMap +NDArray.save(fileName, dMap) + val dMap2 = NDArray.load2Map(fileName) + require(dMap.size == dMap2.size) + for ((k, x) <- dMap) { + val y = dMap2(k) + val tmp = x - y + require(tmp.toArray.sum == 0) + tmp.dispose() + } +data.foreach(_.dispose()) + } finally { +val file = new File(fileName) +file.delete() + } +} + } + + def testNDArrayCopy(): Unit = { +val c = Random.uniform(-10f, 10f, Shape(10, 10)) +val d = c.copyTo(Context.cpu(0)) +val tmp = c - d +require(tmp.toArray.map(Math.abs).sum == 0) +c.dispose() +d.dispose() + } + + def reldiff(a: NDArray, b: NDArray): Float = { +val diff = NDArray.sum(NDArray.abs(a - b)).toScalar +val norm = NDArray.sum(NDArray.abs(a)).toScalar +diff / norm + } + + def reldiff(a: Array[Float], b: Array[Float]): Float = { +val diff = + (a zip b).map {
[GitHub] lanking520 commented on a change in pull request #11477: Copy profiler examples to new location for CI
lanking520 commented on a change in pull request #11477: Copy profiler examples to new location for CI URL: https://github.com/apache/incubator-mxnet/pull/11477#discussion_r199023126 ## File path: scala-package/examples/src/main/scala/org/apache/mxnet/examples/profiler/ProfilerNDArray.scala ## @@ -0,0 +1,252 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.mxnet.examples.profiler + +import org.kohsuke.args4j.{CmdLineParser, Option} +import org.slf4j.LoggerFactory +import scala.collection.JavaConverters._ +import java.io.File +import org.apache.mxnet.Profiler +import org.apache.mxnet.Random +import org.apache.mxnet.Shape +import org.apache.mxnet.NDArray +import org.apache.mxnet.Context + +/** + * @author Depeng Liang + */ +object ProfilerNDArray { + private val logger = LoggerFactory.getLogger(classOf[ProfilerNDArray]) + + def testBroadcast(): Unit = { +val sampleNum = 1000 +def testBroadcastTo(): Unit = { + for (i <- 0 until sampleNum) { +val nDim = scala.util.Random.nextInt(2) + 1 +val targetShape = Shape((0 until nDim).map(i => scala.util.Random.nextInt(10) + 1)) +val shape = targetShape.toArray.map { s => +if (scala.util.Random.nextInt(2) == 1) 1 +else s +} +val dat = NDArray.empty(shape: _*) +val randomRet = (0 until shape.product) + .map(r => scala.util.Random.nextFloat() - 0.5f).toArray +dat.set(randomRet) +val ndArrayRet = NDArray.broadcast_to(Map("shape" -> targetShape))(dat).get +require(ndArrayRet.shape == targetShape) +val err = { + // implementation of broadcast + val ret = { +(randomRet /: shape.zipWithIndex.reverse){ (acc, elem) => elem match { case (s, i) => + if (s != targetShape(i)) { +acc.grouped(shape.takeRight(shape.length - i).product).map {g => + (0 until targetShape(i)).map(x => g).flatten +}.flatten.toArray + } else acc +}} + } + val tmp = ndArrayRet.toArray.zip(ret).map{ case (l, r) => Math.pow(l - r, 2) } + tmp.sum / tmp.length +} +require(err < 1E-8) +ndArrayRet.dispose() +dat.dispose() + } +} +testBroadcastTo() + } + + def randomNDArray(dim: Int): NDArray = { +val tmp = Math.pow(1000, 1.0 / dim).toInt +val shape = Shape((0 until dim).map(d => scala.util.Random.nextInt(tmp) + 1)) +Random.uniform(-10f, 10f, shape) + } + + def testNDArraySaveload(): Unit = { +val maxDim = 5 +val nRepeat = 10 +val fileName = s"${System.getProperty("java.io.tmpdir")}/tmpList.bin" +for 
(repeat <- 0 until nRepeat) { + try { +val data = (0 until 10).map(i => randomNDArray(scala.util.Random.nextInt(4) + 1)) +NDArray.save(fileName, data) +val data2 = NDArray.load2Array(fileName) +require(data.length == data2.length) +for ((x, y) <- data.zip(data2)) { + val tmp = x - y + require(tmp.toArray.sum == 0) + tmp.dispose() +} +val dMap = data.zipWithIndex.map { case (arr, i) => + s"NDArray xx $i" -> arr +}.toMap +NDArray.save(fileName, dMap) + val dMap2 = NDArray.load2Map(fileName) + require(dMap.size == dMap2.size) + for ((k, x) <- dMap) { + val y = dMap2(k) + val tmp = x - y + require(tmp.toArray.sum == 0) + tmp.dispose() + } +data.foreach(_.dispose()) + } finally { +val file = new File(fileName) +file.delete() + } +} + } + + def testNDArrayCopy(): Unit = { +val c = Random.uniform(-10f, 10f, Shape(10, 10)) +val d = c.copyTo(Context.cpu(0)) +val tmp = c - d +require(tmp.toArray.map(Math.abs).sum == 0) +c.dispose() +d.dispose() + } + + def reldiff(a: NDArray, b: NDArray): Float = { +val diff = NDArray.sum(NDArray.abs(a - b)).toScalar +val norm = NDArray.sum(NDArray.abs(a)).toScalar +diff / norm + } + + def reldiff(a: Array[Float], b: Array[Float]): Float = { +val diff = + (a zip b).map {
[GitHub] lanking520 commented on a change in pull request #11477: Copy profiler examples to new location for CI
lanking520 commented on a change in pull request #11477: Copy profiler examples to new location for CI URL: https://github.com/apache/incubator-mxnet/pull/11477#discussion_r199022352 ## File path: scala-package/examples/src/main/scala/org/apache/mxnet/examples/profiler/ProfilerMatMul.scala ## @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.mxnet.examples.profiler + +import org.kohsuke.args4j.{CmdLineParser, Option} +import org.slf4j.LoggerFactory +import scala.collection.JavaConverters._ +import org.apache.mxnet.Context +import org.apache.mxnet.Profiler +import java.io.File +import org.apache.mxnet.Symbol +import org.apache.mxnet.Shape +import org.apache.mxnet.Random + +/** + * @author Depeng Liang + */ +object ProfilerMatMul { + private val logger = LoggerFactory.getLogger(classOf[ProfilerMatMul]) + + def main(args: Array[String]): Unit = { +val erul = new ProfilerMatMul +val parser: CmdLineParser = new CmdLineParser(erul) +try { + parser.parseArgument(args.toList.asJava) + val ctx = if (erul.gpu >= 0) Context.gpu(erul.gpu) else Context.cpu() + + val path = s"${erul.outputPath}${File.separator}${erul.profilerName}" + val kwargs = Map("file_name" -> path, "profile_" + erul.profilerMode -> "1") + Profiler.profilerSetConfig(kwargs) + logger.info(s"profile file save to $path") + + val A = Symbol.Variable("A") + val B = Symbol.Variable("B") + val C = Symbol.dot()(A, B)() Review comment: Please use `Symbol.api.dot()` This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] anirudh2290 opened a new pull request #11478: 1.2.1 release notes
anirudh2290 opened a new pull request #11478: 1.2.1 release notes URL: https://github.com/apache/incubator-mxnet/pull/11478 ## Description ## I have added more info to 1.2.1 release notes from what @mli sent on the mailing list. Please help review: Once this is approved , I will make the new release with the same content. @mli @piiswrong @ThomasDelteil @szha @marcoabreu ## Checklist ## ### Essentials ### Please feel free to remove inapplicable items for your PR. - [ ] The PR title starts with [MXNET-$JIRA_ID], where $JIRA_ID refers to the relevant [JIRA issue](https://issues.apache.org/jira/projects/MXNET/issues) created (except PRs with tiny changes) - [ ] Changes are complete (i.e. I finished coding on this PR) - [ ] All changes have test coverage: - Unit tests are added for small changes to verify correctness (e.g. adding a new operator) - Nightly tests are added for complicated/long-running ones (e.g. changing distributed kvstore) - Build tests will be added for build configuration changes (e.g. adding a new build option with NCCL) - [ ] Code is well-documented: - For user-facing API changes, API doc string has been updated. - For new C++ functions in header files, their functionalities and arguments are documented. - For new examples, README.md is added to explain the what the example does, the source of the dataset, expected performance on test set and reference to the original paper if applicable - Check the API doc at http://mxnet-ci-doc.s3-accelerate.dualstack.amazonaws.com/PR-$PR_ID/$BUILD_ID/index.html - [ ] To the my best knowledge, examples are either not affected by this change, or have been fixed to be compatible with this change ### Changes ### - [ ] Feature1, tests, (and when applicable, API doc) - [ ] Feature2, tests, (and when applicable, API doc) ## Comments ## - If this change is a backward incompatible change, why must this change be made. - Interesting edge cases to note here This is an automated message from the Apache Git Service. 
To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] haojin2 commented on issue #11473: batchnorm falls back for sparse arrays.
haojin2 commented on issue #11473: batchnorm falls back for sparse arrays. URL: https://github.com/apache/incubator-mxnet/pull/11473#issuecomment-401208891 I think we do need some extra checks for this to verify that this works This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] andrewfayres opened a new pull request #11477: Copy profiler examples to new location for CI
andrewfayres opened a new pull request #11477: Copy profiler examples to new location for CI URL: https://github.com/apache/incubator-mxnet/pull/11477 ## Description ## As part of the effort to update models to the new inference API we're also moving to a new folder. This example isn't an inference one but still needs to be moved. ## Checklist ## ### Essentials ### Please feel free to remove inapplicable items for your PR. - [ ] The PR title starts with [MXNET-$JIRA_ID], where $JIRA_ID refers to the relevant [JIRA issue](https://issues.apache.org/jira/projects/MXNET/issues) created (except PRs with tiny changes) - [ ] Changes are complete (i.e. I finished coding on this PR) - [ ] All changes have test coverage: - Unit tests are added for small changes to verify correctness (e.g. adding a new operator) - Nightly tests are added for complicated/long-running ones (e.g. changing distributed kvstore) - Build tests will be added for build configuration changes (e.g. adding a new build option with NCCL) - [ ] Code is well-documented: - For user-facing API changes, API doc string has been updated. - For new C++ functions in header files, their functionalities and arguments are documented. - For new examples, README.md is added to explain the what the example does, the source of the dataset, expected performance on test set and reference to the original paper if applicable - Check the API doc at http://mxnet-ci-doc.s3-accelerate.dualstack.amazonaws.com/PR-$PR_ID/$BUILD_ID/index.html - [ ] To the my best knowledge, examples are either not affected by this change, or have been fixed to be compatible with this change ### Changes ### - [ ] Feature1, tests, (and when applicable, API doc) - [ ] Feature2, tests, (and when applicable, API doc) ## Comments ## - If this change is a backward incompatible change, why must this change be made. - Interesting edge cases to note here @lanking520 @nswamy This is an automated message from the Apache Git Service. 
To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] mbaijal opened a new pull request #11476: [Nightly Tests] Fix regex which processes Apache RAT check output
mbaijal opened a new pull request #11476: [Nightly Tests] Fix regex which processes Apache RAT check output URL: https://github.com/apache/incubator-mxnet/pull/11476 ## Description ## I had recently updated the `license_check.sh` script to process the output correctly for an edge case (`20 unknown licenses` was being successfully parsed as `0 unknown license`) However, this was not working as expected due to unnecessary quotes. This PR fixes this. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] ctcyang edited a comment on issue #10696: [MXNET-366]Extend MXNet Distributed Training by AllReduce
ctcyang edited a comment on issue #10696: [MXNET-366]Extend MXNet Distributed Training by AllReduce URL: https://github.com/apache/incubator-mxnet/pull/10696#issuecomment-401201724 I am having trouble building 1444a84 with GPU support, so I was hoping you could help me. `make` seems to cause the following [error](https://github.com/apache/incubator-mxnet/files/2147258/error.txt) during linking phase. Ubuntu 16.04 mpich 3.2 g++ 5.4.0 This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] toddsundsted edited a comment on issue #11398: Floating Point Exception after Array Creation
toddsundsted edited a comment on issue #11398: Floating Point Exception after Array Creation URL: https://github.com/apache/incubator-mxnet/issues/11398#issuecomment-400872858 @frankfliu I have a proposed fix here: https://github.com/apache/incubator-mxnet/pull/11397 this small change ensures that `random_uniform(..)` and similar behave consistently with other methods for `NDArray` creation, like `zeros(...)`. I would love a review and feedback on the proposal. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] hetong007 commented on issue #11474: Fix record io augmentation speed
hetong007 commented on issue #11474: Fix record io augmentation speed URL: https://github.com/apache/incubator-mxnet/pull/11474#issuecomment-401206035 Confirmed that this brings back the speed on both Symbol and Gluon training. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] szha commented on a change in pull request #11364: [MXNET-490] Added OpenLSTMRNN together with benchmarks and Tensorboard callback routines.
szha commented on a change in pull request #11364: [MXNET-490] Added OpenLSTMRNN together with benchmarks and Tensorboard callback routines. URL: https://github.com/apache/incubator-mxnet/pull/11364#discussion_r199017711 ## File path: example/rnn-backends/bucketing/dataset/download_ptb.sh ## @@ -0,0 +1,27 @@ +#!/bin/sh +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# This file downloads the PTB dataset. + +mkdir -p $(cd $(dirname $0) && pwd)/ptb + +cd $(cd $(dirname $0) && pwd)/ptb && \ Review comment: we're moving away from ptb. see https://github.com/apache/incubator-mxnet/pull/11435 This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] piiswrong closed pull request #10524: [MXNET-312] Added Matthew's Correlation Coefficient to metrics
piiswrong closed pull request #10524: [MXNET-312] Added Matthew's Correlation Coefficient to metrics URL: https://github.com/apache/incubator-mxnet/pull/10524 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/python/mxnet/metric.py b/python/mxnet/metric.py index aa3ab44c48a..aa378cae509 100644 --- a/python/mxnet/metric.py +++ b/python/mxnet/metric.py @@ -568,6 +568,27 @@ def fscore(self): else: return 0. +@property +def matthewscc(self): +""" +Calculate the Matthew's Correlation Coefficent +""" +if not self.total_examples: +return 0. + +true_pos = float(self.true_positives) +false_pos = float(self.false_positives) +false_neg = float(self.false_negatives) +true_neg = float(self.true_negatives) +terms = [(true_pos + false_pos), + (true_pos + false_neg), + (true_neg + false_pos), + (true_neg + false_neg)] +denom = 1. +for t in filter(lambda t: t != 0., terms): +denom *= t +return ((true_pos * true_neg) - (false_pos * false_neg)) / math.sqrt(denom) + @property def total_examples(self): return self.false_negatives + self.false_positives + \ @@ -584,7 +605,7 @@ def reset_stats(self): class F1(EvalMetric): """Computes the F1 score of a binary classification problem. -The F1 score is equivalent to weighted average of the precision and recall, +The F1 score is equivalent to harmonic mean of the precision and recall, where the best value is 1.0 and the worst value is 0.0. The formula for F1 score is:: F1 = 2 * (precision * recall) / (precision + recall) @@ -661,6 +682,107 @@ def reset(self): self.metrics.reset_stats() +@register +class MCC(EvalMetric): +"""Computes the Matthews Correlation Coefficient of a binary classification problem. + +While slower to compute than F1 the MCC can give insight that F1 or Accuracy cannot. 
+For instance, if the network always predicts the same result +then the MCC will immediately show this. The MCC is also symmetric with respect +to positive and negative categorization, however, there needs to be both +positive and negative examples in the labels or it will always return 0. +MCC of 0 is uncorrelated, 1 is completely correlated, and -1 is negatively correlated. + +.. math:: +\\text{MCC} = \\frac{ TP \\times TN - FP \\times FN } +{\\sqrt{ (TP + FP) ( TP + FN ) ( TN + FP ) ( TN + FN ) } } + +where 0 terms in the denominator are replaced by 1. + +.. note:: + +This version of MCC only supports binary classification. + +Parameters +-- +name : str +Name of this metric instance for display. +output_names : list of str, or None +Name of predictions that should be used when updating with update_dict. +By default include all predictions. +label_names : list of str, or None +Name of labels that should be used when updating with update_dict. +By default include all labels. +average : str, default 'macro' +Strategy to be used for aggregating across mini-batches. +"macro": average the MCC for each batch. +"micro": compute a single MCC across all batches. 
+ +Examples + +In this example the network almost always predicts positive +>>> false_positives = 1000 +>>> false_negatives = 1 +>>> true_positives = 1 +>>> true_negatives = 1 +>>> predicts = [mx.nd.array( +[[.3, .7]]*false_positives + +[[.7, .3]]*true_negatives + +[[.7, .3]]*false_negatives + +[[.3, .7]]*true_positives +)] +>>> labels = [mx.nd.array( +[0.]*(false_positives + true_negatives) + +[1.]*(false_negatives + true_positives) +)] +>>> f1 = mx.metric.F1() +>>> f1.update(preds = predicts, labels = labels) +>>> mcc = mx.metric.MCC() +>>> mcc.update(preds = predicts, labels = labels) +>>> print f1.get() +('f1', 0.95233560306652054) +>>> print mcc.get() +('mcc', 0.01917751877733392) +""" + +def __init__(self, name='mcc', + output_names=None, label_names=None, average="macro"): +self._average = average +self._metrics = _BinaryClassificationMetrics() +EvalMetric.__init__(self, name=name, +output_names=output_names, label_names=label_names) + +def update(self, labels, preds): +"""Updates the internal evaluation result. + +Parameters +-- +labels : list of `NDArray` +The labels of the data. + +preds : list of
[GitHub] anirudhacharya commented on issue #11475: Amending ONNX importer/exporter #11213
anirudhacharya commented on issue #11475: Amending ONNX importer/exporter #11213 URL: https://github.com/apache/incubator-mxnet/issues/11475#issuecomment-401203574 @spidyDev @Roshrini This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] szha closed pull request #11447: [documentation] Update doc for contrib.rand_zipfian
szha closed pull request #11447: [documentation] Update doc for contrib.rand_zipfian URL: https://github.com/apache/incubator-mxnet/pull/11447 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/python/mxnet/ndarray/contrib.py b/python/mxnet/ndarray/contrib.py index ba402e6f3f8..cc66483f00b 100644 --- a/python/mxnet/ndarray/contrib.py +++ b/python/mxnet/ndarray/contrib.py @@ -55,8 +55,7 @@ def rand_zipfian(true_classes, num_sampled, range_max, ctx=None): range_max: int The number of possible classes. ctx : Context -Device context of output. Default is current context. Overridden by -`mu.context` when `mu` is an NDArray. +Device context of output. Default is current context. Returns --- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] szha commented on a change in pull request #10524: [MXNET-312] Added Matthew's Correlation Coefficient to metrics
szha commented on a change in pull request #10524: [MXNET-312] Added Matthew's Correlation Coefficient to metrics URL: https://github.com/apache/incubator-mxnet/pull/10524#discussion_r199016678 ## File path: tests/python/unittest/test_metric.py ## @@ -122,6 +123,54 @@ def test_f1(): np.testing.assert_almost_equal(microF1.get()[1], fscore_total) np.testing.assert_almost_equal(macroF1.get()[1], (fscore1 + fscore2) / 2.) +def test_mcc(): Review comment: I tested that this implementation of micro mcc is consistent with http://scikit-learn.org/stable/modules/generated/sklearn.metrics.matthews_corrcoef.html This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] ctcyang commented on issue #10696: [MXNET-366]Extend MXNet Distributed Training by AllReduce
ctcyang commented on issue #10696: [MXNET-366]Extend MXNet Distributed Training by AllReduce URL: https://github.com/apache/incubator-mxnet/pull/10696#issuecomment-401201724 I am having trouble building 1444a84 with GPU support, so I was hoping you could help me. `make` seems to cause the following [error](https://github.com/apache/incubator-mxnet/files/2147258/error.txt) during linking phase. Ubuntu 16.04 mpich 3.2 g++ 5.4.0 This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[incubator-mxnet] branch master updated: fix. (#11471)
This is an automated email from the ASF dual-hosted git repository. zhasheng pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git The following commit(s) were added to refs/heads/master by this push: new 4cdc150 fix. (#11471) 4cdc150 is described below commit 4cdc150af8250665b7880f8fbda830eb26476fb7 Author: Da Zheng AuthorDate: Thu Jun 28 16:17:17 2018 -0700 fix. (#11471) --- mkldnn.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mkldnn.mk b/mkldnn.mk index f17fdbc..1be0704 100644 --- a/mkldnn.mk +++ b/mkldnn.mk @@ -32,9 +32,9 @@ endif .PHONY: mkldnn mkldnn_clean -mkldnn_build: $(MKLDNNROOT)/lib/libmkldnn.so +mkldnn_build: $(MKLDNN_LIBFILE) -$(MKLDNNROOT)/lib/libmkldnn.so: +$(MKLDNN_LIBFILE): mkdir -p $(MKLDNNROOT) cd $(MKLDNN_SUBMODDIR) && rm -rf external && cd scripts && ./prepare_mkl.sh && cd .. && cp -a external/*/* $(MKLDNNROOT)/. cmake $(MKLDNN_SUBMODDIR) -DCMAKE_INSTALL_PREFIX=$(MKLDNNROOT) -B$(MKLDNN_BUILDDIR) -DARCH_OPT_FLAGS="-mtune=generic" -DWITH_TEST=OFF -DWITH_EXAMPLE=OFF
[GitHub] szha closed pull request #11471: Fix MKLDNN build for Mac.
szha closed pull request #11471: Fix MKLDNN build for Mac. URL: https://github.com/apache/incubator-mxnet/pull/11471 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/mkldnn.mk b/mkldnn.mk index f17fdbce084..1be0704dcde 100644 --- a/mkldnn.mk +++ b/mkldnn.mk @@ -32,9 +32,9 @@ endif .PHONY: mkldnn mkldnn_clean -mkldnn_build: $(MKLDNNROOT)/lib/libmkldnn.so +mkldnn_build: $(MKLDNN_LIBFILE) -$(MKLDNNROOT)/lib/libmkldnn.so: +$(MKLDNN_LIBFILE): mkdir -p $(MKLDNNROOT) cd $(MKLDNN_SUBMODDIR) && rm -rf external && cd scripts && ./prepare_mkl.sh && cd .. && cp -a external/*/* $(MKLDNNROOT)/. cmake $(MKLDNN_SUBMODDIR) -DCMAKE_INSTALL_PREFIX=$(MKLDNNROOT) -B$(MKLDNN_BUILDDIR) -DARCH_OPT_FLAGS="-mtune=generic" -DWITH_TEST=OFF -DWITH_EXAMPLE=OFF This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] szha closed pull request #11444: Fix flaky test test_operator_gpu.test_spatial_transformer_with_type (…
szha closed pull request #11444: Fix flaky test test_operator_gpu.test_spatial_transformer_with_type (… URL: https://github.com/apache/incubator-mxnet/pull/11444 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/tests/python/gpu/test_operator_gpu.py b/tests/python/gpu/test_operator_gpu.py index 45f52b32a34..bc2ed31c9b5 100644 --- a/tests/python/gpu/test_operator_gpu.py +++ b/tests/python/gpu/test_operator_gpu.py @@ -697,8 +697,7 @@ def test_grid_generator_with_type(): check_consistency(sym, ctx_list, grad_req="add") -@unittest.skip("test fails intermittently. temporarily disabled till it gets fixed. tracked at https://github.com/apache/incubator-mxnet/issues/7645") -@with_seed(1234) +@with_seed() def test_spatial_transformer_with_type(): data = mx.sym.Variable('data') loc = mx.sym.Flatten(data) @@ -707,8 +706,8 @@ def test_spatial_transformer_with_type(): loc = mx.sym.FullyConnected(data=loc, num_hidden=6) sym = mx.sym.SpatialTransformer(data=data, loc=loc, target_shape=(10, 10), transform_type="affine", sampler_type="bilinear") -ctx_list = [{'ctx': mx.gpu(0), 'data': (1, 5, 10, 10), 'type_dict': {'data': np.float32}}, -{'ctx': mx.cpu(0), 'data': (1, 5, 10, 10), 'type_dict': {'data': np.float32}}] +ctx_list = [{'ctx': mx.gpu(0), 'data': (1, 5, 10, 10), 'type_dict': {'data': np.float64}}, +{'ctx': mx.cpu(0), 'data': (1, 5, 10, 10), 'type_dict': {'data': np.float64}}] check_consistency(sym, ctx_list) check_consistency(sym, ctx_list, grad_req="add") This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[incubator-mxnet] branch master updated: Fix flaky test test_operator_gpu.test_spatial_transformer_with_type (#7645) (#11444)
This is an automated email from the ASF dual-hosted git repository. zhasheng pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git The following commit(s) were added to refs/heads/master by this push: new f7a0025 Fix flaky test test_operator_gpu.test_spatial_transformer_with_type (#7645) (#11444) f7a0025 is described below commit f7a00250cf83ba34779b7c6ffc02532d20795f2f Author: ddavydenko AuthorDate: Thu Jun 28 16:16:29 2018 -0700 Fix flaky test test_operator_gpu.test_spatial_transformer_with_type (#7645) (#11444) --- tests/python/gpu/test_operator_gpu.py | 7 +++ 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/python/gpu/test_operator_gpu.py b/tests/python/gpu/test_operator_gpu.py index fd76990..3efc284 100644 --- a/tests/python/gpu/test_operator_gpu.py +++ b/tests/python/gpu/test_operator_gpu.py @@ -701,8 +701,7 @@ def test_grid_generator_with_type(): check_consistency(sym, ctx_list, grad_req="add") -@unittest.skip("test fails intermittently. temporarily disabled till it gets fixed. tracked at https://github.com/apache/incubator-mxnet/issues/7645") -@with_seed(1234) +@with_seed() def test_spatial_transformer_with_type(): data = mx.sym.Variable('data') loc = mx.sym.Flatten(data) @@ -711,8 +710,8 @@ def test_spatial_transformer_with_type(): loc = mx.sym.FullyConnected(data=loc, num_hidden=6) sym = mx.sym.SpatialTransformer(data=data, loc=loc, target_shape=(10, 10), transform_type="affine", sampler_type="bilinear") -ctx_list = [{'ctx': mx.gpu(0), 'data': (1, 5, 10, 10), 'type_dict': {'data': np.float32}}, -{'ctx': mx.cpu(0), 'data': (1, 5, 10, 10), 'type_dict': {'data': np.float32}}] +ctx_list = [{'ctx': mx.gpu(0), 'data': (1, 5, 10, 10), 'type_dict': {'data': np.float64}}, +{'ctx': mx.cpu(0), 'data': (1, 5, 10, 10), 'type_dict': {'data': np.float64}}] check_consistency(sym, ctx_list) check_consistency(sym, ctx_list, grad_req="add")
[GitHub] szha commented on issue #11213: [MXNET-533] MXNet-ONNX export
szha commented on issue #11213: [MXNET-533] MXNet-ONNX export URL: https://github.com/apache/incubator-mxnet/pull/11213#issuecomment-401200760 @sandeep-krishnamurthy thanks for the efforts. Please respect "request changes" as vetos nonetheless and try and reach @zhreshold, especially given that you sit in the same office. Much appreciated. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] Caenorst commented on a change in pull request #11325: Added TensorRT runtime integration
Caenorst commented on a change in pull request #11325: Added TensorRT runtime integration URL: https://github.com/apache/incubator-mxnet/pull/11325#discussion_r199014063 ## File path: src/executor/tensorrt_pass.cc ## @@ -0,0 +1,583 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2018 by Contributors + * \file tensorrt_pass.cc + * \brief Replace TRT compatible subgraphs by TRT engines + * \author Clement Fuji Tsang + */ + +#if MXNET_USE_TENSORRT + +#include +#include +#include +#include +#include +#include + +#include "./onnx_to_tensorrt.h" +#include "./exec_pass.h" +#include "../operator/contrib/nnvm_to_onnx-inl.h" + +namespace mxnet { +namespace exec { + +using NodePtr = nnvm::NodePtr; + +/*! 
+ * \brief Custom graph class, which will contain bi-directional nodes + * we need to compute DFS and reverse DFS for graph partitioning + */ +class BidirectionalGraph { + public: + struct Node { +nnvm::Node* nnvmptr; +std::vector inputs; +std::vector outputs; + }; + std::vector nodes; + std::unordered_map nnvm2nid; + std::vector outputs; + static const std::unordered_set unconditionalTRTop; + + explicit BidirectionalGraph(const Graph ) { +auto& idx = g.indexed_graph(); +auto num_nodes = idx.num_nodes(); +nodes.reserve(num_nodes); +nnvm2nid.reserve(num_nodes); Review comment: It's actually not node id in the IndexedGraph but the position in the BidirectionalGraph `std::vector` attribute `node` (position This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] zhreshold commented on issue #11213: [MXNET-533] MXNet-ONNX export
zhreshold commented on issue #11213: [MXNET-533] MXNet-ONNX export URL: https://github.com/apache/incubator-mxnet/pull/11213#issuecomment-401197110 I opened a new issue regarding my concerns in #11475 This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] zheng-da commented on a change in pull request #11371: [MXNET-486] Create CPP test for concat MKLDNN operator
zheng-da commented on a change in pull request #11371: [MXNET-486] Create CPP test for concat MKLDNN operator URL: https://github.com/apache/incubator-mxnet/pull/11371#discussion_r199010760 ## File path: tests/cpp/operator/mkldnn.cc ## @@ -804,6 +1012,24 @@ TEST(IMPERATIVE, SumBackwardsOp) { TestOp(attrs, VerifySumBackwardsResult); } +TEST(IMPERATIVE, ConcatOp) { + for (int num_inputs = 2; num_inputs < 4; num_inputs++) { +for (int dim = 0; dim < 5; dim++) { + OpAttrs attrs = GetConcatOp(num_inputs, dim); + TestConcatOp(attrs, VerifyConcatResult); +} + } +} + +TEST(IMPERATIVE, ConcatBackwardsOp) { + for (int num_inputs = 2; num_inputs < 3; num_inputs++) { Review comment: the same comment for here. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] aaronmarkham commented on a change in pull request #11304: [MXNET-594] Added Learning Rate Finder tutorial
aaronmarkham commented on a change in pull request #11304: [MXNET-594] Added Learning Rate Finder tutorial URL: https://github.com/apache/incubator-mxnet/pull/11304#discussion_r199007868 ## File path: docs/tutorials/gluon/learning_rate_finder.md ## @@ -0,0 +1,322 @@ + +# Learning Rate Finder + +Setting the learning rate for stochastic gradient descent (SGD) is crucially important when training neural network because it controls both the speed of convergence and the ultimate performance of the network. Set the learning too low and you could be twiddling your thumbs for quite some time as the parameters update very slowly. Set it too high and the updates will skip over optimal solutions, or worse the optimizer might not converge at all! + +Leslie Smith from the U.S. Naval Research Laboratory presented a method for finding a good learning rate in a paper called ["Cyclical Learning Rates for Training Neural Networks"](https://arxiv.org/abs/1506.01186). We take a look at the central idea of the paper, cyclical learning rate schedules, in the tutorial called 'Advanced Learning Rate Schedules', but in this tutorial we implement a 'Learning Rate Finder' in MXNet with the Gluon API that you can use while training your own networks. + +## Simple Idea + +Given an initialized network, a defined loss and a training dataset we take the following steps: + +1. train one batch at a time (a.k.a. an iteration) Review comment: nit: Train Start Record This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] aaronmarkham commented on a change in pull request #11304: [MXNET-594] Added Learning Rate Finder tutorial
aaronmarkham commented on a change in pull request #11304: [MXNET-594] Added Learning Rate Finder tutorial URL: https://github.com/apache/incubator-mxnet/pull/11304#discussion_r199008464 ## File path: docs/tutorials/gluon/learning_rate_finder.md ## @@ -0,0 +1,322 @@ + +# Learning Rate Finder + +Setting the learning rate for stochastic gradient descent (SGD) is crucially important when training neural network because it controls both the speed of convergence and the ultimate performance of the network. Set the learning too low and you could be twiddling your thumbs for quite some time as the parameters update very slowly. Set it too high and the updates will skip over optimal solutions, or worse the optimizer might not converge at all! + +Leslie Smith from the U.S. Naval Research Laboratory presented a method for finding a good learning rate in a paper called ["Cyclical Learning Rates for Training Neural Networks"](https://arxiv.org/abs/1506.01186). We take a look at the central idea of the paper, cyclical learning rate schedules, in the tutorial called 'Advanced Learning Rate Schedules', but in this tutorial we implement a 'Learning Rate Finder' in MXNet with the Gluon API that you can use while training your own networks. + +## Simple Idea + +Given an initialized network, a defined loss and a training dataset we take the following steps: + +1. train one batch at a time (a.k.a. an iteration) +2. start with a very small learning rate (e.g. 0.01) and slowly increase it every iteration +3. record the training loss and continue until we see the training loss diverge + +We then analyse the results by plotting a graph of the learning rate against the training loss as seen below (taking note of the log scales). + +![png](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/doc/tutorials/lr_finder/finder_plot.png) + +As expected, for very small learning rates we don't see much change in the loss as the parameter updates are negligible. 
At a learning rate of 0.001, we start to see the loss fall. Setting the initial learning rate here is reasonable, but we still have the potential to learn faster. We observe a drop in the loss up until 0.1 where the loss appears to diverge. We want to set the initial learning rate as high as possible before the loss becomes unstable, so we choose a learning rate of 0.05. Review comment: Could you add event pointers on the chart for increased clarity. --> at 0.001 loss falls --> at 0.1 divergence (loss increases) --> at 0.05 loss seems lowest (right?) This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] zheng-da commented on issue #11448: fail to fall back when sparse arrays are passed to MKLDNN-enabled operators.
zheng-da commented on issue #11448: fail to fall back when sparse arrays are passed to MKLDNN-enabled operators. URL: https://github.com/apache/incubator-mxnet/issues/11448#issuecomment-401195601 I created a PR for batchnorm. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] hetong007 opened a new pull request #11474: Fix record io augmentation speed
hetong007 opened a new pull request #11474: Fix record io augmentation speed URL: https://github.com/apache/incubator-mxnet/pull/11474 ## Description ## 1. Fixing https://github.com/apache/incubator-mxnet/issues/11469 2. Fixing default augmentation setting in current image classification example. ## Checklist ## ### Essentials ### Please feel free to remove inapplicable items for your PR. - [X] Changes are complete (i.e. I finished coding on this PR) - [X] To the best of my knowledge, examples are either not affected by this change, or have been fixed to be compatible with this change This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] zheng-da opened a new pull request #11473: batchnorm falls back for sparse arrays.
zheng-da opened a new pull request #11473: batchnorm falls back for sparse arrays. URL: https://github.com/apache/incubator-mxnet/pull/11473 ## Description ## (Brief description on what this PR is about) ## Checklist ## ### Essentials ### Please feel free to remove inapplicable items for your PR. - [ ] The PR title starts with [MXNET-$JIRA_ID], where $JIRA_ID refers to the relevant [JIRA issue](https://issues.apache.org/jira/projects/MXNET/issues) created (except PRs with tiny changes) - [ ] Changes are complete (i.e. I finished coding on this PR) - [ ] All changes have test coverage: - Unit tests are added for small changes to verify correctness (e.g. adding a new operator) - Nightly tests are added for complicated/long-running ones (e.g. changing distributed kvstore) - Build tests will be added for build configuration changes (e.g. adding a new build option with NCCL) - [ ] Code is well-documented: - For user-facing API changes, API doc string has been updated. - For new C++ functions in header files, their functionalities and arguments are documented. - For new examples, README.md is added to explain the what the example does, the source of the dataset, expected performance on test set and reference to the original paper if applicable - Check the API doc at http://mxnet-ci-doc.s3-accelerate.dualstack.amazonaws.com/PR-$PR_ID/$BUILD_ID/index.html - [ ] To the my best knowledge, examples are either not affected by this change, or have been fixed to be compatible with this change This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] rahul003 commented on issue #11469: Performance regression in augmentation
rahul003 commented on issue #11469: Performance regression in augmentation URL: https://github.com/apache/incubator-mxnet/issues/11469#issuecomment-401194743 That was it. Was accidentally performing an unnecessary affine transform before cropping. Tong is sending out a PR to fix it. Will close that issue after that PR goes in. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] rahul003 edited a comment on issue #11469: Performance regression in augmentation
rahul003 edited a comment on issue #11469: Performance regression in augmentation URL: https://github.com/apache/incubator-mxnet/issues/11469#issuecomment-401194743 That was it. Was accidentally performing an unnecessary affine transform before cropping. Tong is sending out a PR to fix it. Will close the issue after that PR goes in. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] gigasquid commented on a change in pull request #11205: Clojure Contrib Package
gigasquid commented on a change in pull request #11205: Clojure Contrib Package URL: https://github.com/apache/incubator-mxnet/pull/11205#discussion_r199008534 ## File path: contrib/clojure-package/README.md ## @@ -0,0 +1,204 @@ +# Clojure MXNet + +A clojure package to the MXNet Deep Learning library + +## Introduction + +MXNet is a first class, modern deep learning library that AWS has officially picked as its chosen library. It supports multiple languages on a first class basis and is incubating as an Apache project. + +The motivation for creating a Clojure package is to be able to open the deep learning library to the Clojure ecosystem and build bridges for future development and innovation for the community. It provides all the needed tools including low level and high level apis, dynamic graphs, and things like GAN and natural language support. + +For high leverage, the Clojure package has been built on the existing Scala package using interop. This has allowed rapid development and close parity with the Scala functionality. This also leaves the door open to directly developing code against the jni-bindings with Clojure in the future in an incremental fashion, using the test suites as a refactoring guide. + +## Current State and Plans + +The Clojure package is nearing the end of its first development milestone which is to achieve a close parity with the Scala package and to potentially be included into the main project for official Clojure language support. + +What is needed now is alpha testing on both OSX and Linux to discover any bugs, rough edges, and generally harden it before an official PR is opened on the main project. + +Help with this effort is greatly appreciated and contributors will be recognized in the project README. + +Testing instructions can be found in the testing.md + +## Getting Started + +The following systems are supported: + +- OSX cpu +- Linux cpu +- Linux gpu + +There are two ways of getting going. 
The first way is the easiest and that is to use the pre-built jars from Maven. The second way is to build from source. In both cases, you will need to load the prereqs and dependencies, (like opencv). + +It's been tested on AWS Deep Learning AMI and OSX High Sierra 10.13.4 + + +### Prerequisites + +**If you are using the AWS Deep Learning Ubuntu or Linux AMI you should be good to go without doing anything on this step.** + + +Follow the instructions from https://mxnet.incubator.apache.org/install/osx_setup.html or https://mxnet.incubator.apache.org/install/ubuntu_setup.html +about _Prepare Environment for GPU Installation_ +and _Install MXNet dependencies_ + + + Cloning the repo and running from source + +To use the prebuilt jars, you will need to replace the native version of the line in the project dependencies with your configuration. + +`[org.apache.mxnet/mxnet-full_2.11-linux-x86_64-gpu "1.2.0"]` +or +`[org.apache.mxnet/mxnet-full_2.11-linux-x86_64-cpu "1.2.0"]` +or +`[org.apache.mxnet/mxnet-full_2.11-osx-x86_64-cpu "1.2.0"]` + + +```clojure + +(ns tutorial.ndarray + (:require [org.apache.clojure-mxnet.ndarray :as ndarray] +[org.apache.clojure-mxnet.context :as context])) + +;;Create NDArray +(def a (ndarray/zeros [100 50])) ;;all zero array of dimension 100 x 50 +(def b (ndarray/ones [256 32 128 1])) ;; all one array of dimension +(def c (ndarray/array [1 2 3 4 5 6] [2 3])) ;; array with contents of a shape 2 x 3 + +;;; There are also ways to convert to a vec or get the shape as an object or vec +(ndarray/->vec c) ;=> [1.0 2.0 3.0 4.0 5.0 6.0] +``` + +See the examples/tutorial section for more. + + +The jars from maven with the needed MXNet native binaries in it. On startup, the native libraries are extracted from the jar and copied into a temporary location on your path. On termination, they are deleted. 
+ +If you want details on the flags (opencv version and cuda version of the jars), they are documented here https://cwiki.apache.org/confluence/display/MXNET/MXNet-Scala+Release+Process + + ### Build from MXNET Source + +Checkout the latest sha from the main package + +`git clone --recursive https://github.com/dmlc/mxnet ~/mxnet` +`cd ~/mxnet` + + +`git checkout tags/1.2.0 -b release-1.2.0` + +`git submodule update --init --recursive` + +Sometimes it is useful to use this script to clean hard +https://gist.github.com/nicktoumpelis/11214362 + + +Go here to do the base package installation https://mxnet.incubator.apache.org/install/index.html + + Run `make scalapkg` then `make scalainstall` + +then replace the correct jar for your architecture in the project.clj, example `[ml.dmlc.mxnet/mxnet-full_2.11-osx-x86_64-cpu "1.0.1-SNAPSHOT"]` + + Test your installation + +To test your installation, you should run `lein test`. This will run the test suite (CPU) for the clojure package. + + + Generation of NDArray and Symbol apis + +The bulk of the ndarray and symbol apis are generated via java reflection
[GitHub] sandeep-krishnamurthy edited a comment on issue #11406: [MXNET-599] Partial shape infer for Slice
sandeep-krishnamurthy edited a comment on issue #11406: [MXNET-599] Partial shape infer for Slice URL: https://github.com/apache/incubator-mxnet/pull/11406#issuecomment-401194229 Thanks a lot @rahul003 - This is very useful fix for all Keras-MXNet users using slice operation. Overall looks good to me, except 1 change that Jun mentioned about unknown dimension should be unknown. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] sandeep-krishnamurthy commented on issue #11406: [MXNET-599] Partial shape infer for Slice
sandeep-krishnamurthy commented on issue #11406: [MXNET-599] Partial shape infer for Slice URL: https://github.com/apache/incubator-mxnet/pull/11406#issuecomment-401194229 Thanks a lot @rahul003 - This is very useful fix for all Keras-MXNet users using slice operation. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] mkolod commented on a change in pull request #11325: Added TensorRT runtime integration
mkolod commented on a change in pull request #11325: Added TensorRT runtime integration URL: https://github.com/apache/incubator-mxnet/pull/11325#discussion_r199008230 ## File path: src/executor/tensorrt_pass.cc ## @@ -0,0 +1,583 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2018 by Contributors + * \file tensorrt_pass.cc + * \brief Replace TRT compatible subgraphs by TRT engines + * \author Clement Fuji Tsang + */ + +#if MXNET_USE_TENSORRT + +#include +#include +#include +#include +#include +#include + +#include "./onnx_to_tensorrt.h" +#include "./exec_pass.h" +#include "../operator/contrib/nnvm_to_onnx-inl.h" + +namespace mxnet { +namespace exec { + +using NodePtr = nnvm::NodePtr; + +/*! 
+ * \brief Custom graph class, which will contain bi-directional nodes + * we need to compute DFS and reverse DFS for graph partitioning + */ +class BidirectionalGraph { + public: + struct Node { +nnvm::Node* nnvmptr; +std::vector inputs; +std::vector outputs; + }; + std::vector nodes; + std::unordered_map nnvm2nid; + std::vector outputs; + static const std::unordered_set unconditionalTRTop; + + explicit BidirectionalGraph(const Graph ) { +auto& idx = g.indexed_graph(); +auto num_nodes = idx.num_nodes(); +nodes.reserve(num_nodes); +nnvm2nid.reserve(num_nodes); +outputs.reserve(idx.outputs().size()); +DFSVisit(g.outputs, [this](const nnvm::NodePtr& n) { + BidirectionalGraph::Node new_node; + new_node.nnvmptr = n.get(); + nnvm2nid[n.get()] = static_cast(nodes.size()); + nodes.emplace_back(std::move(new_node)); +}); +for (const auto& it : nnvm2nid) { + nnvm::Node* nnvmnode = it.first; + uint32_t nid = it.second; + for (auto& n : nnvmnode->inputs) { +uint32_t input_nid = nnvm2nid[n.node.get()]; +nodes[input_nid].outputs.emplace_back([nid]); +nodes[nid].inputs.emplace_back([input_nid]); + } +} +for (auto& e : g.outputs) { + uint32_t nid = nnvm2nid[e.node.get()]; + outputs.emplace_back([nid]); +} + } + + template + void DFS(const std::vector& heads, bool reverse, FVisit fvisit) { +std::unordered_set visited; +std::deque stack(heads.begin(), heads.end()); Review comment: Good catch, will change to std::stack in this case. There were other cases for std::deque in the code, but this wasn't one of them. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] sandeep-krishnamurthy commented on a change in pull request #11406: [MXNET-599] Partial shape infer for Slice
sandeep-krishnamurthy commented on a change in pull request #11406: [MXNET-599] Partial shape infer for Slice URL: https://github.com/apache/incubator-mxnet/pull/11406#discussion_r199007699 ## File path: src/operator/tensor/matrix_op-inl.h ## @@ -674,13 +675,23 @@ inline void SetSliceOpOutputDimSize(const index_t i, const int b, const int e, const int s, TShape* oshape) { if (s > 0) { -CHECK_LT(b, e) << "slicing with begin=[" << i << "]=" << b << ", end[" << i << "]=" +CHECK_LE(b, e) << "slicing with begin=[" << i << "]=" << b << ", end[" << i << "]=" << e << ", and step[" << i << "]=" << s << " is invalid"; -(*oshape)[i] = (e - b - 1) / s + 1; +if (e == b) { + // for partial shape infer + (*oshape)[i] = 0; +} else { + (*oshape)[i] = (e - b - 1) / s + 1; Review comment: Yes that is correct. Even if one of the shape is unknown (0) we cannot/should not infer output shape. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] mkolod commented on a change in pull request #11325: Added TensorRT runtime integration
mkolod commented on a change in pull request #11325: Added TensorRT runtime integration URL: https://github.com/apache/incubator-mxnet/pull/11325#discussion_r199006909 ## File path: src/executor/graph_executor.cc ## @@ -940,6 +968,91 @@ void GraphExecutor::FinishInitGraph(nnvm::Symbol symbol, this->InitOpSegs(); } + +Graph GraphExecutor::ReinitGraph(Graph&& g, const Context _ctx, Review comment: Will do. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] aaronmarkham commented on issue #11287: [MXNET-548] fixed path for auto_module_index.js
aaronmarkham commented on issue #11287: [MXNET-548] fixed path for auto_module_index.js URL: https://github.com/apache/incubator-mxnet/pull/11287#issuecomment-401192228 5th restart due to flaky CI... This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services