This is an automated email from the ASF dual-hosted git repository.
skm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new 0a2419f [MXNET-1291] solve pylint errors in examples with issue no.12205 (#13815)
0a2419f is described below
commit 0a2419ffbc7b94448110bf20e52d83557ccf441f
Author: Neil Chien <[email protected]>
AuthorDate: Mon Feb 11 14:24:46 2019 +0800
[MXNET-1291] solve pylint errors in examples with issue no.12205 (#13815)
* Unify the style here
Unify the style here and remove the testing 'print' code segment.
* Unify the description of comment
Change the description of comment from "multi-layer perceptron" to "Get multi-layer perceptron"
* Unify the style of comments
Unify the style of comments suggested by @sandeep-krishnamurthy
* git pull the latest code from master of incubator-mxnet
* Complete rebase
* Solve PEP8 [C0304] Final newline missing
Solve example/deep-embedded-clustering/solver.py(150): [C0304] Final newline missing
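Most of the hunks below repeat a small set of pylint fixes. As a rough
before-and-after sketch of the recurring patterns (a hypothetical function,
not code taken from this commit):

"""Module docstring added to satisfy pylint C0111 (missing-docstring)."""
import numpy


def sample_accuracy(correct, total, samples):
    """Per-function docstring, also required by C0111."""
    if isinstance(samples, numpy.ndarray):  # was 'type(samples) is numpy.ndarray' (unidiomatic-typecheck)
        correct += int((samples > 0).sum())
    if total % 100 == 0:  # was '0 == total % 100' (C0122, misplaced comparison constant)
        print("checked %d minibatches" % total)
    return correct / float(total)  # and each file now ends with a final newline (C0304)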
---
example/bayesian-methods/algos.py | 24 ++--
example/bayesian-methods/bdk_demo.py | 101 ++++++++-------
example/bayesian-methods/data_loader.py | 5 +-
example/bayesian-methods/utils.py | 38 ++++--
example/caffe/caffe_net.py | 52 +++++---
example/caffe/data.py | 50 +++----
example/caffe/train_model.py | 24 ++--
example/capsnet/capsulelayers.py | 8 +-
example/capsnet/capsulenet.py | 71 ++++++----
.../data_helpers.py | 65 +++++-----
.../cnn_chinese_text_classification/text_cnn.py | 144 ++++++++++++++++-----
example/cnn_text_classification/data_helpers.py | 57 ++++----
example/deep-embedded-clustering/model.py | 5 +-
example/deep-embedded-clustering/solver.py | 5 +-
14 files changed, 396 insertions(+), 253 deletions(-)
diff --git a/example/bayesian-methods/algos.py b/example/bayesian-methods/algos.py
index f7b3620..29ba3ec 100644
--- a/example/bayesian-methods/algos.py
+++ b/example/bayesian-methods/algos.py
@@ -14,13 +14,13 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-
+"""Create implementation of algorithms of HMC, stepHMC, SGD, SGLD and
DistilledSGLD"""
from __future__ import print_function
+import time
+import numpy
import mxnet as mx
import mxnet.ndarray as nd
-import time
-import logging
-from utils import *
+from utils import copy_param, get_executor, sample_test_regression, sample_test_acc
def calc_potential(exe, params, label_name, noise_precision, prior_precision):
@@ -35,6 +35,7 @@ def calc_potential(exe, params, label_name, noise_precision, prior_precision):
def calc_grad(exe, exe_grads, params, X, Y, label_name=None, outgrad_f=None):
+ """Calculate gradient"""
exe.copy_params_from(params)
exe.arg_dict['data'][:] = X
if outgrad_f is None:
@@ -48,8 +49,8 @@ def calc_grad(exe, exe_grads, params, X, Y, label_name=None, outgrad_f=None):
v.wait_to_read()
-def step_HMC(exe, exe_params, exe_grads, label_key, noise_precision, prior_precision, L=10,
-             eps=1E-6):
+def step_HMC(exe, exe_params, exe_grads, label_key, noise_precision, prior_precision, L=10, eps=1E-6):
+ """Generate the implementation of step HMC"""
init_params = {k: v.copyto(v.context) for k, v in exe_params.items()}
end_params = {k: v.copyto(v.context) for k, v in exe_params.items()}
    init_momentums = {k: mx.random.normal(0, 1, v.shape) for k, v in init_params.items()}
@@ -102,6 +103,7 @@ def step_HMC(exe, exe_params, exe_grads, label_key, noise_precision, prior_preci
def HMC(sym, data_inputs, X, Y, X_test, Y_test, sample_num,
initializer=None, noise_precision=1 / 9.0, prior_precision=0.1,
learning_rate=1E-6, L=10, dev=mx.gpu()):
+ """Generate the implementation of HMC"""
label_key = list(set(data_inputs.keys()) - set(['data']))[0]
    exe, exe_params, exe_grads, _ = get_executor(sym, dev, data_inputs, initializer)
exe.arg_dict['data'][:] = X
@@ -134,6 +136,7 @@ def SGD(sym, data_inputs, X, Y, X_test, Y_test, total_iter_num,
out_grad_f=None,
initializer=None,
minibatch_size=100, dev=mx.gpu()):
+ """Generate the implementation of SGD"""
if out_grad_f is None:
label_key = list(set(data_inputs.keys()) - set(['data']))[0]
    exe, params, params_grad, _ = get_executor(sym, dev, data_inputs, initializer)
@@ -173,6 +176,7 @@ def SGLD(sym, X, Y, X_test, Y_test, total_iter_num,
initializer=None,
minibatch_size=100, thin_interval=100, burn_in_iter_num=1000,
task='classification',
dev=mx.gpu()):
+ """Generate the implementation of SGLD"""
if out_grad_f is None:
label_key = list(set(data_inputs.keys()) - set(['data']))[0]
    exe, params, params_grad, _ = get_executor(sym, dev, data_inputs, initializer)
@@ -200,7 +204,7 @@ def SGLD(sym, X, Y, X_test, Y_test, total_iter_num,
if i < burn_in_iter_num:
continue
else:
- if 0 == (i - burn_in_iter_num) % thin_interval:
+ if (i - burn_in_iter_num) % thin_interval == 0:
if optimizer.lr_scheduler is not None:
lr = optimizer.lr_scheduler(optimizer.num_update)
else:
@@ -238,6 +242,7 @@ def DistilledSGLD(teacher_sym, student_sym,
minibatch_size=100,
task='classification',
dev=mx.gpu()):
+ """Generate the implementation of DistilledSGLD"""
teacher_exe, teacher_params, teacher_params_grad, _ = \
        get_executor(teacher_sym, dev, teacher_data_inputs, teacher_initializer)
student_exe, student_params, student_params_grad, _ = \
@@ -323,13 +328,14 @@ def DistilledSGLD(teacher_sym, student_sym,
sample_test_acc(teacher_exe, X=X, Y=Y, label_num=10,
minibatch_size=minibatch_size)
print("Student: Test ACC %d/%d=%f, Train ACC %d/%d=%f" %
(test_correct, test_total,
- test_acc, train_correct,
train_total, train_acc))
+
test_acc, train_correct,
+
train_total, train_acc))
print("Teacher: Test ACC %d/%d=%f, Train ACC %d/%d=%f" \
                  % (teacher_test_correct, teacher_test_total, teacher_test_acc,
                     teacher_train_correct, teacher_train_total, teacher_train_acc))
else:
print("Current Iter Num: %d" % (i + 1), "Time Spent: %f" %
(end - start), "MSE:",
- sample_test_regression(exe=student_exe, X=X_test,
Y=Y_test,
+ sample_test_regression(exe=student_exe, X=X_test,
Y=Y_test,
minibatch_size=minibatch_size,
save_path='regression_DSGLD.txt'))
start = time.time()
diff --git a/example/bayesian-methods/bdk_demo.py b/example/bayesian-methods/bdk_demo.py
index cd39bfd..83a4319 100644
--- a/example/bayesian-methods/bdk_demo.py
+++ b/example/bayesian-methods/bdk_demo.py
@@ -14,21 +14,21 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-
+"""Run Stochastic Gradient Langevin Dynamics (SGLD) and Bayesian Dark
Knowledge (BDK)"""
from __future__ import print_function
-import mxnet as mx
-import mxnet.ndarray as nd
+import argparse
+import time
import numpy
-import logging
import matplotlib.pyplot as plt
-from scipy.stats import gaussian_kde
-import argparse
-from algos import *
-from data_loader import *
-from utils import *
+import mxnet as mx
+import mxnet.ndarray as nd
+from algos import HMC, SGD, SGLD, DistilledSGLD
+from data_loader import load_mnist, load_toy, load_synthetic
+from utils import BiasXavier, SGLDScheduler
class CrossEntropySoftmax(mx.operator.NumpyOp):
+ """Calculate CrossEntropy softmax function"""
def __init__(self):
super(CrossEntropySoftmax, self).__init__(False)
@@ -58,6 +58,7 @@ class CrossEntropySoftmax(mx.operator.NumpyOp):
class LogSoftmax(mx.operator.NumpyOp):
+ """Generate helper functions to evaluate softmax loss function"""
def __init__(self):
super(LogSoftmax, self).__init__(False)
@@ -103,6 +104,7 @@ def regression_student_grad(student_outputs, teacher_pred, teacher_noise_precisi
def get_mnist_sym(output_op=None, num_hidden=400):
+ """Get symbol of mnist"""
net = mx.symbol.Variable('data')
    net = mx.symbol.FullyConnected(data=net, name='mnist_fc1', num_hidden=num_hidden)
net = mx.symbol.Activation(data=net, name='mnist_relu1', act_type="relu")
@@ -117,6 +119,7 @@ def get_mnist_sym(output_op=None, num_hidden=400):
def synthetic_grad(X, theta, sigma1, sigma2, sigmax, rescale_grad=1.0, grad=None):
+ """Get synthetic gradient value"""
if grad is None:
grad = nd.empty(theta.shape, theta.context)
theta1 = theta.asnumpy()[0]
@@ -128,17 +131,16 @@ def synthetic_grad(X, theta, sigma1, sigma2, sigmax, rescale_grad=1.0, grad=None
-(X - theta1 - theta2) ** 2 / (2 * vx))
grad_npy = numpy.zeros(theta.shape)
    grad_npy[0] = -rescale_grad * ((numpy.exp(-(X - theta1) ** 2 / (2 * vx)) * (X - theta1) / vx
-                                   + numpy.exp(-(X - theta1 - theta2) ** 2 / (2 * vx)) * (
-                                       X - theta1 - theta2) / vx) / denominator).sum() \
-                  + theta1 / v1
-    grad_npy[1] = -rescale_grad * ((numpy.exp(-(X - theta1 - theta2) ** 2 / (2 * vx)) * (
-        X - theta1 - theta2) / vx) / denominator).sum() \
-                  + theta2 / v2
+                                   + numpy.exp(-(X - theta1 - theta2) ** 2 / (2 * vx)) *
+                                   (X - theta1 - theta2) / vx) / denominator).sum() + theta1 / v1
+    grad_npy[1] = -rescale_grad * ((numpy.exp(-(X - theta1 - theta2) ** 2 / (2 * vx)) *
+                                   (X - theta1 - theta2) / vx) / denominator).sum() + theta2 / v2
grad[:] = grad_npy
return grad
def get_toy_sym(teacher=True, teacher_noise_precision=None):
+ """Get toy symbol"""
if teacher:
net = mx.symbol.Variable('data')
        net = mx.symbol.FullyConnected(data=net, name='teacher_fc1', num_hidden=100)
@@ -160,8 +162,9 @@ def dev(gpu_id=None):
return mx.gpu(gpu_id) if gpu_id else mx.cpu()
-def run_mnist_SGD(training_num=50000, gpu_id=None):
- X, Y, X_test, Y_test = load_mnist(training_num)
+
+def run_mnist_SGD(num_training=50000, gpu_id=None):
+ X, Y, X_test, Y_test = load_mnist(num_training)
minibatch_size = 100
net = get_mnist_sym()
data_shape = (minibatch_size,) + X.shape[1::]
@@ -175,8 +178,8 @@ def run_mnist_SGD(training_num=50000, gpu_id=None):
lr=5E-6, prior_precision=1.0, minibatch_size=100)
-def run_mnist_SGLD(training_num=50000, gpu_id=None):
- X, Y, X_test, Y_test = load_mnist(training_num)
+def run_mnist_SGLD(num_training=50000, gpu_id=None):
+ X, Y, X_test, Y_test = load_mnist(num_training)
minibatch_size = 100
net = get_mnist_sym()
data_shape = (minibatch_size,) + X.shape[1::]
@@ -191,10 +194,11 @@ def run_mnist_SGLD(training_num=50000, gpu_id=None):
thin_interval=100, burn_in_iter_num=1000)
-def run_mnist_DistilledSGLD(training_num=50000, gpu_id=None):
- X, Y, X_test, Y_test = load_mnist(training_num)
+def run_mnist_DistilledSGLD(num_training=50000, gpu_id=None):
+ """Run DistilledSGLD on mnist dataset"""
+ X, Y, X_test, Y_test = load_mnist(num_training)
minibatch_size = 100
- if training_num >= 10000:
+ if num_training >= 10000:
num_hidden = 800
total_iter_num = 1000000
teacher_learning_rate = 1E-6
@@ -235,6 +239,7 @@ def run_mnist_DistilledSGLD(training_num=50000, gpu_id=None):
def run_toy_SGLD(gpu_id=None):
+ """Run SGLD on toy dataset"""
X, Y, X_test, Y_test = load_toy()
minibatch_size = 1
teacher_noise_precision = 1.0 / 9.0
@@ -243,20 +248,26 @@ def run_toy_SGLD(gpu_id=None):
data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)),
                   'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev(gpu_id))}
initializer = mx.init.Uniform(0.07)
- exe, params, _ = \
- SGLD(sym=net, data_inputs=data_inputs,
- X=X, Y=Y, X_test=X_test, Y_test=Y_test, total_iter_num=50000,
- initializer=initializer,
- learning_rate=1E-4,
-             # lr_scheduler=mx.lr_scheduler.FactorScheduler(100000, 0.5),
- prior_precision=0.1,
- burn_in_iter_num=1000,
- thin_interval=10,
- task='regression',
- minibatch_size=minibatch_size, dev=dev(gpu_id))
-
-
-def run_toy_DistilledSGLD(gpu_id=None):
+ exe, params, _ = SGLD(sym=net,
+ data_inputs=data_inputs,
+ X=X,
+ Y=Y,
+ X_test=X_test,
+ Y_test=Y_test,
+ total_iter_num=50000,
+ initializer=initializer,
+ learning_rate=1E-4,
+                          # lr_scheduler=mx.lr_scheduler.FactorScheduler(100000, 0.5),
+ prior_precision=0.1,
+ burn_in_iter_num=1000,
+ thin_interval=10,
+ task='regression',
+ minibatch_size=minibatch_size,
+                          dev=dev(gpu_id))  # disable=unbalanced-tuple-unpacking
+
+
+def run_toy_DistilledSGLD(gpu_id):
+ """Run DistilledSGLD on toy dataset"""
X, Y, X_test, Y_test = load_toy()
minibatch_size = 1
teacher_noise_precision = 1.0
@@ -288,6 +299,7 @@ def run_toy_DistilledSGLD(gpu_id=None):
def run_toy_HMC(gpu_id=None):
+ """Run HMC on toy dataset"""
X, Y, X_test, Y_test = load_toy()
minibatch_size = Y.shape[0]
noise_precision = 1 / 9.0
@@ -302,6 +314,7 @@ def run_toy_HMC(gpu_id=None):
def run_synthetic_SGLD():
+ """Run synthetic SGLD"""
theta1 = 0
theta2 = 1
sigma1 = numpy.sqrt(10)
@@ -322,14 +335,14 @@ def run_synthetic_SGLD():
grad = nd.empty((2,), mx.cpu())
samples = numpy.zeros((2, total_iter_num))
start = time.time()
- for i in xrange(total_iter_num):
+ for i in range(total_iter_num):
if (i + 1) % 100000 == 0:
end = time.time()
print("Iter:%d, Time spent: %f" % (i + 1, end - start))
start = time.time()
ind = numpy.random.randint(0, X.shape[0])
- synthetic_grad(X[ind], theta, sigma1, sigma2, sigmax, rescale_grad=
- X.shape[0] / float(minibatch_size), grad=grad)
+ synthetic_grad(X[ind], theta, sigma1, sigma2, sigmax,
+                       rescale_grad=X.shape[0] / float(minibatch_size), grad=grad)
updater('theta', grad, theta)
samples[:, i] = theta.asnumpy()
plt.hist2d(samples[0, :], samples[1, :], (200, 200), cmap=plt.cm.jet)
@@ -354,18 +367,18 @@ if __name__ == '__main__':
args = parser.parse_args()
training_num = args.training
if args.dataset == 1:
- if 0 == args.algorithm:
+ if args.algorithm == 0:
run_mnist_SGD(training_num, gpu_id=args.gpu)
- elif 1 == args.algorithm:
+ elif args.algorithm == 1:
run_mnist_SGLD(training_num, gpu_id=args.gpu)
else:
run_mnist_DistilledSGLD(training_num, gpu_id=args.gpu)
elif args.dataset == 0:
- if 1 == args.algorithm:
+ if args.algorithm == 1:
run_toy_SGLD(gpu_id=args.gpu)
- elif 2 == args.algorithm:
+ elif args.algorithm == 2:
run_toy_DistilledSGLD(gpu_id=args.gpu)
- elif 3 == args.algorithm:
+ elif args.algorithm == 3:
run_toy_HMC(gpu_id=args.gpu)
else:
run_synthetic_SGLD()
diff --git a/example/bayesian-methods/data_loader.py b/example/bayesian-methods/data_loader.py
index 92ca0cf..a0e71bb 100644
--- a/example/bayesian-methods/data_loader.py
+++ b/example/bayesian-methods/data_loader.py
@@ -14,14 +14,15 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-
+"""Create helper functions to load mnist dataset and toy dataset"""
from __future__ import print_function
-import numpy
import os
import ssl
+import numpy
def load_mnist(training_num=50000):
+ """Load mnist dataset"""
    data_path = os.path.join(os.path.dirname(os.path.realpath('__file__')), 'mnist.npz')
if not os.path.isfile(data_path):
from six.moves import urllib
diff --git a/example/bayesian-methods/utils.py b/example/bayesian-methods/utils.py
index a274437..b0ea1f3 100644
--- a/example/bayesian-methods/utils.py
+++ b/example/bayesian-methods/utils.py
@@ -14,11 +14,10 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-
+"""Generate helper functions to Stochastic Gradient Langevin Dynamics (SGLD)
and Bayesian Dark Knowledge (BDK)"""
+import numpy
import mxnet as mx
import mxnet.ndarray as nd
-import numpy
-import logging
class BiasXavier(mx.initializer.Xavier):
@@ -26,7 +25,9 @@ class BiasXavier(mx.initializer.Xavier):
scale = numpy.sqrt(self.magnitude / arr.shape[0])
mx.random.uniform(-scale, scale, out=arr)
+
class SGLDScheduler(mx.lr_scheduler.LRScheduler):
+ """Create SGLDScheduler class"""
def __init__(self, begin_rate, end_rate, total_iter_num, factor):
super(SGLDScheduler, self).__init__()
if factor >= 1.0:
@@ -44,7 +45,9 @@ class SGLDScheduler(mx.lr_scheduler.LRScheduler):
self.count += 1
return self.base_lr
+
def get_executor(sym, ctx, data_inputs, initializer=None):
+    """Get executor to Stochastic Gradient Langevin Dynamics and/or Bayesian Dark Knowledge"""
data_shapes = {k: v.shape for k, v in data_inputs.items()}
arg_names = sym.list_arguments()
aux_names = sym.list_auxiliary_states()
@@ -62,14 +65,18 @@ def get_executor(sym, ctx, data_inputs, initializer=None):
initializer(k, v)
return exe, params, params_grad, aux_states
+
def copy_param(exe, new_param=None):
+ """Create copy of parameters"""
if new_param is None:
-        new_param = {k: nd.empty(v.shape, ctx=mx.cpu()) for k,v in exe.arg_dict.items()}
+        new_param = {k: nd.empty(v.shape, ctx=mx.cpu()) for k, v in exe.arg_dict.items()}
for k, v in new_param.items():
exe.arg_dict[k].copyto(v)
return new_param
+
def sample_test_acc(exe, X, Y, sample_pool=None, label_num=None, minibatch_size=100):
+ """Generate sample test to evaluate accuracy"""
if label_num is None:
pred = numpy.zeros((X.shape[0],)).astype('float32')
else:
@@ -89,12 +96,12 @@ def sample_test_acc(exe, X, Y, sample_pool=None, label_num=None, minibatch_size=
else:
old_param = copy_param(exe)
for sample in sample_pool:
- if type(sample) is list:
+ if isinstance(sample, list):
denominator += sample[0]
else:
denominator += 1.0
for sample in sample_pool:
- if type(sample) is list:
+ if isinstance(sample, list):
ratio = sample[0]/denominator
param = sample[1]
else:
@@ -118,11 +125,12 @@ def sample_test_acc(exe, X, Y, sample_pool=None, label_num=None, minibatch_size=
def sample_test_regression(exe, X, Y, sample_pool=None, minibatch_size=100,
save_path="regression.txt"):
+ """Generate a sample test regression"""
old_param = copy_param(exe)
if sample_pool is not None:
pred = numpy.zeros(Y.shape + (len(sample_pool),))
ratio = numpy.zeros((len(sample_pool),))
- if type(sample_pool[0]) is list:
+ if isinstance(sample_pool[0], list):
denominator = sum(sample[0] for sample in sample_pool)
for i, sample in enumerate(sample_pool):
ratio[i] = sample[0]/float(denominator)
@@ -130,7 +138,7 @@ def sample_test_regression(exe, X, Y, sample_pool=None, minibatch_size=100, save
ratio[:] = 1.0/ Y.shape[0]
iterator = mx.io.NDArrayIter(data=X, label=Y,
batch_size=minibatch_size, shuffle=False)
for i, sample in enumerate(sample_pool):
- if type(sample) is list:
+ if isinstance(sample, list):
sample_param = sample[1]
else:
sample_param = sample
@@ -146,7 +154,7 @@ def sample_test_regression(exe, X, Y, sample_pool=None, minibatch_size=100, save
curr_instance += batch_len
mean = pred.mean(axis=2)
var = pred.std(axis=2)**2
- #print numpy.concatenate((Y, mean), axis=1)
+ # print numpy.concatenate((Y, mean), axis=1)
mse = numpy.square(Y.reshape((Y.shape[0], )) -
mean.reshape((mean.shape[0], ))).mean()
numpy.savetxt(save_path, numpy.concatenate((mean, var), axis=1))
else:
@@ -157,15 +165,19 @@ def sample_test_regression(exe, X, Y, sample_pool=None, minibatch_size=100, save
for batch in iterator:
exe.arg_dict['data'][:] = batch.data[0]
exe.forward(is_train=False)
-            mean_var[curr_instance:curr_instance + minibatch_size - batch.pad, 0] = exe.outputs[0].asnumpy()[:minibatch_size - batch.pad].flatten()
-            mean_var[curr_instance:curr_instance + minibatch_size - batch.pad, 1] = numpy.exp(exe.outputs[1].asnumpy())[:minibatch_size - batch.pad].flatten()
+            mean_var[curr_instance:curr_instance + minibatch_size - batch.pad, 0] =\
+                exe.outputs[0].asnumpy()[:minibatch_size - batch.pad].flatten()
+            mean_var[curr_instance:curr_instance + minibatch_size - batch.pad, 1] = \
+                numpy.exp(exe.outputs[1].asnumpy())[:minibatch_size - batch.pad].flatten()
curr_instance += minibatch_size - batch.pad
mse = numpy.square(Y.reshape((Y.shape[0],)) - mean_var[:, 0]).mean()
numpy.savetxt(save_path, mean_var)
exe.copy_params_from(old_param)
return mse
+
def pred_test(testing_data, exe, param_list=None, save_path=""):
+ """Generate prediction on testset"""
ret = numpy.zeros((testing_data.shape[0], 2))
if param_list is None:
for i in range(testing_data.shape[0]):
@@ -177,8 +189,8 @@ def pred_test(testing_data, exe, param_list=None, save_path=""):
else:
for i in range(testing_data.shape[0]):
pred = numpy.zeros((len(param_list),))
- for j in range(len(param_list)):
- exe.copy_params_from(param_list[j])
+ for (j, param) in enumerate(param_list):
+ exe.copy_params_from(param)
exe.arg_dict['data'][:] = testing_data[i, 0]
exe.forward(is_train=False)
pred[j] = exe.outputs[0].asnumpy()
diff --git a/example/caffe/caffe_net.py b/example/caffe/caffe_net.py
index 0459c90..6796fca 100644
--- a/example/caffe/caffe_net.py
+++ b/example/caffe/caffe_net.py
@@ -14,64 +14,80 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-
+"""Generate helper functions to load Caffe into MXNet"""
+import argparse
import mxnet as mx
from data import get_iterator
-import argparse
import train_model
+
def get_mlp():
- """
- multi-layer perceptron
- """
+ """Get multi-layer perceptron"""
data = mx.symbol.Variable('data')
-    fc1 = mx.symbol.CaffeOp(data_0=data, num_weight=2, name='fc1', prototxt="layer{type:\"InnerProduct\" inner_product_param{num_output: 128} }")
+    fc1 = mx.symbol.CaffeOp(data_0=data, num_weight=2, name='fc1',
+                            prototxt="layer{type:\"InnerProduct\" inner_product_param{num_output: 128} }")
act1 = mx.symbol.CaffeOp(data_0=fc1, prototxt="layer{type:\"TanH\"}")
-    fc2 = mx.symbol.CaffeOp(data_0=act1, num_weight=2, name='fc2', prototxt="layer{type:\"InnerProduct\" inner_product_param{num_output: 64} }")
+    fc2 = mx.symbol.CaffeOp(data_0=act1, num_weight=2, name='fc2',
+                            prototxt="layer{type:\"InnerProduct\" inner_product_param{num_output: 64} }")
act2 = mx.symbol.CaffeOp(data_0=fc2, prototxt="layer{type:\"TanH\"}")
-    fc3 = mx.symbol.CaffeOp(data_0=act2, num_weight=2, name='fc3', prototxt="layer{type:\"InnerProduct\" inner_product_param{num_output: 10}}")
+    fc3 = mx.symbol.CaffeOp(data_0=act2, num_weight=2, name='fc3',
+                            prototxt="layer{type:\"InnerProduct\" inner_product_param{num_output: 10}}")
if use_caffe_loss:
label = mx.symbol.Variable('softmax_label')
-        mlp = mx.symbol.CaffeLoss(data=fc3, label=label, grad_scale=1, name='softmax', prototxt="layer{type:\"SoftmaxWithLoss\"}")
+        mlp = mx.symbol.CaffeLoss(data=fc3, label=label, grad_scale=1, name='softmax',
+                                  prototxt="layer{type:\"SoftmaxWithLoss\"}")
else:
mlp = mx.symbol.SoftmaxOutput(data=fc3, name='softmax')
return mlp
+
def get_lenet():
- """
- LeCun, Yann, Leon Bottou, Yoshua Bengio, and Patrick
+ """LeCun, Yann, Leon Bottou, Yoshua Bengio, and Patrick
Haffner. "Gradient-based learning applied to document recognition."
Proceedings of the IEEE (1998)
"""
data = mx.symbol.Variable('data')
# first conv
-    conv1 = mx.symbol.CaffeOp(data_0=data, num_weight=2, prototxt="layer{type:\"Convolution\" convolution_param { num_output: 20 kernel_size: 5 stride: 1} }")
+    conv1 = mx.symbol.CaffeOp(data_0=data, num_weight=2,
+                              prototxt="layer{type:\"Convolution\" "
+                                       "convolution_param { num_output: 20 kernel_size: 5 stride: 1} }")
act1 = mx.symbol.CaffeOp(data_0=conv1, prototxt="layer{type:\"TanH\"}")
-    pool1 = mx.symbol.CaffeOp(data_0=act1, prototxt="layer{type:\"Pooling\" pooling_param { pool: MAX kernel_size: 2 stride: 2}}")
+    pool1 = mx.symbol.CaffeOp(data_0=act1,
+                              prototxt="layer{type:\"Pooling\" pooling_param { pool: MAX kernel_size: 2 stride: 2}}")
# second conv
-    conv2 = mx.symbol.CaffeOp(data_0=pool1, num_weight=2, prototxt="layer{type:\"Convolution\" convolution_param { num_output: 50 kernel_size: 5 stride: 1} }")
+    conv2 = mx.symbol.CaffeOp(data_0=pool1, num_weight=2,
+                              prototxt="layer{type:\"Convolution\" "
+                                       "convolution_param { num_output: 50 kernel_size: 5 stride: 1} }")
act2 = mx.symbol.CaffeOp(data_0=conv2, prototxt="layer{type:\"TanH\"}")
-    pool2 = mx.symbol.CaffeOp(data_0=act2, prototxt="layer{type:\"Pooling\" pooling_param { pool: MAX kernel_size: 2 stride: 2}}")
+    pool2 = mx.symbol.CaffeOp(data_0=act2,
+                              prototxt="layer{type:\"Pooling\" pooling_param { pool: MAX kernel_size: 2 stride: 2}}")
-    fc1 = mx.symbol.CaffeOp(data_0=pool2, num_weight=2, prototxt="layer{type:\"InnerProduct\" inner_product_param{num_output: 500} }")
+    fc1 = mx.symbol.CaffeOp(data_0=pool2, num_weight=2,
+                            prototxt="layer{type:\"InnerProduct\" inner_product_param{num_output: 500} }")
act3 = mx.symbol.CaffeOp(data_0=fc1, prototxt="layer{type:\"TanH\"}")
# second fullc
-    fc2 = mx.symbol.CaffeOp(data_0=act3, num_weight=2, prototxt="layer{type:\"InnerProduct\"inner_product_param{num_output: 10} }")
+    fc2 = mx.symbol.CaffeOp(data_0=act3, num_weight=2,
+                            prototxt="layer{type:\"InnerProduct\"inner_product_param{num_output: 10} }")
if use_caffe_loss:
label = mx.symbol.Variable('softmax_label')
-        lenet = mx.symbol.CaffeLoss(data=fc2, label=label, grad_scale=1, name='softmax', prototxt="layer{type:\"SoftmaxWithLoss\"}")
+        lenet = mx.symbol.CaffeLoss(data=fc2, label=label, grad_scale=1, name='softmax',
+                                    prototxt="layer{type:\"SoftmaxWithLoss\"}")
else:
lenet = mx.symbol.SoftmaxOutput(data=fc2, name='softmax')
return lenet
+
def get_network_from_json_file(file_name):
network = mx.sym.load(file_name)
return network
+
def parse_args():
+ """Parse the arguments
+ """
    parser = argparse.ArgumentParser(description='train an image classifier on mnist')
parser.add_argument('--network', type=str, default='lenet',
                        help='the cnn to use (mlp | lenet | <path to network json file>')
diff --git a/example/caffe/data.py b/example/caffe/data.py
index 15276c4..f6bbc0f 100644
--- a/example/caffe/data.py
+++ b/example/caffe/data.py
@@ -14,42 +14,44 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-
+"""Create the helper functions to mnist dataset for Caffe operators in MXNet"""
import mxnet as mx
from mxnet.test_utils import get_mnist_ubyte
+
def get_iterator(data_shape, use_caffe_data):
+ """Generate the iterator of mnist dataset"""
def get_iterator_impl_mnist(args, kv):
"""return train and val iterators for mnist"""
# download data
get_mnist_ubyte()
flat = False if len(data_shape) != 1 else True
- train = mx.io.MNISTIter(
- image = "data/train-images-idx3-ubyte",
- label = "data/train-labels-idx1-ubyte",
- input_shape = data_shape,
- batch_size = args.batch_size,
- shuffle = True,
- flat = flat,
- num_parts = kv.num_workers,
- part_index = kv.rank)
+ train = mx.io.MNISTIter(
+ image="data/train-images-idx3-ubyte",
+ label="data/train-labels-idx1-ubyte",
+ input_shape=data_shape,
+ batch_size=args.batch_size,
+ shuffle=True,
+ flat=flat,
+ num_parts=kv.num_workers,
+ part_index=kv.rank)
val = mx.io.MNISTIter(
- image = "data/t10k-images-idx3-ubyte",
- label = "data/t10k-labels-idx1-ubyte",
- input_shape = data_shape,
- batch_size = args.batch_size,
- flat = flat,
- num_parts = kv.num_workers,
- part_index = kv.rank)
+ image="data/t10k-images-idx3-ubyte",
+ label="data/t10k-labels-idx1-ubyte",
+ input_shape=data_shape,
+ batch_size=args.batch_size,
+ flat=flat,
+ num_parts=kv.num_workers,
+ part_index=kv.rank)
return (train, val)
def get_iterator_impl_caffe(args, kv):
flat = False if len(data_shape) != 1 else True
train = mx.io.CaffeDataIter(
- prototxt =
+ prototxt=
'layer { \
name: "mnist" \
type: "Data" \
@@ -67,13 +69,13 @@ def get_iterator(data_shape, use_caffe_data):
backend: LMDB \
} \
}',
- flat = flat,
- num_examples = 60000
+ flat=flat,
+ num_examples=60000
# float32 is the default, so left out here in order to illustrate
)
val = mx.io.CaffeDataIter(
- prototxt =
+ prototxt=
'layer { \
name: "mnist" \
type: "Data" \
@@ -91,9 +93,9 @@ def get_iterator(data_shape, use_caffe_data):
backend: LMDB \
} \
}',
- flat = flat,
- num_examples = 10000,
- dtype = "float32" # float32 is the default
+ flat=flat,
+ num_examples=10000,
+ dtype="float32" # float32 is the default
)
return train, val
diff --git a/example/caffe/train_model.py b/example/caffe/train_model.py
index 4290e71..16b1867 100644
--- a/example/caffe/train_model.py
+++ b/example/caffe/train_model.py
@@ -14,12 +14,14 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-
-import mxnet as mx
-import logging
+"""Train module with using Caffe operator in MXNet"""
import os
+import logging
+import mxnet as mx
+
def fit(args, network, data_loader, eval_metrics=None, batch_end_callback=None):
+ """Train the model with using Caffe operator in MXNet"""
# kvstore
kv = mx.kvstore.create(args.kv_store)
@@ -74,8 +76,8 @@ def fit(args, network, data_loader, eval_metrics=None, batch_end_callback=None):
if 'lr_factor' in args and args.lr_factor < 1:
model_args['lr_scheduler'] = mx.lr_scheduler.FactorScheduler(
- step = max(int(epoch_size * args.lr_factor_epoch), 1),
- factor = args.lr_factor)
+ step=max(int(epoch_size * args.lr_factor_epoch), 1),
+ factor=args.lr_factor)
if 'clip_gradient' in args and args.clip_gradient is not None:
model_args['clip_gradient'] = args.clip_gradient
@@ -85,12 +87,11 @@ def fit(args, network, data_loader, eval_metrics=None, batch_end_callback=None):
args.gpus is None or len(args.gpus.split(',')) is 1):
kv = None
-
mod = mx.mod.Module(network, context=devs)
if eval_metrics is None:
eval_metrics = ['accuracy']
- ## TopKAccuracy only allows top_k > 1
+ # TopKAccuracy only allows top_k > 1
for top_k in [5, 10, 20]:
            eval_metrics.append(mx.metric.create('top_k_accuracy', top_k=top_k))
@@ -102,8 +103,7 @@ def fit(args, network, data_loader, eval_metrics=None, batch_end_callback=None):
batch_end_callback.append(mx.callback.Speedometer(args.batch_size, 50))
    mod.fit(train_data=train, eval_metric=eval_metrics, eval_data=val, optimizer='sgd',
-            optimizer_params={'learning_rate':args.lr, 'momentum': 0.9, 'wd': 0.00001},
- num_epoch=args.num_epochs, batch_end_callback=batch_end_callback,
- initializer=mx.init.Xavier(factor_type="in", magnitude=2.34),
- kvstore=kv, epoch_end_callback=checkpoint, **model_args)
-
+            optimizer_params={'learning_rate':args.lr, 'momentum': 0.9, 'wd': 0.00001},
+ num_epoch=args.num_epochs, batch_end_callback=batch_end_callback,
+ initializer=mx.init.Xavier(factor_type="in", magnitude=2.34),
+ kvstore=kv, epoch_end_callback=checkpoint, **model_args)
diff --git a/example/capsnet/capsulelayers.py b/example/capsnet/capsulelayers.py
index 5ac4fad..077a400 100644
--- a/example/capsnet/capsulelayers.py
+++ b/example/capsnet/capsulelayers.py
@@ -14,7 +14,7 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-
+"""Create layers of capsule net"""
import mxnet as mx
@@ -41,8 +41,7 @@ def primary_caps(data, dim_vector, n_channels, kernel, strides, name=''):
class CapsuleLayer:
- """
- The capsule layer with dynamic routing.
+ """The capsule layer with dynamic routing.
    [batch_size, input_num_capsule, input_dim_vector] => [batch_size, num_capsule, dim_vector]
"""
@@ -98,7 +97,8 @@ class CapsuleLayer:
                mx.sym.sum(mx.sym.broadcast_mul(c, inputs_hat_stopped, name='broadcast_mul_' + str(i)),
                           axis=1, keepdims=True,
                           name='sum_' + str(i)), name='output_' + str(i), squash_axis=4)
-            bias_ = bias_ + mx.sym.sum(mx.sym.broadcast_mul(c, inputs_hat_stopped, name='bias_broadcast_mul' + str(i)),
+            bias_ = bias_ + mx.sym.sum(mx.sym.broadcast_mul(c, inputs_hat_stopped,
+                                                            name='bias_broadcast_mul' + str(i)),
                                        axis=4,
                                        keepdims=True, name='bias_' + str(i))
diff --git a/example/capsnet/capsulenet.py b/example/capsnet/capsulenet.py
index 6710875..05df9cd 100644
--- a/example/capsnet/capsulenet.py
+++ b/example/capsnet/capsulenet.py
@@ -14,24 +14,27 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-import mxnet as mx
-import numpy as np
+"""Generate MXNet implementation of CapsNet"""
import os
import re
import gzip
import struct
+import numpy as np
import scipy.ndimage as ndi
+import mxnet as mx
from capsulelayers import primary_caps, CapsuleLayer
from mxboard import SummaryWriter
+
def margin_loss(y_true, y_pred):
loss = y_true * mx.sym.square(mx.sym.maximum(0., 0.9 - y_pred)) +\
0.5 * (1 - y_true) * mx.sym.square(mx.sym.maximum(0., y_pred - 0.1))
return mx.sym.mean(data=mx.sym.sum(loss, 1))
-def capsnet(batch_size, n_class, num_routing,recon_loss_weight):
+def capsnet(batch_size, n_class, num_routing, recon_loss_weight):
+ """Create CapsNet"""
# data.shape = [batch_size, 1, 28, 28]
data = mx.sym.Variable('data')
@@ -107,7 +110,8 @@ def read_data(label_url, image_url):
label = np.fromstring(flbl.read(), dtype=np.int8)
with gzip.open(download_data(image_url), 'rb') as fimg:
magic, num, rows, cols = struct.unpack(">IIII", fimg.read(16))
-        image = np.fromstring(fimg.read(), dtype=np.uint8).reshape(len(label), rows, cols)
+ image = np.fromstring(fimg.read(), dtype=np.uint8)
+ np.reshape(image, len(label), (rows, cols))
return label, image
@@ -116,10 +120,11 @@ def to4d(img):
class LossMetric(mx.metric.EvalMetric):
- def __init__(self, batch_size, num_gpu):
+ """Evaluate the loss function"""
+ def __init__(self, batch_size, num_gpus):
super(LossMetric, self).__init__('LossMetric')
self.batch_size = batch_size
- self.num_gpu = num_gpu
+ self.num_gpu = num_gpus
self.sum_metric = 0
self.num_inst = 0
self.loss = 0.0
@@ -130,6 +135,7 @@ class LossMetric(mx.metric.EvalMetric):
self.n_batch = 0
def update(self, labels, preds):
+ """Update the hyper-parameters and loss of CapsNet"""
batch_sum_metric = 0
batch_num_inst = 0
for label, pred_outcaps in zip(labels[0], preds[0]):
@@ -146,7 +152,7 @@ class LossMetric(mx.metric.EvalMetric):
self.batch_sum_metric = batch_sum_metric
self.batch_num_inst = batch_num_inst
self.batch_loss = batch_loss
- self.n_batch += 1
+ self.n_batch += 1
def get_name_value(self):
acc = float(self.sum_metric)/float(self.num_inst)
@@ -184,6 +190,7 @@ class SimpleLRScheduler(mx.lr_scheduler.LRScheduler):
def do_training(num_epoch, optimizer, kvstore, learning_rate, model_prefix, decay):
+ """Run training to CapsNet"""
summary_writer = SummaryWriter(args.tblog_dir)
lr_scheduler = SimpleLRScheduler(learning_rate)
optimizer_params = {'lr_scheduler': lr_scheduler}
@@ -218,7 +225,8 @@ def do_training(num_epoch, optimizer, kvstore, learning_rate, model_prefix, deca
summary_writer.add_scalar('val_loss', val_loss, n_epoch)
summary_writer.add_scalar('val_recon_err', val_recon_err, n_epoch)
-        print('Epoch[%d] train acc: %.4f loss: %.6f recon_err: %.6f' % (n_epoch, train_acc, train_loss, train_recon_err))
+        print('Epoch[%d] train acc: %.4f loss: %.6f recon_err: %.6f' % (n_epoch, train_acc, train_loss,
+                                                                        train_recon_err))
        print('Epoch[%d] val acc: %.4f loss: %.6f recon_err: %.6f' % (n_epoch, val_acc, val_loss, val_recon_err))
print('SAVE CHECKPOINT')
@@ -227,10 +235,8 @@ def do_training(num_epoch, optimizer, kvstore, learning_rate, model_prefix, deca
lr_scheduler.learning_rate = learning_rate * (decay ** n_epoch)
-def apply_transform(x,
- transform_matrix,
- fill_mode='nearest',
- cval=0.):
+def apply_transform(x, transform_matrix, fill_mode='nearest', cval=0.):
+ """Apply transform on nd.array"""
x = np.rollaxis(x, 0, 0)
final_affine_matrix = transform_matrix[:2, :2]
final_offset = transform_matrix[:2, 2]
@@ -255,30 +261,45 @@ def random_shift(x, width_shift_fraction, height_shift_fraction):
x = apply_transform(x, shift_matrix, 'nearest')
return x
+
def _shuffle(data, idx):
"""Shuffle the data."""
shuffle_data = []
- for k, v in data:
- shuffle_data.append((k, mx.ndarray.array(v.asnumpy()[idx], v.context)))
+ for idx_k, idx_v in data:
+        shuffle_data.append((idx_k, mx.ndarray.array(idx_v.asnumpy()[idx], idx_v.context)))
return shuffle_data
+
class MNISTCustomIter(mx.io.NDArrayIter):
-
+ """Create custom iterator of mnist dataset"""
+ def __init__(self, data, label, batch_size, shuffle):
+ self.data = data
+ self.label = label
+ self.batch_size = batch_size
+ self.shuffle = shuffle
+ self.cursor = None
+
def reset(self):
+ """Reset class MNISTCustomIter(mx.io.NDArrayIter):"""
# shuffle data
if self.is_train:
np.random.shuffle(self.idx)
self.data = _shuffle(self.data, self.idx)
self.label = _shuffle(self.label, self.idx)
+
        if self.last_batch_handle == 'roll_over' and self.cursor > self.num_data:
-            self.cursor = -self.batch_size + (self.cursor%self.num_data)%self.batch_size
+            self.cursor = -self.batch_size + (self.cursor % self.num_data) % self.batch_size
else:
self.cursor = -self.batch_size
+
def set_is_train(self, is_train):
+ """Set training flag"""
self.is_train = is_train
+
def next(self):
+ """Generate next of iterator"""
if self.iter_next():
if self.is_train:
data_raw_list = self.getdata()
@@ -288,8 +309,7 @@ class MNISTCustomIter(mx.io.NDArrayIter):
                return mx.io.DataBatch(data=[mx.nd.array(data_shifted)], label=self.getlabel(),
pad=self.getpad(), index=None)
else:
-                return mx.io.DataBatch(data=self.getdata(), label=self.getlabel(), \
-                                       pad=self.getpad(), index=None)
+                return mx.io.DataBatch(data=self.getdata(), label=self.getlabel(), pad=self.getpad(), index=None)
else:
raise StopIteration
@@ -298,10 +318,9 @@ class MNISTCustomIter(mx.io.NDArrayIter):
if __name__ == "__main__":
# Read mnist data set
path = 'http://yann.lecun.com/exdb/mnist/'
- (train_lbl, train_img) = read_data(
-        path + 'train-labels-idx1-ubyte.gz', path + 'train-images-idx3-ubyte.gz')
- (val_lbl, val_img) = read_data(
- path + 't10k-labels-idx1-ubyte.gz', path + 't10k-images-idx3-ubyte.gz')
+    (train_lbl, train_img) = read_data(path + 'train-labels-idx1-ubyte.gz', path + 'train-images-idx3-ubyte.gz')
+    (val_lbl, val_img) = read_data(path + 't10k-labels-idx1-ubyte.gz', path + 't10k-images-idx3-ubyte.gz')
+
# set batch size
import argparse
parser = argparse.ArgumentParser()
@@ -331,10 +350,13 @@ if __name__ == "__main__":
# generate train_iter, val_iter
train_iter = MNISTCustomIter(data=to4d(train_img), label=train_lbl,
batch_size=int(args.batch_size), shuffle=True)
train_iter.set_is_train(True)
-    val_iter = MNISTCustomIter(data=to4d(val_img), label=val_lbl, batch_size=int(args.batch_size),)
+    val_iter = MNISTCustomIter(data=to4d(val_img), label=val_lbl, batch_size=int(args.batch_size), shuffle=True)
val_iter.set_is_train(False)
# define capsnet
-    final_net = capsnet(batch_size=int(args.batch_size/num_gpu), n_class=10, num_routing=args.num_routing, recon_loss_weight=args.recon_loss_weight)
+ final_net = capsnet(batch_size=int(args.batch_size/num_gpu),
+ n_class=10,
+ num_routing=args.num_routing,
+ recon_loss_weight=args.recon_loss_weight)
# set metric
loss_metric = LossMetric(args.batch_size/num_gpu, 1)
@@ -343,5 +365,6 @@ if __name__ == "__main__":
module.bind(data_shapes=train_iter.provide_data,
label_shapes=val_iter.provide_label,
for_training=True)
+
    do_training(num_epoch=args.num_epoch, optimizer='adam', kvstore='device', learning_rate=args.lr,
model_prefix=args.model_prefix, decay=args.decay)
diff --git a/example/cnn_chinese_text_classification/data_helpers.py b/example/cnn_chinese_text_classification/data_helpers.py
index b3a13de..49bb3d5 100644
--- a/example/cnn_chinese_text_classification/data_helpers.py
+++ b/example/cnn_chinese_text_classification/data_helpers.py
@@ -14,6 +14,9 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
+
+"""Help functions to support for implementing CNN + Highway Network for
Chinese Text Classification in MXNet"""
+
import codecs
import itertools
import os
@@ -27,8 +30,7 @@ import word2vec
def clean_str(string):
- """
- Tokenization/string cleaning for all datasets except for SST.
+ """Tokenization/string cleaning for all datasets except for SST.
    Original taken from https://github.com/yoonkim/CNN_sentence/blob/master/process_data.py
"""
string = re.sub(r"[^A-Za-z0-9(),!?\'\`]", " ", string)
@@ -40,27 +42,28 @@ def clean_str(string):
string = re.sub(r"\'ll", " \'ll", string)
string = re.sub(r",", " , ", string)
string = re.sub(r"!", " ! ", string)
- string = re.sub(r"\(", " \( ", string)
- string = re.sub(r"\)", " \) ", string)
- string = re.sub(r"\?", " \? ", string)
+ string = re.sub(r"\(", r" \( ", string)
+ string = re.sub(r"\)", r" \) ", string)
+ string = re.sub(r"\?", r" \? ", string)
string = re.sub(r"\s{2,}", " ", string)
return string.strip().lower()
def get_chinese_text():
+ """Download the chinese_text dataset and unzip it"""
if not os.path.isdir("data/"):
os.system("mkdir data/")
if (not os.path.exists('data/pos.txt')) or \
(not os.path.exists('data/neg')):
-        os.system("wget -q https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/example/chinese_text.zip -P data/")
+        os.system("wget -q https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/example/chinese_text.zip "
+                  "-P data/")
os.chdir("./data")
os.system("unzip -u chinese_text.zip")
os.chdir("..")
def load_data_and_labels():
- """
-    Loads MR polarity data from files, splits the data into words and generates labels.
+    """Loads MR polarity data from files, splits the data into words and generates labels.
Returns split sentences and labels.
"""
# download dataset
@@ -86,14 +89,14 @@ def load_data_and_labels():
def pad_sentences(sentences, padding_word="</s>"):
- """
-    Pads all sentences to the same length. The length is defined by the longest sentence.
+    """Pads all sentences to the same length. The length is defined by the longest sentence.
Returns padded sentences.
"""
sequence_length = max(len(x) for x in sentences)
padded_sentences = []
- for i in range(len(sentences)):
- sentence = sentences[i]
+ for i, element in enumerate(sentences):
+ print(i, element)
+ sentence = element
num_padding = sequence_length - len(sentence)
new_sentence = sentence + [padding_word] * num_padding
padded_sentences.append(new_sentence)
@@ -101,8 +104,7 @@ def pad_sentences(sentences, padding_word="</s>"):
def build_vocab(sentences):
- """
- Builds a vocabulary mapping from word to index based on the sentences.
+ """Builds a vocabulary mapping from word to index based on the sentences.
Returns vocabulary mapping and inverse vocabulary mapping.
"""
# Build vocabulary
@@ -115,45 +117,41 @@ def build_vocab(sentences):
def build_input_data(sentences, labels, vocabulary):
- """
- Maps sentencs and labels to vectors based on a vocabulary.
- """
+ """Maps sentences and labels to vectors based on a vocabulary."""
    x = np.array([[vocabulary[word] for word in sentence] for sentence in sentences])
y = np.array(labels)
return [x, y]
-def build_input_data_with_word2vec(sentences, labels, word2vec):
- """Map sentences and labels to vectors based on a pretrained word2vec"""
+def build_input_data_with_word2vec(sentences, labels, word2vec_list):
+ """Map sentences and labels to vectors based on a pre-trained word2vec"""
x_vec = []
for sent in sentences:
vec = []
for word in sent:
- if word in word2vec:
- vec.append(word2vec[word])
+ if word in word2vec_list:
+ vec.append(word2vec_list[word])
else:
- vec.append(word2vec['</s>'])
+ vec.append(word2vec_list['</s>'])
x_vec.append(vec)
x_vec = np.array(x_vec)
y_vec = np.array(labels)
return [x_vec, y_vec]
-def load_data_with_word2vec(word2vec):
- """
- Loads and preprocessed data for the MR dataset.
+def load_data_with_word2vec(word2vec_list):
+ """Loads and preprocessed data for the MR dataset.
Returns input vectors, labels, vocabulary, and inverse vocabulary.
"""
# Load and preprocess data
sentences, labels = load_data_and_labels()
sentences_padded = pad_sentences(sentences)
# vocabulary, vocabulary_inv = build_vocab(sentences_padded)
- return build_input_data_with_word2vec(sentences_padded, labels, word2vec)
+    return build_input_data_with_word2vec(sentences_padded, labels, word2vec_list)
def load_data():
- """
- Loads and preprocessed data for the MR dataset.
+ """Loads and preprocessed data for the MR dataset.
Returns input vectors, labels, vocabulary, and inverse vocabulary.
"""
# Load and preprocess data
@@ -165,9 +163,7 @@ def load_data():
def batch_iter(data, batch_size, num_epochs):
- """
- Generates a batch iterator for a dataset.
- """
+ """Generates a batch iterator for a dataset."""
data = np.array(data)
data_size = len(data)
num_batches_per_epoch = int(len(data) / batch_size) + 1
@@ -182,18 +178,19 @@ def batch_iter(data, batch_size, num_epochs):
def load_pretrained_word2vec(infile):
+ """Load the pre-trained word2vec from file."""
if isinstance(infile, str):
infile = open(infile)
- word2vec = {}
+ word2vec_list = {}
for idx, line in enumerate(infile):
if idx == 0:
vocab_size, dim = line.strip().split()
else:
tks = line.strip().split()
- word2vec[tks[0]] = map(float, tks[1:])
+ word2vec_list[tks[0]] = map(float, tks[1:])
- return word2vec
+ return word2vec_list
def load_google_word2vec(path):
diff --git a/example/cnn_chinese_text_classification/text_cnn.py b/example/cnn_chinese_text_classification/text_cnn.py
index 4598a52..ce70681 100644
--- a/example/cnn_chinese_text_classification/text_cnn.py
+++ b/example/cnn_chinese_text_classification/text_cnn.py
@@ -20,12 +20,14 @@
# -*- coding: utf-8 -*-
-import sys, os
-import mxnet as mx
-import numpy as np
-import argparse
+"""Implementing CNN + Highway Network for Chinese Text Classification in
MXNet"""
+
+import os
+import sys
import logging
-import time
+import argparse
+import numpy as np
+import mxnet as mx
from mxnet import random
from mxnet.initializer import Xavier, Initializer
@@ -63,12 +65,28 @@ parser.add_argument('--save-period', type=int, default=10,
def save_model():
+ """Save cnn model
+
+ Returns
+ ----------
+    callback: A callback function that can be passed as epoch_end_callback to fit
+ """
if not os.path.exists("checkpoint"):
os.mkdir("checkpoint")
return mx.callback.do_checkpoint("checkpoint/checkpoint", args.save_period)
def highway(data):
+ """Construct highway net
+
+ Parameters
+ ----------
+ data:
+
+ Returns
+ ----------
+ Highway Networks
+ """
_data = data
high_weight = mx.sym.Variable('high_weight')
high_bias = mx.sym.Variable('high_bias')
@@ -85,20 +103,41 @@ def highway(data):
def data_iter(batch_size, num_embed, pre_trained_word2vec=False):
+ """Construct data iter
+
+ Parameters
+ ----------
+ batch_size: int
+ num_embed: int
+ pre_trained_word2vec: boolean
+ identify the pre-trained layers or not
+ Returns
+ ----------
+ train_set: DataIter
+ Train DataIter
+ valid: DataIter
+ Valid DataIter
+ sentences_size: int
+ array dimensions
+ embedded_size: int
+ array dimensions
+ vocab_size: int
+ array dimensions
+ """
logger.info('Loading data...')
if pre_trained_word2vec:
word2vec = data_helpers.load_pretrained_word2vec('data/rt.vec')
x, y = data_helpers.load_data_with_word2vec(word2vec)
- # reshpae for convolution input
+ # reshape for convolution input
x = np.reshape(x, (x.shape[0], 1, x.shape[1], x.shape[2]))
- embed_size = x.shape[-1]
- sentence_size = x.shape[2]
- vocab_size = -1
+ embedded_size = x.shape[-1]
+ sentences_size = x.shape[2]
+ vocabulary_size = -1
else:
x, y, vocab, vocab_inv = data_helpers.load_data()
- embed_size = num_embed
- sentence_size = x.shape[1]
- vocab_size = len(vocab)
+ embedded_size = num_embed
+ sentences_size = x.shape[1]
+ vocabulary_size = len(vocab)
# randomly shuffle data
np.random.seed(10)
@@ -109,30 +148,55 @@ def data_iter(batch_size, num_embed, pre_trained_word2vec=False):
# split train/valid set
x_train, x_dev = x_shuffled[:-1000], x_shuffled[-1000:]
y_train, y_dev = y_shuffled[:-1000], y_shuffled[-1000:]
- logger.info('Train/Valid split: %d/%d' % (len(y_train), len(y_dev)))
+ logger.info('Train/Valid split: %d/%d', len(y_train), len(y_dev))
logger.info('train shape: %(shape)s', {'shape': x_train.shape})
logger.info('valid shape: %(shape)s', {'shape': x_dev.shape})
- logger.info('sentence max words: %(shape)s', {'shape': sentence_size})
- logger.info('embedding size: %(msg)s', {'msg': embed_size})
- logger.info('vocab size: %(msg)s', {'msg': vocab_size})
+ logger.info('sentence max words: %(shape)s', {'shape': sentences_size})
+ logger.info('embedding size: %(msg)s', {'msg': embedded_size})
+ logger.info('vocab size: %(msg)s', {'msg': vocabulary_size})
- train = mx.io.NDArrayIter(
+ train_set = mx.io.NDArrayIter(
x_train, y_train, batch_size, shuffle=True)
valid = mx.io.NDArrayIter(
x_dev, y_dev, batch_size)
- return (train, valid, sentence_size, embed_size, vocab_size)
+ return train_set, valid, sentences_size, embedded_size, vocabulary_size
-def sym_gen(batch_size, sentence_size, num_embed, vocab_size,
- num_label=2, filter_list=[3, 4, 5], num_filter=100,
+def sym_gen(batch_size, sentences_size, num_embed, vocabulary_size,
+ num_label=2, filter_list=None, num_filter=100,
dropout=0.0, pre_trained_word2vec=False):
+ """Generate network symbol
+
+ Parameters
+ ----------
+ batch_size: int
+ sentences_size: int
+ num_embed: int
+ vocabulary_size: int
+ num_label: int
+ filter_list: list
+ num_filter: int
+ dropout: int
+ pre_trained_word2vec: boolean
+ identify the pre-trained layers or not
+ Returns
+ ----------
+ sm: symbol
+ data: list of str
+ data names
+ softmax_label: list of str
+ label names
+ """
input_x = mx.sym.Variable('data')
input_y = mx.sym.Variable('softmax_label')
# embedding layer
if not pre_trained_word2vec:
-        embed_layer = mx.sym.Embedding(data=input_x, input_dim=vocab_size, output_dim=num_embed, name='vocab_embed')
-        conv_input = mx.sym.Reshape(data=embed_layer, target_shape=(batch_size, 1, sentence_size, num_embed))
+ embed_layer = mx.sym.Embedding(data=input_x,
+ input_dim=vocabulary_size,
+ output_dim=num_embed,
+ name='vocab_embed')
+        conv_input = mx.sym.Reshape(data=embed_layer, target_shape=(batch_size, 1, sentences_size, num_embed))
else:
conv_input = input_x
@@ -141,7 +205,7 @@ def sym_gen(batch_size, sentence_size, num_embed, vocab_size,
for i, filter_size in enumerate(filter_list):
        convi = mx.sym.Convolution(data=conv_input, kernel=(filter_size, num_embed), num_filter=num_filter)
relui = mx.sym.Activation(data=convi, act_type='relu')
-        pooli = mx.sym.Pooling(data=relui, pool_type='max', kernel=(sentence_size - filter_size + 1, 1), stride=(1, 1))
+        pooli = mx.sym.Pooling(data=relui, pool_type='max', kernel=(sentences_size - filter_size + 1, 1), stride=(1, 1))
pooled_outputs.append(pooli)
# combine all pooled outputs
@@ -170,10 +234,27 @@ def sym_gen(batch_size, sentence_size, num_embed, vocab_size,
return sm, ('data',), ('softmax_label',)
-def train(symbol, train_iter, valid_iter, data_names, label_names):
- devs = mx.cpu() if args.gpus is None or args.gpus is '' else [
- mx.gpu(int(i)) for i in args.gpus.split(',')]
-    module = mx.mod.Module(symbol, data_names=data_names, label_names=label_names, context=devs)
+def train(symbol_data, train_iterator, valid_iterator, data_column_names, target_names):
+ """Train cnn model
+
+ Parameters
+ ----------
+ symbol_data: symbol
+ train_iterator: DataIter
+ Train DataIter
+ valid_iterator: DataIter
+ Valid DataIter
+ data_column_names: list of str
+        Defaults to ('data') for a typical model used in image classification
+    target_names: list of str
+        Defaults to ('softmax_label') for a typical model used in image classification
+ """
+ devs = mx.cpu() # default setting
+ if args.gpus is not None:
+ for i in args.gpus.split(','):
+ mx.gpu(int(i))
+ devs = mx.gpu()
+    module = mx.mod.Module(symbol_data, data_names=data_column_names, label_names=target_names, context=devs)
init_params = {
'vocab_embed_weight': {'uniform': 0.1},
@@ -185,7 +266,7 @@ def train(symbol, train_iter, valid_iter, data_names, label_names):
'cls_weight': {'uniform': 0.1}, 'cls_bias': {'costant': 0},
}
# custom init_params
-    module.bind(data_shapes=train_iter.provide_data, label_shapes=train_iter.provide_label)
+    module.bind(data_shapes=train_iterator.provide_data, label_shapes=train_iterator.provide_label)
module.init_params(CustomInit(init_params))
lr_sch = mx.lr_scheduler.FactorScheduler(step=25000, factor=0.999)
module.init_optimizer(
@@ -195,8 +276,8 @@ def train(symbol, train_iter, valid_iter, data_names, label_names):
return mx.nd.norm(d) / np.sqrt(d.size)
mon = mx.mon.Monitor(25000, norm_stat)
- module.fit(train_data=train_iter,
- eval_data=valid_iter,
+ module.fit(train_data=train_iterator,
+ eval_data=valid_iterator,
eval_metric='acc',
kvstore=args.kv_store,
monitor=mon,
@@ -207,8 +288,7 @@ def train(symbol, train_iter, valid_iter, data_names, label_names):
@mx.init.register
class CustomInit(Initializer):
-    """
-    https://mxnet.incubator.apache.org/api/python/optimization.html#mxnet.initializer.register
+    """https://mxnet.incubator.apache.org/api/python/optimization.html#mxnet.initializer.register
Create and register a custom initializer that
Initialize the weight and bias with custom requirements
diff --git a/example/cnn_text_classification/data_helpers.py b/example/cnn_text_classification/data_helpers.py
index b6fe1e6..093da7b 100644
--- a/example/cnn_text_classification/data_helpers.py
+++ b/example/cnn_text_classification/data_helpers.py
@@ -15,6 +15,8 @@
# specific language governing permissions and limitations
# under the License.
+"""Help functions to support for implementing CNN + Highway Network for Text
Classification in MXNet"""
+
import itertools
import os
import re
@@ -27,8 +29,7 @@ import word2vec
def clean_str(string):
- """
- Tokenization/string cleaning for all datasets except for SST.
+ """Tokenization/string cleaning for all datasets except for SST.
    Original taken from https://github.com/yoonkim/CNN_sentence/blob/master/process_data.py
"""
string = re.sub(r"[^A-Za-z0-9(),!?\'\`]", " ", string)
@@ -40,16 +41,15 @@ def clean_str(string):
string = re.sub(r"\'ll", " \'ll", string)
string = re.sub(r",", " , ", string)
string = re.sub(r"!", " ! ", string)
- string = re.sub(r"\(", " \( ", string)
- string = re.sub(r"\)", " \) ", string)
- string = re.sub(r"\?", " \? ", string)
+ string = re.sub(r"\(", r" \( ", string)
+ string = re.sub(r"\)", r" \) ", string)
+ string = re.sub(r"\?", r" \? ", string)
string = re.sub(r"\s{2,}", " ", string)
return string.strip().lower()
def load_data_and_labels():
- """
-    Loads MR polarity data from files, splits the data into words and generates labels.
+    """Loads MR polarity data from files, splits the data into words and generates labels.
Returns split sentences and labels.
"""
# Load data from files
@@ -75,14 +75,12 @@ def load_data_and_labels():
def pad_sentences(sentences, padding_word="</s>"):
- """
-    Pads all sentences to the same length. The length is defined by the longest sentence.
+    """Pads all sentences to the same length. The length is defined by the longest sentence.
Returns padded sentences.
"""
sequence_length = max(len(x) for x in sentences)
padded_sentences = []
- for i in range(len(sentences)):
- sentence = sentences[i]
+ for i, sentence in enumerate(sentences):
num_padding = sequence_length - len(sentence)
new_sentence = sentence + [padding_word] * num_padding
padded_sentences.append(new_sentence)
@@ -90,8 +88,7 @@ def pad_sentences(sentences, padding_word="</s>"):
def build_vocab(sentences):
- """
- Builds a vocabulary mapping from word to index based on the sentences.
+ """Builds a vocabulary mapping from word to index based on the sentences.
Returns vocabulary mapping and inverse vocabulary mapping.
"""
# Build vocabulary
@@ -104,44 +101,41 @@ def build_vocab(sentences):
def build_input_data(sentences, labels, vocabulary):
- """
- Maps sentencs and labels to vectors based on a vocabulary.
- """
+ """Maps sentencs and labels to vectors based on a vocabulary."""
    x = np.array([[vocabulary[word] for word in sentence] for sentence in sentences])
y = np.array(labels)
return [x, y]
-def build_input_data_with_word2vec(sentences, labels, word2vec):
+
+def build_input_data_with_word2vec(sentences, labels, word2vec_list):
"""Map sentences and labels to vectors based on a pretrained word2vec"""
x_vec = []
for sent in sentences:
vec = []
for word in sent:
- if word in word2vec:
- vec.append(word2vec[word])
+ if word in word2vec_list:
+ vec.append(word2vec_list[word])
else:
- vec.append(word2vec['</s>'])
+ vec.append(word2vec_list['</s>'])
x_vec.append(vec)
x_vec = np.array(x_vec)
y_vec = np.array(labels)
return [x_vec, y_vec]
-def load_data_with_word2vec(word2vec):
- """
- Loads and preprocessed data for the MR dataset.
+def load_data_with_word2vec(word2vec_list):
+ """Loads and preprocessed data for the MR dataset.
Returns input vectors, labels, vocabulary, and inverse vocabulary.
"""
# Load and preprocess data
sentences, labels = load_data_and_labels()
sentences_padded = pad_sentences(sentences)
# vocabulary, vocabulary_inv = build_vocab(sentences_padded)
- return build_input_data_with_word2vec(sentences_padded, labels, word2vec)
+    return build_input_data_with_word2vec(sentences_padded, labels, word2vec_list)
def load_data():
- """
- Loads and preprocessed data for the MR dataset.
+ """Loads and preprocessed data for the MR dataset.
Returns input vectors, labels, vocabulary, and inverse vocabulary.
"""
# Load and preprocess data
@@ -153,9 +147,7 @@ def load_data():
def batch_iter(data, batch_size, num_epochs):
- """
- Generates a batch iterator for a dataset.
- """
+ """Generates a batch iterator for a dataset."""
data = np.array(data)
data_size = len(data)
num_batches_per_epoch = int(len(data)/batch_size) + 1
@@ -170,18 +162,19 @@ def batch_iter(data, batch_size, num_epochs):
def load_pretrained_word2vec(infile):
+ """Load the pre-trained word2vec from file."""
if isinstance(infile, str):
infile = open(infile)
- word2vec = {}
+ word2vec_list = {}
for idx, line in enumerate(infile):
if idx == 0:
vocab_size, dim = line.strip().split()
else:
tks = line.strip().split()
- word2vec[tks[0]] = map(float, tks[1:])
+ word2vec_list[tks[0]] = map(float, tks[1:])
- return word2vec
+ return word2vec_list
def load_google_word2vec(path):
diff --git a/example/deep-embedded-clustering/model.py b/example/deep-embedded-clustering/model.py
index 9b6185c..b388c55 100644
--- a/example/deep-embedded-clustering/model.py
+++ b/example/deep-embedded-clustering/model.py
@@ -18,8 +18,9 @@
# pylint: disable=missing-docstring
from __future__ import print_function
-import mxnet as mx
import numpy as np
+import mxnet as mx
+
try:
import cPickle as pickle
except ImportError:
@@ -53,7 +54,7 @@ def extract_feature(sym, args, auxs, data_iter, N, xpu=mx.cpu()):
class MXModel(object):
- def __init__(self, xpu=mx.cpu(), *args, **kwargs):
+ def __init__(self, *args, xpu=mx.cpu(), **kwargs):
self.xpu = xpu
self.loss = None
self.args = {}
diff --git a/example/deep-embedded-clustering/solver.py b/example/deep-embedded-clustering/solver.py
index 567c78e..79fe5c6 100644
--- a/example/deep-embedded-clustering/solver.py
+++ b/example/deep-embedded-clustering/solver.py
@@ -19,9 +19,8 @@
from __future__ import print_function
import logging
-
-import mxnet as mx
import numpy as np
+import mxnet as mx
class Monitor(object):
@@ -148,4 +147,4 @@ class Solver(object):
if self.iter_end_callback is not None:
if self.iter_end_callback(i):
return
- exe.outputs[0].wait_to_read()
\ No newline at end of file
+ exe.outputs[0].wait_to_read()