cchung100m closed pull request #13815: [issue_12205 - PART1] solve pylint
errors in examples with issue no.12205
URL: https://github.com/apache/incubator-mxnet/pull/13815
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (GitHub does not otherwise show the original diff once the pull
request has been merged):
diff --git a/example/autoencoder/mnist_sae.py b/example/autoencoder/mnist_sae.py
index 886f2a16a86..04e4e75f665 100644
--- a/example/autoencoder/mnist_sae.py
+++ b/example/autoencoder/mnist_sae.py
@@ -21,8 +21,8 @@
import argparse
import logging
-import mxnet as mx
import numpy as np
+import mxnet as mx
import data
from autoencoder import AutoEncoderModel
diff --git a/example/autoencoder/model.py b/example/autoencoder/model.py
index 9b6185c9fd1..b388c551387 100644
--- a/example/autoencoder/model.py
+++ b/example/autoencoder/model.py
@@ -18,8 +18,9 @@
# pylint: disable=missing-docstring
from __future__ import print_function
-import mxnet as mx
import numpy as np
+import mxnet as mx
+
try:
import cPickle as pickle
except ImportError:
@@ -53,7 +54,7 @@ def extract_feature(sym, args, auxs, data_iter, N,
xpu=mx.cpu()):
class MXModel(object):
- def __init__(self, xpu=mx.cpu(), *args, **kwargs):
+ def __init__(self, *args, xpu=mx.cpu(), **kwargs):
self.xpu = xpu
self.loss = None
self.args = {}
diff --git a/example/autoencoder/solver.py b/example/autoencoder/solver.py
index 0c990ce7423..79fe5c69add 100644
--- a/example/autoencoder/solver.py
+++ b/example/autoencoder/solver.py
@@ -19,9 +19,8 @@
from __future__ import print_function
import logging
-
-import mxnet as mx
import numpy as np
+import mxnet as mx
class Monitor(object):
diff --git a/example/bayesian-methods/algos.py b/example/bayesian-methods/algos.py
index f7b36207079..91f17f88681 100644
--- a/example/bayesian-methods/algos.py
+++ b/example/bayesian-methods/algos.py
@@ -14,13 +14,15 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-
+"""
+Create implementation of algorithms of HMC, stepHMC, SGD, SGLD and
DistilledSGLD
+"""
from __future__ import print_function
+import time
+import numpy
import mxnet as mx
import mxnet.ndarray as nd
-import time
-import logging
-from utils import *
+from utils import copy_param, get_executor, sample_test_regression,
sample_test_acc
def calc_potential(exe, params, label_name, noise_precision, prior_precision):
@@ -35,6 +37,9 @@ def calc_potential(exe, params, label_name, noise_precision,
prior_precision):
def calc_grad(exe, exe_grads, params, X, Y, label_name=None, outgrad_f=None):
+ """
+ Calculate gradient
+ """
exe.copy_params_from(params)
exe.arg_dict['data'][:] = X
if outgrad_f is None:
@@ -48,8 +53,10 @@ def calc_grad(exe, exe_grads, params, X, Y, label_name=None,
outgrad_f=None):
v.wait_to_read()
-def step_HMC(exe, exe_params, exe_grads, label_key, noise_precision,
prior_precision, L=10,
- eps=1E-6):
+def step_HMC(exe, exe_params, exe_grads, label_key, noise_precision,
prior_precision, L=10, eps=1E-6):
+ """
+ Generate the implementation of step HMC
+ """
init_params = {k: v.copyto(v.context) for k, v in exe_params.items()}
end_params = {k: v.copyto(v.context) for k, v in exe_params.items()}
init_momentums = {k: mx.random.normal(0, 1, v.shape) for k, v in
init_params.items()}
@@ -102,6 +109,9 @@ def step_HMC(exe, exe_params, exe_grads, label_key,
noise_precision, prior_preci
def HMC(sym, data_inputs, X, Y, X_test, Y_test, sample_num,
initializer=None, noise_precision=1 / 9.0, prior_precision=0.1,
learning_rate=1E-6, L=10, dev=mx.gpu()):
+ """
+ Generate the implementation of HMC
+ """
label_key = list(set(data_inputs.keys()) - set(['data']))[0]
exe, exe_params, exe_grads, _ = get_executor(sym, dev, data_inputs,
initializer)
exe.arg_dict['data'][:] = X
@@ -134,6 +144,9 @@ def SGD(sym, data_inputs, X, Y, X_test, Y_test,
total_iter_num,
out_grad_f=None,
initializer=None,
minibatch_size=100, dev=mx.gpu()):
+ """
+ Generate the implementation of SGD
+ """
if out_grad_f is None:
label_key = list(set(data_inputs.keys()) - set(['data']))[0]
exe, params, params_grad, _ = get_executor(sym, dev, data_inputs,
initializer)
@@ -173,6 +186,9 @@ def SGLD(sym, X, Y, X_test, Y_test, total_iter_num,
initializer=None,
minibatch_size=100, thin_interval=100, burn_in_iter_num=1000,
task='classification',
dev=mx.gpu()):
+ """
+ Generate the implementation of SGLD
+ """
if out_grad_f is None:
label_key = list(set(data_inputs.keys()) - set(['data']))[0]
exe, params, params_grad, _ = get_executor(sym, dev, data_inputs,
initializer)
@@ -200,7 +216,7 @@ def SGLD(sym, X, Y, X_test, Y_test, total_iter_num,
if i < burn_in_iter_num:
continue
else:
- if 0 == (i - burn_in_iter_num) % thin_interval:
+ if (i - burn_in_iter_num) % thin_interval == 0:
if optimizer.lr_scheduler is not None:
lr = optimizer.lr_scheduler(optimizer.num_update)
else:
@@ -238,6 +254,9 @@ def DistilledSGLD(teacher_sym, student_sym,
minibatch_size=100,
task='classification',
dev=mx.gpu()):
+ """
+ Generate the implementation of DistilledSGLD
+ """
teacher_exe, teacher_params, teacher_params_grad, _ = \
get_executor(teacher_sym, dev, teacher_data_inputs,
teacher_initializer)
student_exe, student_params, student_params_grad, _ = \
@@ -323,13 +342,14 @@ def DistilledSGLD(teacher_sym, student_sym,
sample_test_acc(teacher_exe, X=X, Y=Y, label_num=10,
minibatch_size=minibatch_size)
print("Student: Test ACC %d/%d=%f, Train ACC %d/%d=%f" %
(test_correct, test_total,
- test_acc, train_correct,
train_total, train_acc))
+
test_acc, train_correct,
+
train_total, train_acc))
print("Teacher: Test ACC %d/%d=%f, Train ACC %d/%d=%f" \
% (teacher_test_correct, teacher_test_total,
teacher_test_acc,
teacher_train_correct, teacher_train_total,
teacher_train_acc))
else:
print("Current Iter Num: %d" % (i + 1), "Time Spent: %f" %
(end - start), "MSE:",
- sample_test_regression(exe=student_exe, X=X_test,
Y=Y_test,
+ sample_test_regression(exe=student_exe, X=X_test,
Y=Y_test,
minibatch_size=minibatch_size,
save_path='regression_DSGLD.txt'))
start = time.time()
diff --git a/example/bayesian-methods/bdk_demo.py b/example/bayesian-methods/bdk_demo.py
index 145dac10e2a..3c3d1f8c2f1 100644
--- a/example/bayesian-methods/bdk_demo.py
+++ b/example/bayesian-methods/bdk_demo.py
@@ -14,21 +14,25 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-
+"""
+Run Stochastic Gradient Langevin Dynamics (SGLD) and Bayesian Dark Knowledge
(BDK)
+"""
from __future__ import print_function
-import mxnet as mx
-import mxnet.ndarray as nd
+import argparse
+import time
import numpy
-import logging
import matplotlib.pyplot as plt
-from scipy.stats import gaussian_kde
-import argparse
-from algos import *
-from data_loader import *
-from utils import *
+import mxnet as mx
+import mxnet.ndarray as nd
+from algos import HMC, SGD, SGLD, DistilledSGLD
+from data_loader import load_mnist, load_toy, load_synthetic
+from utils import BiasXavier, SGLDScheduler
class CrossEntropySoftmax(mx.operator.NumpyOp):
+ """
+ Calculate CrossEntropy softmax function
+ """
def __init__(self):
super(CrossEntropySoftmax, self).__init__(False)
@@ -58,6 +62,9 @@ def backward(self, out_grad, in_data, out_data, in_grad):
class LogSoftmax(mx.operator.NumpyOp):
+ """
+ Generate helper functions to evaluate softmax loss function
+ """
def __init__(self):
super(LogSoftmax, self).__init__(False)
@@ -103,6 +110,9 @@ def regression_student_grad(student_outputs, teacher_pred,
teacher_noise_precisi
def get_mnist_sym(output_op=None, num_hidden=400):
+ """
+ get symbol of mnist
+ """
net = mx.symbol.Variable('data')
net = mx.symbol.FullyConnected(data=net, name='mnist_fc1',
num_hidden=num_hidden)
net = mx.symbol.Activation(data=net, name='mnist_relu1', act_type="relu")
@@ -117,6 +127,9 @@ def get_mnist_sym(output_op=None, num_hidden=400):
def synthetic_grad(X, theta, sigma1, sigma2, sigmax, rescale_grad=1.0,
grad=None):
+ """
+ Get synthetic gradient value
+ """
if grad is None:
grad = nd.empty(theta.shape, theta.context)
theta1 = theta.asnumpy()[0]
@@ -128,17 +141,18 @@ def synthetic_grad(X, theta, sigma1, sigma2, sigmax,
rescale_grad=1.0, grad=None
-(X - theta1 - theta2) ** 2 / (2 * vx))
grad_npy = numpy.zeros(theta.shape)
grad_npy[0] = -rescale_grad * ((numpy.exp(-(X - theta1) ** 2 / (2 * vx)) *
(X - theta1) / vx
- + numpy.exp(-(X - theta1 - theta2) ** 2 /
(2 * vx)) * (
- X - theta1 - theta2) / vx) /
denominator).sum() \
- + theta1 / v1
- grad_npy[1] = -rescale_grad * ((numpy.exp(-(X - theta1 - theta2) ** 2 / (2
* vx)) * (
- X - theta1 - theta2) / vx) / denominator).sum() \
- + theta2 / v2
+ + numpy.exp(-(X - theta1 - theta2) ** 2 /
(2 * vx)) *
+ (X - theta1 - theta2) / vx) /
denominator).sum() + theta1 / v1
+ grad_npy[1] = -rescale_grad * ((numpy.exp(-(X - theta1 - theta2) ** 2 / (2
* vx)) *
+ (X - theta1 - theta2) / vx) /
denominator).sum() + theta2 / v2
grad[:] = grad_npy
return grad
def get_toy_sym(teacher=True, teacher_noise_precision=None):
+ """
+ Get toy symbol
+ """
if teacher:
net = mx.symbol.Variable('data')
net = mx.symbol.FullyConnected(data=net, name='teacher_fc1',
num_hidden=100)
@@ -160,8 +174,8 @@ def dev():
return mx.gpu()
-def run_mnist_SGD(training_num=50000):
- X, Y, X_test, Y_test = load_mnist(training_num)
+def run_mnist_SGD(num_training=50000):
+ X, Y, X_test, Y_test = load_mnist(num_training)
minibatch_size = 100
net = get_mnist_sym()
data_shape = (minibatch_size,) + X.shape[1::]
@@ -175,8 +189,8 @@ def run_mnist_SGD(training_num=50000):
lr=5E-6, prior_precision=1.0, minibatch_size=100)
-def run_mnist_SGLD(training_num=50000):
- X, Y, X_test, Y_test = load_mnist(training_num)
+def run_mnist_SGLD(num_training=50000):
+ X, Y, X_test, Y_test = load_mnist(num_training)
minibatch_size = 100
net = get_mnist_sym()
data_shape = (minibatch_size,) + X.shape[1::]
@@ -191,10 +205,13 @@ def run_mnist_SGLD(training_num=50000):
thin_interval=100, burn_in_iter_num=1000)
-def run_mnist_DistilledSGLD(training_num=50000):
- X, Y, X_test, Y_test = load_mnist(training_num)
+def run_mnist_DistilledSGLD(num_training=50000):
+ """
+ Run DistilledSGLD on mnist dataset
+ """
+ X, Y, X_test, Y_test = load_mnist(num_training)
minibatch_size = 100
- if training_num >= 10000:
+ if num_training >= 10000:
num_hidden = 800
total_iter_num = 1000000
teacher_learning_rate = 1E-6
@@ -235,6 +252,9 @@ def run_mnist_DistilledSGLD(training_num=50000):
def run_toy_SGLD():
+ """
+ Run SGLD on toy dataset
+ """
X, Y, X_test, Y_test = load_toy()
minibatch_size = 1
teacher_noise_precision = 1.0 / 9.0
@@ -243,20 +263,28 @@ def run_toy_SGLD():
data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
'teacher_output_label': nd.zeros((minibatch_size, 1),
ctx=dev())}
initializer = mx.init.Uniform(0.07)
- exe, params, _ = \
- SGLD(sym=net, data_inputs=data_inputs,
- X=X, Y=Y, X_test=X_test, Y_test=Y_test, total_iter_num=50000,
- initializer=initializer,
- learning_rate=1E-4,
- # lr_scheduler=mx.lr_scheduler.FactorScheduler(100000,
0.5),
- prior_precision=0.1,
- burn_in_iter_num=1000,
- thin_interval=10,
- task='regression',
- minibatch_size=minibatch_size, dev=dev())
+ exe, params = SGLD(sym=net,
+ data_inputs=data_inputs,
+ X=X,
+ Y=Y,
+ X_test=X_test,
+ Y_test=Y_test,
+ total_iter_num=50000,
+ initializer=initializer,
+ learning_rate=1E-4,
+ # lr_scheduler=mx.lr_scheduler.FactorScheduler(100000,
0.5),
+ prior_precision=0.1,
+ burn_in_iter_num=1000,
+ thin_interval=10,
+ task='regression',
+ minibatch_size=minibatch_size,
+ dev=dev()) # disable=unbalanced-tuple-unpacking
def run_toy_DistilledSGLD():
+ """
+ Run DistilledSGLD on toy dataset
+ """
X, Y, X_test, Y_test = load_toy()
minibatch_size = 1
teacher_noise_precision = 1.0
@@ -288,6 +316,9 @@ def run_toy_DistilledSGLD():
def run_toy_HMC():
+ """
+ Run HMC on toy dataset
+ """
X, Y, X_test, Y_test = load_toy()
minibatch_size = Y.shape[0]
noise_precision = 1 / 9.0
@@ -302,6 +333,9 @@ def run_toy_HMC():
def run_synthetic_SGLD():
+ """
+ Run synthetic SGLD
+ """
theta1 = 0
theta2 = 1
sigma1 = numpy.sqrt(10)
@@ -322,14 +356,14 @@ def run_synthetic_SGLD():
grad = nd.empty((2,), mx.cpu())
samples = numpy.zeros((2, total_iter_num))
start = time.time()
- for i in xrange(total_iter_num):
+ for i in range(total_iter_num):
if (i + 1) % 100000 == 0:
end = time.time()
print("Iter:%d, Time spent: %f" % (i + 1, end - start))
start = time.time()
ind = numpy.random.randint(0, X.shape[0])
- synthetic_grad(X[ind], theta, sigma1, sigma2, sigmax, rescale_grad=
- X.shape[0] / float(minibatch_size), grad=grad)
+ synthetic_grad(X[ind], theta, sigma1, sigma2, sigmax,
+ rescale_grad=X.shape[0] / float(minibatch_size),
grad=grad)
updater('theta', grad, theta)
samples[:, i] = theta.asnumpy()
plt.hist2d(samples[0, :], samples[1, :], (200, 200), cmap=plt.cm.jet)
@@ -353,18 +387,18 @@ def run_synthetic_SGLD():
args = parser.parse_args()
training_num = args.training
if args.dataset == 1:
- if 0 == args.algorithm:
+ if args.algorithm == 0:
run_mnist_SGD(training_num)
- elif 1 == args.algorithm:
+ elif args.algorithm == 1:
run_mnist_SGLD(training_num)
else:
run_mnist_DistilledSGLD(training_num)
elif args.dataset == 0:
- if 1 == args.algorithm:
+ if args.algorithm == 1:
run_toy_SGLD()
- elif 2 == args.algorithm:
+ elif args.algorithm == 2:
run_toy_DistilledSGLD()
- elif 3 == args.algorithm:
+ elif args.algorithm == 3:
run_toy_HMC()
else:
run_synthetic_SGLD()
diff --git a/example/bayesian-methods/data_loader.py b/example/bayesian-methods/data_loader.py
index 92ca0cfb3a6..a800fe72d6a 100644
--- a/example/bayesian-methods/data_loader.py
+++ b/example/bayesian-methods/data_loader.py
@@ -14,14 +14,19 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-
+"""
+Create helper functions to load mnist dataset and toy dataset
+"""
from __future__ import print_function
-import numpy
import os
import ssl
+import numpy
def load_mnist(training_num=50000):
+ """
+ Load mnist dataset
+ """
data_path = os.path.join(os.path.dirname(os.path.realpath('__file__')),
'mnist.npz')
if not os.path.isfile(data_path):
from six.moves import urllib
diff --git a/example/bayesian-methods/utils.py b/example/bayesian-methods/utils.py
index a2744373e87..e1e6e34fc61 100644
--- a/example/bayesian-methods/utils.py
+++ b/example/bayesian-methods/utils.py
@@ -14,11 +14,12 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-
+"""
+Generate helper functions to Stochastic Gradient Langevin Dynamics (SGLD) and
Bayesian Dark Knowledge (BDK)
+"""
+import numpy
import mxnet as mx
import mxnet.ndarray as nd
-import numpy
-import logging
class BiasXavier(mx.initializer.Xavier):
@@ -26,7 +27,11 @@ def _init_bias(self, _, arr):
scale = numpy.sqrt(self.magnitude / arr.shape[0])
mx.random.uniform(-scale, scale, out=arr)
+
class SGLDScheduler(mx.lr_scheduler.LRScheduler):
+ """
+ Create SGLDScheduler class
+ """
def __init__(self, begin_rate, end_rate, total_iter_num, factor):
super(SGLDScheduler, self).__init__()
if factor >= 1.0:
@@ -44,7 +49,11 @@ def __call__(self, num_update):
self.count += 1
return self.base_lr
+
def get_executor(sym, ctx, data_inputs, initializer=None):
+ """
+ Get executor to Stochastic Gradient Langevin Dynamics and/or Bayesian Dark
Knowledge
+ """
data_shapes = {k: v.shape for k, v in data_inputs.items()}
arg_names = sym.list_arguments()
aux_names = sym.list_auxiliary_states()
@@ -62,14 +71,22 @@ def get_executor(sym, ctx, data_inputs, initializer=None):
initializer(k, v)
return exe, params, params_grad, aux_states
+
def copy_param(exe, new_param=None):
+ """
+ Create copy of parameters
+ """
if new_param is None:
- new_param = {k: nd.empty(v.shape, ctx=mx.cpu()) for k,v in
exe.arg_dict.items()}
+ new_param = {k: nd.empty(v.shape, ctx=mx.cpu()) for k, v in
exe.arg_dict.items()}
for k, v in new_param.items():
exe.arg_dict[k].copyto(v)
return new_param
+
def sample_test_acc(exe, X, Y, sample_pool=None, label_num=None,
minibatch_size=100):
+ """
+ Generate sample test to evaluate accuracy
+ """
if label_num is None:
pred = numpy.zeros((X.shape[0],)).astype('float32')
else:
@@ -89,12 +106,12 @@ def sample_test_acc(exe, X, Y, sample_pool=None,
label_num=None, minibatch_size=
else:
old_param = copy_param(exe)
for sample in sample_pool:
- if type(sample) is list:
+ if isinstance(sample, list):
denominator += sample[0]
else:
denominator += 1.0
for sample in sample_pool:
- if type(sample) is list:
+ if isinstance(sample, list):
ratio = sample[0]/denominator
param = sample[1]
else:
@@ -118,11 +135,14 @@ def sample_test_acc(exe, X, Y, sample_pool=None,
label_num=None, minibatch_size=
def sample_test_regression(exe, X, Y, sample_pool=None, minibatch_size=100,
save_path="regression.txt"):
+ """
+ Generate a sample test regression
+ """
old_param = copy_param(exe)
if sample_pool is not None:
pred = numpy.zeros(Y.shape + (len(sample_pool),))
ratio = numpy.zeros((len(sample_pool),))
- if type(sample_pool[0]) is list:
+ if isinstance(sample_pool[0], list):
denominator = sum(sample[0] for sample in sample_pool)
for i, sample in enumerate(sample_pool):
ratio[i] = sample[0]/float(denominator)
@@ -130,7 +150,7 @@ def sample_test_regression(exe, X, Y, sample_pool=None,
minibatch_size=100, save
ratio[:] = 1.0/ Y.shape[0]
iterator = mx.io.NDArrayIter(data=X, label=Y,
batch_size=minibatch_size, shuffle=False)
for i, sample in enumerate(sample_pool):
- if type(sample) is list:
+ if isinstance(sample, list):
sample_param = sample[1]
else:
sample_param = sample
@@ -146,7 +166,7 @@ def sample_test_regression(exe, X, Y, sample_pool=None,
minibatch_size=100, save
curr_instance += batch_len
mean = pred.mean(axis=2)
var = pred.std(axis=2)**2
- #print numpy.concatenate((Y, mean), axis=1)
+ # print numpy.concatenate((Y, mean), axis=1)
mse = numpy.square(Y.reshape((Y.shape[0], )) -
mean.reshape((mean.shape[0], ))).mean()
numpy.savetxt(save_path, numpy.concatenate((mean, var), axis=1))
else:
@@ -157,15 +177,21 @@ def sample_test_regression(exe, X, Y, sample_pool=None,
minibatch_size=100, save
for batch in iterator:
exe.arg_dict['data'][:] = batch.data[0]
exe.forward(is_train=False)
- mean_var[curr_instance:curr_instance + minibatch_size - batch.pad,
0] = exe.outputs[0].asnumpy()[:minibatch_size - batch.pad].flatten()
- mean_var[curr_instance:curr_instance + minibatch_size - batch.pad,
1] = numpy.exp(exe.outputs[1].asnumpy())[:minibatch_size - batch.pad].flatten()
+ mean_var[curr_instance:curr_instance + minibatch_size - batch.pad,
0] =\
+ exe.outputs[0].asnumpy()[:minibatch_size - batch.pad].flatten()
+ mean_var[curr_instance:curr_instance + minibatch_size - batch.pad,
1] = \
+ numpy.exp(exe.outputs[1].asnumpy())[:minibatch_size -
batch.pad].flatten()
curr_instance += minibatch_size - batch.pad
mse = numpy.square(Y.reshape((Y.shape[0],)) - mean_var[:, 0]).mean()
numpy.savetxt(save_path, mean_var)
exe.copy_params_from(old_param)
return mse
+
def pred_test(testing_data, exe, param_list=None, save_path=""):
+ """
+ Generate prediction on testset
+ """
ret = numpy.zeros((testing_data.shape[0], 2))
if param_list is None:
for i in range(testing_data.shape[0]):
@@ -177,8 +203,8 @@ def pred_test(testing_data, exe, param_list=None,
save_path=""):
else:
for i in range(testing_data.shape[0]):
pred = numpy.zeros((len(param_list),))
- for j in range(len(param_list)):
- exe.copy_params_from(param_list[j])
+ for (j, param) in enumerate(param_list):
+ exe.copy_params_from(param)
exe.arg_dict['data'][:] = testing_data[i, 0]
exe.forward(is_train=False)
pred[j] = exe.outputs[0].asnumpy()
diff --git a/example/caffe/caffe_net.py b/example/caffe/caffe_net.py
index 0459c901e1c..aa607a235f8 100644
--- a/example/caffe/caffe_net.py
+++ b/example/caffe/caffe_net.py
@@ -14,29 +14,37 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-
+"""
+Generate helper functions to load Caffe into MXNet
+"""
+import argparse
import mxnet as mx
from data import get_iterator
-import argparse
import train_model
+
def get_mlp():
"""
multi-layer perceptron
"""
data = mx.symbol.Variable('data')
- fc1 = mx.symbol.CaffeOp(data_0=data, num_weight=2, name='fc1',
prototxt="layer{type:\"InnerProduct\" inner_product_param{num_output: 128} }")
+ fc1 = mx.symbol.CaffeOp(data_0=data, num_weight=2, name='fc1',
+ prototxt="layer{type:\"InnerProduct\"
inner_product_param{num_output: 128} }")
act1 = mx.symbol.CaffeOp(data_0=fc1, prototxt="layer{type:\"TanH\"}")
- fc2 = mx.symbol.CaffeOp(data_0=act1, num_weight=2, name='fc2',
prototxt="layer{type:\"InnerProduct\" inner_product_param{num_output: 64} }")
+ fc2 = mx.symbol.CaffeOp(data_0=act1, num_weight=2, name='fc2',
+ prototxt="layer{type:\"InnerProduct\"
inner_product_param{num_output: 64} }")
act2 = mx.symbol.CaffeOp(data_0=fc2, prototxt="layer{type:\"TanH\"}")
- fc3 = mx.symbol.CaffeOp(data_0=act2, num_weight=2, name='fc3',
prototxt="layer{type:\"InnerProduct\" inner_product_param{num_output: 10}}")
+ fc3 = mx.symbol.CaffeOp(data_0=act2, num_weight=2, name='fc3',
+ prototxt="layer{type:\"InnerProduct\"
inner_product_param{num_output: 10}}")
if use_caffe_loss:
label = mx.symbol.Variable('softmax_label')
- mlp = mx.symbol.CaffeLoss(data=fc3, label=label, grad_scale=1,
name='softmax', prototxt="layer{type:\"SoftmaxWithLoss\"}")
+ mlp = mx.symbol.CaffeLoss(data=fc3, label=label, grad_scale=1,
name='softmax',
+ prototxt="layer{type:\"SoftmaxWithLoss\"}")
else:
mlp = mx.symbol.SoftmaxOutput(data=fc3, name='softmax')
return mlp
+
def get_lenet():
"""
LeCun, Yann, Leon Bottou, Yoshua Bengio, and Patrick
@@ -46,32 +54,46 @@ def get_lenet():
data = mx.symbol.Variable('data')
# first conv
- conv1 = mx.symbol.CaffeOp(data_0=data, num_weight=2,
prototxt="layer{type:\"Convolution\" convolution_param { num_output: 20
kernel_size: 5 stride: 1} }")
+ conv1 = mx.symbol.CaffeOp(data_0=data, num_weight=2,
+ prototxt="layer{type:\"Convolution\" "
+ "convolution_param { num_output: 20
kernel_size: 5 stride: 1} }")
act1 = mx.symbol.CaffeOp(data_0=conv1, prototxt="layer{type:\"TanH\"}")
- pool1 = mx.symbol.CaffeOp(data_0=act1, prototxt="layer{type:\"Pooling\"
pooling_param { pool: MAX kernel_size: 2 stride: 2}}")
+ pool1 = mx.symbol.CaffeOp(data_0=act1,
+ prototxt="layer{type:\"Pooling\" pooling_param {
pool: MAX kernel_size: 2 stride: 2}}")
# second conv
- conv2 = mx.symbol.CaffeOp(data_0=pool1, num_weight=2,
prototxt="layer{type:\"Convolution\" convolution_param { num_output: 50
kernel_size: 5 stride: 1} }")
+ conv2 = mx.symbol.CaffeOp(data_0=pool1, num_weight=2,
+ prototxt="layer{type:\"Convolution\" "
+ "convolution_param { num_output: 50
kernel_size: 5 stride: 1} }")
act2 = mx.symbol.CaffeOp(data_0=conv2, prototxt="layer{type:\"TanH\"}")
- pool2 = mx.symbol.CaffeOp(data_0=act2, prototxt="layer{type:\"Pooling\"
pooling_param { pool: MAX kernel_size: 2 stride: 2}}")
+ pool2 = mx.symbol.CaffeOp(data_0=act2,
+ prototxt="layer{type:\"Pooling\" pooling_param {
pool: MAX kernel_size: 2 stride: 2}}")
- fc1 = mx.symbol.CaffeOp(data_0=pool2, num_weight=2,
prototxt="layer{type:\"InnerProduct\" inner_product_param{num_output: 500} }")
+ fc1 = mx.symbol.CaffeOp(data_0=pool2, num_weight=2,
+ prototxt="layer{type:\"InnerProduct\"
inner_product_param{num_output: 500} }")
act3 = mx.symbol.CaffeOp(data_0=fc1, prototxt="layer{type:\"TanH\"}")
# second fullc
- fc2 = mx.symbol.CaffeOp(data_0=act3, num_weight=2,
prototxt="layer{type:\"InnerProduct\"inner_product_param{num_output: 10} }")
+ fc2 = mx.symbol.CaffeOp(data_0=act3, num_weight=2,
+
prototxt="layer{type:\"InnerProduct\"inner_product_param{num_output: 10} }")
if use_caffe_loss:
label = mx.symbol.Variable('softmax_label')
- lenet = mx.symbol.CaffeLoss(data=fc2, label=label, grad_scale=1,
name='softmax', prototxt="layer{type:\"SoftmaxWithLoss\"}")
+ lenet = mx.symbol.CaffeLoss(data=fc2, label=label, grad_scale=1,
name='softmax',
+ prototxt="layer{type:\"SoftmaxWithLoss\"}")
else:
lenet = mx.symbol.SoftmaxOutput(data=fc2, name='softmax')
return lenet
+
def get_network_from_json_file(file_name):
network = mx.sym.load(file_name)
return network
+
def parse_args():
+ """
+ Parse the arguments
+ """
parser = argparse.ArgumentParser(description='train an image classifier on
mnist')
parser.add_argument('--network', type=str, default='lenet',
help='the cnn to use (mlp | lenet | <path to network
json file>')
diff --git a/example/caffe/data.py b/example/caffe/data.py
index 15276c42360..d1760204682 100644
--- a/example/caffe/data.py
+++ b/example/caffe/data.py
@@ -14,42 +14,48 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-
+"""
+Create the helper functions to mnist dataset for Caffe operators in MXNet
+"""
import mxnet as mx
from mxnet.test_utils import get_mnist_ubyte
+
def get_iterator(data_shape, use_caffe_data):
+ """
+ Generate the iterator of mnist dataset
+ """
def get_iterator_impl_mnist(args, kv):
"""return train and val iterators for mnist"""
# download data
get_mnist_ubyte()
flat = False if len(data_shape) != 1 else True
- train = mx.io.MNISTIter(
- image = "data/train-images-idx3-ubyte",
- label = "data/train-labels-idx1-ubyte",
- input_shape = data_shape,
- batch_size = args.batch_size,
- shuffle = True,
- flat = flat,
- num_parts = kv.num_workers,
- part_index = kv.rank)
+ train = mx.io.MNISTIter(
+ image="data/train-images-idx3-ubyte",
+ label="data/train-labels-idx1-ubyte",
+ input_shape=data_shape,
+ batch_size=args.batch_size,
+ shuffle=True,
+ flat=flat,
+ num_parts=kv.num_workers,
+ part_index=kv.rank)
val = mx.io.MNISTIter(
- image = "data/t10k-images-idx3-ubyte",
- label = "data/t10k-labels-idx1-ubyte",
- input_shape = data_shape,
- batch_size = args.batch_size,
- flat = flat,
- num_parts = kv.num_workers,
- part_index = kv.rank)
+ image="data/t10k-images-idx3-ubyte",
+ label="data/t10k-labels-idx1-ubyte",
+ input_shape=data_shape,
+ batch_size=args.batch_size,
+ flat=flat,
+ num_parts=kv.num_workers,
+ part_index=kv.rank)
return (train, val)
def get_iterator_impl_caffe(args, kv):
flat = False if len(data_shape) != 1 else True
train = mx.io.CaffeDataIter(
- prototxt =
+ prototxt=
'layer { \
name: "mnist" \
type: "Data" \
@@ -67,13 +73,13 @@ def get_iterator_impl_caffe(args, kv):
backend: LMDB \
} \
}',
- flat = flat,
- num_examples = 60000
+ flat=flat,
+ num_examples=60000
# float32 is the default, so left out here in order to illustrate
)
val = mx.io.CaffeDataIter(
- prototxt =
+ prototxt=
'layer { \
name: "mnist" \
type: "Data" \
@@ -91,9 +97,9 @@ def get_iterator_impl_caffe(args, kv):
backend: LMDB \
} \
}',
- flat = flat,
- num_examples = 10000,
- dtype = "float32" # float32 is the default
+ flat=flat,
+ num_examples=10000,
+ dtype="float32" # float32 is the default
)
return train, val
diff --git a/example/caffe/train_model.py b/example/caffe/train_model.py
index 4290e71063e..a600cd07124 100644
--- a/example/caffe/train_model.py
+++ b/example/caffe/train_model.py
@@ -14,12 +14,18 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-
-import mxnet as mx
-import logging
+"""
+Train module with using Caffe operator in MXNet
+"""
import os
+import logging
+import mxnet as mx
+
def fit(args, network, data_loader, eval_metrics=None,
batch_end_callback=None):
+ """
+ Train the model with using Caffe operator in MXNet
+ """
# kvstore
kv = mx.kvstore.create(args.kv_store)
@@ -74,8 +80,8 @@ def fit(args, network, data_loader, eval_metrics=None,
batch_end_callback=None):
if 'lr_factor' in args and args.lr_factor < 1:
model_args['lr_scheduler'] = mx.lr_scheduler.FactorScheduler(
- step = max(int(epoch_size * args.lr_factor_epoch), 1),
- factor = args.lr_factor)
+ step=max(int(epoch_size * args.lr_factor_epoch), 1),
+ factor=args.lr_factor)
if 'clip_gradient' in args and args.clip_gradient is not None:
model_args['clip_gradient'] = args.clip_gradient
@@ -85,12 +91,11 @@ def fit(args, network, data_loader, eval_metrics=None,
batch_end_callback=None):
args.gpus is None or len(args.gpus.split(',')) is 1):
kv = None
-
mod = mx.mod.Module(network, context=devs)
if eval_metrics is None:
eval_metrics = ['accuracy']
- ## TopKAccuracy only allows top_k > 1
+ # TopKAccuracy only allows top_k > 1
for top_k in [5, 10, 20]:
eval_metrics.append(mx.metric.create('top_k_accuracy',
top_k=top_k))
@@ -102,8 +107,7 @@ def fit(args, network, data_loader, eval_metrics=None,
batch_end_callback=None):
batch_end_callback.append(mx.callback.Speedometer(args.batch_size, 50))
mod.fit(train_data=train, eval_metric=eval_metrics, eval_data=val,
optimizer='sgd',
- optimizer_params={'learning_rate':args.lr, 'momentum': 0.9, 'wd':
0.00001},
- num_epoch=args.num_epochs, batch_end_callback=batch_end_callback,
- initializer=mx.init.Xavier(factor_type="in", magnitude=2.34),
- kvstore=kv, epoch_end_callback=checkpoint, **model_args)
-
+ optimizer_params={'learning_rate':args.lr, 'momentum': 0.9, 'wd':
0.00001},
+ num_epoch=args.num_epochs, batch_end_callback=batch_end_callback,
+ initializer=mx.init.Xavier(factor_type="in", magnitude=2.34),
+ kvstore=kv, epoch_end_callback=checkpoint, **model_args)
diff --git a/example/capsnet/capsulelayers.py b/example/capsnet/capsulelayers.py
index 5ac4fad4914..774625c71ac 100644
--- a/example/capsnet/capsulelayers.py
+++ b/example/capsnet/capsulelayers.py
@@ -14,7 +14,9 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-
+"""
+Create layers of capsule net
+"""
import mxnet as mx
@@ -98,7 +100,8 @@ def __call__(self, data):
mx.sym.sum(mx.sym.broadcast_mul(c, inputs_hat_stopped,
name='broadcast_mul_' + str(i)),
axis=1, keepdims=True,
name='sum_' + str(i)), name='output_' + str(i),
squash_axis=4)
- bias_ = bias_ + mx.sym.sum(mx.sym.broadcast_mul(c,
inputs_hat_stopped, name='bias_broadcast_mul' + str(i)),
+ bias_ = bias_ + mx.sym.sum(mx.sym.broadcast_mul(c,
inputs_hat_stopped,
+
name='bias_broadcast_mul' + str(i)),
axis=4,
keepdims=True, name='bias_' +
str(i))
diff --git a/example/capsnet/capsulenet.py b/example/capsnet/capsulenet.py
index 67108757bf3..a0695f67dd3 100644
--- a/example/capsnet/capsulenet.py
+++ b/example/capsnet/capsulenet.py
@@ -14,24 +14,31 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-import mxnet as mx
-import numpy as np
+"""
+Generate MXNet implementation of CapsNet
+"""
import os
import re
import gzip
import struct
+import numpy as np
import scipy.ndimage as ndi
+import mxnet as mx
from capsulelayers import primary_caps, CapsuleLayer
from mxboard import SummaryWriter
+
def margin_loss(y_true, y_pred):
loss = y_true * mx.sym.square(mx.sym.maximum(0., 0.9 - y_pred)) +\
0.5 * (1 - y_true) * mx.sym.square(mx.sym.maximum(0., y_pred - 0.1))
return mx.sym.mean(data=mx.sym.sum(loss, 1))
-def capsnet(batch_size, n_class, num_routing,recon_loss_weight):
+def capsnet(batch_size, n_class, num_routing, recon_loss_weight):
+ """
+ Create CapsNet
+ """
# data.shape = [batch_size, 1, 28, 28]
data = mx.sym.Variable('data')
@@ -107,7 +114,8 @@ def read_data(label_url, image_url):
label = np.fromstring(flbl.read(), dtype=np.int8)
with gzip.open(download_data(image_url), 'rb') as fimg:
magic, num, rows, cols = struct.unpack(">IIII", fimg.read(16))
- image = np.fromstring(fimg.read(), dtype=np.uint8).reshape(len(label),
rows, cols)
+ image = np.fromstring(fimg.read(), dtype=np.uint8)
+ np.reshape(image, len(label), (rows, cols))
return label, image
@@ -116,10 +124,13 @@ def to4d(img):
class LossMetric(mx.metric.EvalMetric):
- def __init__(self, batch_size, num_gpu):
+ """
+ Evaluate the loss function
+ """
+ def __init__(self, batch_size, num_gpus):
super(LossMetric, self).__init__('LossMetric')
self.batch_size = batch_size
- self.num_gpu = num_gpu
+ self.num_gpu = num_gpus
self.sum_metric = 0
self.num_inst = 0
self.loss = 0.0
@@ -130,6 +141,9 @@ def __init__(self, batch_size, num_gpu):
self.n_batch = 0
def update(self, labels, preds):
+ """
+ Update the hyper-parameters and loss of CapsNet
+ """
batch_sum_metric = 0
batch_num_inst = 0
for label, pred_outcaps in zip(labels[0], preds[0]):
@@ -146,7 +160,7 @@ def update(self, labels, preds):
self.batch_sum_metric = batch_sum_metric
self.batch_num_inst = batch_num_inst
self.batch_loss = batch_loss
- self.n_batch += 1
+ self.n_batch += 1
def get_name_value(self):
acc = float(self.sum_metric)/float(self.num_inst)
@@ -184,6 +198,9 @@ def __call__(self, num_update):
def do_training(num_epoch, optimizer, kvstore, learning_rate, model_prefix,
decay):
+ """
+ Run training to CapsNet
+ """
summary_writer = SummaryWriter(args.tblog_dir)
lr_scheduler = SimpleLRScheduler(learning_rate)
optimizer_params = {'lr_scheduler': lr_scheduler}
@@ -218,7 +235,8 @@ def do_training(num_epoch, optimizer, kvstore,
learning_rate, model_prefix, deca
summary_writer.add_scalar('val_loss', val_loss, n_epoch)
summary_writer.add_scalar('val_recon_err', val_recon_err, n_epoch)
- print('Epoch[%d] train acc: %.4f loss: %.6f recon_err: %.6f' %
(n_epoch, train_acc, train_loss, train_recon_err))
+ print('Epoch[%d] train acc: %.4f loss: %.6f recon_err: %.6f' %
(n_epoch, train_acc, train_loss,
+
train_recon_err))
print('Epoch[%d] val acc: %.4f loss: %.6f recon_err: %.6f' % (n_epoch,
val_acc, val_loss, val_recon_err))
print('SAVE CHECKPOINT')
@@ -227,10 +245,10 @@ def do_training(num_epoch, optimizer, kvstore,
learning_rate, model_prefix, deca
lr_scheduler.learning_rate = learning_rate * (decay ** n_epoch)
-def apply_transform(x,
- transform_matrix,
- fill_mode='nearest',
- cval=0.):
+def apply_transform(x, transform_matrix, fill_mode='nearest', cval=0.):
+ """
+ Apply transform on nd.array
+ """
x = np.rollaxis(x, 0, 0)
final_affine_matrix = transform_matrix[:2, :2]
final_offset = transform_matrix[:2, 2]
@@ -255,30 +273,53 @@ def random_shift(x, width_shift_fraction,
height_shift_fraction):
x = apply_transform(x, shift_matrix, 'nearest')
return x
+
def _shuffle(data, idx):
"""Shuffle the data."""
shuffle_data = []
- for k, v in data:
- shuffle_data.append((k, mx.ndarray.array(v.asnumpy()[idx], v.context)))
+ for idx_k, idx_v in data:
+ shuffle_data.append((idx_k, mx.ndarray.array(idx_v.asnumpy()[idx],
idx_v.context)))
return shuffle_data
+
class MNISTCustomIter(mx.io.NDArrayIter):
-
+ """
+ Create custom iterator of mnist dataset
+ """
+ def __init__(self, data, label, batch_size, shuffle):
+ self.data = data
+ self.label = label
+ self.batch_size = batch_size
+ self.shuffle = shuffle
+ self.cursor = None
+
def reset(self):
+ """
+ Reset class MNISTCustomIter(mx.io.NDArrayIter):
+ """
# shuffle data
if self.is_train:
np.random.shuffle(self.idx)
self.data = _shuffle(self.data, self.idx)
self.label = _shuffle(self.label, self.idx)
+
if self.last_batch_handle == 'roll_over' and self.cursor >
self.num_data:
- self.cursor = -self.batch_size +
(self.cursor%self.num_data)%self.batch_size
+ self.cursor = -self.batch_size + (self.cursor % self.num_data) %
self.batch_size
else:
self.cursor = -self.batch_size
+
def set_is_train(self, is_train):
+ """
+ Set training flag
+ """
self.is_train = is_train
+
def next(self):
+ """
+ Generate next of iterator
+ """
if self.iter_next():
if self.is_train:
data_raw_list = self.getdata()
@@ -288,8 +329,7 @@ def next(self):
return mx.io.DataBatch(data=[mx.nd.array(data_shifted)],
label=self.getlabel(),
pad=self.getpad(), index=None)
else:
- return mx.io.DataBatch(data=self.getdata(),
label=self.getlabel(), \
- pad=self.getpad(), index=None)
+ return mx.io.DataBatch(data=self.getdata(),
label=self.getlabel(), pad=self.getpad(), index=None)
else:
raise StopIteration
@@ -298,10 +338,9 @@ def next(self):
if __name__ == "__main__":
# Read mnist data set
path = 'http://yann.lecun.com/exdb/mnist/'
- (train_lbl, train_img) = read_data(
- path + 'train-labels-idx1-ubyte.gz', path +
'train-images-idx3-ubyte.gz')
- (val_lbl, val_img) = read_data(
- path + 't10k-labels-idx1-ubyte.gz', path + 't10k-images-idx3-ubyte.gz')
+ (train_lbl, train_img) = read_data(path + 'train-labels-idx1-ubyte.gz',
path + 'train-images-idx3-ubyte.gz')
+ (val_lbl, val_img) = read_data(path + 't10k-labels-idx1-ubyte.gz', path +
't10k-images-idx3-ubyte.gz')
+
# set batch size
import argparse
parser = argparse.ArgumentParser()
@@ -331,10 +370,13 @@ def next(self):
# generate train_iter, val_iter
train_iter = MNISTCustomIter(data=to4d(train_img), label=train_lbl,
batch_size=int(args.batch_size), shuffle=True)
train_iter.set_is_train(True)
- val_iter = MNISTCustomIter(data=to4d(val_img), label=val_lbl,
batch_size=int(args.batch_size),)
+ val_iter = MNISTCustomIter(data=to4d(val_img), label=val_lbl,
batch_size=int(args.batch_size), shuffle=True)
val_iter.set_is_train(False)
# define capsnet
- final_net = capsnet(batch_size=int(args.batch_size/num_gpu), n_class=10,
num_routing=args.num_routing, recon_loss_weight=args.recon_loss_weight)
+ final_net = capsnet(batch_size=int(args.batch_size/num_gpu),
+ n_class=10,
+ num_routing=args.num_routing,
+ recon_loss_weight=args.recon_loss_weight)
# set metric
loss_metric = LossMetric(args.batch_size/num_gpu, 1)
@@ -343,5 +385,6 @@ def next(self):
module.bind(data_shapes=train_iter.provide_data,
label_shapes=val_iter.provide_label,
for_training=True)
+
do_training(num_epoch=args.num_epoch, optimizer='adam', kvstore='device',
learning_rate=args.lr,
model_prefix=args.model_prefix, decay=args.decay)
diff --git a/example/cnn_chinese_text_classification/data_helpers.py
b/example/cnn_chinese_text_classification/data_helpers.py
index b3a13deec77..7030a98171e 100644
--- a/example/cnn_chinese_text_classification/data_helpers.py
+++ b/example/cnn_chinese_text_classification/data_helpers.py
@@ -14,6 +14,9 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
+
+"""Help functions to support for implementing CNN + Highway Network for
Chinese Text Classification in MXNet"""
+
import codecs
import itertools
import os
@@ -40,19 +43,23 @@ def clean_str(string):
string = re.sub(r"\'ll", " \'ll", string)
string = re.sub(r",", " , ", string)
string = re.sub(r"!", " ! ", string)
- string = re.sub(r"\(", " \( ", string)
- string = re.sub(r"\)", " \) ", string)
- string = re.sub(r"\?", " \? ", string)
+ string = re.sub(r"\(", r" \( ", string)
+ string = re.sub(r"\)", r" \) ", string)
+ string = re.sub(r"\?", r" \? ", string)
string = re.sub(r"\s{2,}", " ", string)
return string.strip().lower()
def get_chinese_text():
+ """
+ Download the chinese_text dataset and unzip it
+ """
if not os.path.isdir("data/"):
os.system("mkdir data/")
if (not os.path.exists('data/pos.txt')) or \
(not os.path.exists('data/neg')):
- os.system("wget -q
https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/example/chinese_text.zip
-P data/")
+ os.system("wget -q
https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/example/chinese_text.zip
"
+ "-P data/")
os.chdir("./data")
os.system("unzip -u chinese_text.zip")
os.chdir("..")
@@ -92,8 +99,9 @@ def pad_sentences(sentences, padding_word="</s>"):
"""
sequence_length = max(len(x) for x in sentences)
padded_sentences = []
- for i in range(len(sentences)):
- sentence = sentences[i]
+ for i, element in enumerate(sentences):
+ print(i, element)
+ sentence = element
num_padding = sequence_length - len(sentence)
new_sentence = sentence + [padding_word] * num_padding
padded_sentences.append(new_sentence)
@@ -116,30 +124,30 @@ def build_vocab(sentences):
def build_input_data(sentences, labels, vocabulary):
"""
- Maps sentencs and labels to vectors based on a vocabulary.
+ Maps sentences and labels to vectors based on a vocabulary.
"""
x = np.array([[vocabulary[word] for word in sentence] for sentence in
sentences])
y = np.array(labels)
return [x, y]
-def build_input_data_with_word2vec(sentences, labels, word2vec):
- """Map sentences and labels to vectors based on a pretrained word2vec"""
+def build_input_data_with_word2vec(sentences, labels, word2vec_list):
+ """Map sentences and labels to vectors based on a pre-trained word2vec"""
x_vec = []
for sent in sentences:
vec = []
for word in sent:
- if word in word2vec:
- vec.append(word2vec[word])
+ if word in word2vec_list:
+ vec.append(word2vec_list[word])
else:
- vec.append(word2vec['</s>'])
+ vec.append(word2vec_list['</s>'])
x_vec.append(vec)
x_vec = np.array(x_vec)
y_vec = np.array(labels)
return [x_vec, y_vec]
-def load_data_with_word2vec(word2vec):
+def load_data_with_word2vec(word2vec_list):
"""
Loads and preprocessed data for the MR dataset.
Returns input vectors, labels, vocabulary, and inverse vocabulary.
@@ -148,7 +156,7 @@ def load_data_with_word2vec(word2vec):
sentences, labels = load_data_and_labels()
sentences_padded = pad_sentences(sentences)
# vocabulary, vocabulary_inv = build_vocab(sentences_padded)
- return build_input_data_with_word2vec(sentences_padded, labels, word2vec)
+ return build_input_data_with_word2vec(sentences_padded, labels,
word2vec_list)
def load_data():
@@ -182,18 +190,21 @@ def batch_iter(data, batch_size, num_epochs):
def load_pretrained_word2vec(infile):
+ """
+ Load the pre-trained word2vec from file.
+ """
if isinstance(infile, str):
infile = open(infile)
- word2vec = {}
+ word2vec_list = {}
for idx, line in enumerate(infile):
if idx == 0:
vocab_size, dim = line.strip().split()
else:
tks = line.strip().split()
- word2vec[tks[0]] = map(float, tks[1:])
+ word2vec_list[tks[0]] = map(float, tks[1:])
- return word2vec
+ return word2vec_list
def load_google_word2vec(path):
diff --git a/example/cnn_chinese_text_classification/text_cnn.py
b/example/cnn_chinese_text_classification/text_cnn.py
index 4598a52e667..f98302aa182 100644
--- a/example/cnn_chinese_text_classification/text_cnn.py
+++ b/example/cnn_chinese_text_classification/text_cnn.py
@@ -20,12 +20,14 @@
# -*- coding: utf-8 -*-
-import sys, os
-import mxnet as mx
-import numpy as np
-import argparse
+"""Implementing CNN + Highway Network for Chinese Text Classification in
MXNet"""
+
+import os
+import sys
import logging
-import time
+import argparse
+import numpy as np
+import mxnet as mx
from mxnet import random
from mxnet.initializer import Xavier, Initializer
@@ -63,12 +65,30 @@
def save_model():
+ """
+ Save cnn model
+
+ Returns
+ ----------
+ callback: A callback function that can be passed as epoch_end_callback to
fit
+ """
if not os.path.exists("checkpoint"):
os.mkdir("checkpoint")
return mx.callback.do_checkpoint("checkpoint/checkpoint", args.save_period)
def highway(data):
+ """
+ Construct highway net
+
+ Parameters
+ ----------
+ data:
+
+ Returns
+ ----------
+ Highway Networks
+ """
_data = data
high_weight = mx.sym.Variable('high_weight')
high_bias = mx.sym.Variable('high_bias')
@@ -85,20 +105,42 @@ def highway(data):
def data_iter(batch_size, num_embed, pre_trained_word2vec=False):
+ """
+ Construct data iter
+
+ Parameters
+ ----------
+ batch_size: int
+ num_embed: int
+ pre_trained_word2vec: boolean
+ identify the pre-trained layers or not
+ Returns
+ ----------
+ train_set: DataIter
+ Train DataIter
+ valid: DataIter
+ Valid DataIter
+ sentences_size: int
+ array dimensions
+ embedded_size: int
+ array dimensions
+ vocab_size: int
+ array dimensions
+ """
logger.info('Loading data...')
if pre_trained_word2vec:
word2vec = data_helpers.load_pretrained_word2vec('data/rt.vec')
x, y = data_helpers.load_data_with_word2vec(word2vec)
- # reshpae for convolution input
+ # reshape for convolution input
x = np.reshape(x, (x.shape[0], 1, x.shape[1], x.shape[2]))
- embed_size = x.shape[-1]
- sentence_size = x.shape[2]
- vocab_size = -1
+ embedded_size = x.shape[-1]
+ sentences_size = x.shape[2]
+ vocabulary_size = -1
else:
x, y, vocab, vocab_inv = data_helpers.load_data()
- embed_size = num_embed
- sentence_size = x.shape[1]
- vocab_size = len(vocab)
+ embedded_size = num_embed
+ sentences_size = x.shape[1]
+ vocabulary_size = len(vocab)
# randomly shuffle data
np.random.seed(10)
@@ -109,30 +151,56 @@ def data_iter(batch_size, num_embed,
pre_trained_word2vec=False):
# split train/valid set
x_train, x_dev = x_shuffled[:-1000], x_shuffled[-1000:]
y_train, y_dev = y_shuffled[:-1000], y_shuffled[-1000:]
- logger.info('Train/Valid split: %d/%d' % (len(y_train), len(y_dev)))
+ logger.info('Train/Valid split: %d/%d', len(y_train), len(y_dev))
logger.info('train shape: %(shape)s', {'shape': x_train.shape})
logger.info('valid shape: %(shape)s', {'shape': x_dev.shape})
- logger.info('sentence max words: %(shape)s', {'shape': sentence_size})
- logger.info('embedding size: %(msg)s', {'msg': embed_size})
- logger.info('vocab size: %(msg)s', {'msg': vocab_size})
+ logger.info('sentence max words: %(shape)s', {'shape': sentences_size})
+ logger.info('embedding size: %(msg)s', {'msg': embedded_size})
+ logger.info('vocab size: %(msg)s', {'msg': vocabulary_size})
- train = mx.io.NDArrayIter(
+ train_set = mx.io.NDArrayIter(
x_train, y_train, batch_size, shuffle=True)
valid = mx.io.NDArrayIter(
x_dev, y_dev, batch_size)
- return (train, valid, sentence_size, embed_size, vocab_size)
+ return train_set, valid, sentences_size, embedded_size, vocabulary_size
-def sym_gen(batch_size, sentence_size, num_embed, vocab_size,
- num_label=2, filter_list=[3, 4, 5], num_filter=100,
+def sym_gen(batch_size, sentences_size, num_embed, vocabulary_size,
+ num_label=2, filter_list=None, num_filter=100,
dropout=0.0, pre_trained_word2vec=False):
+ """
+ Generate network symbol
+
+ Parameters
+ ----------
+ batch_size: int
+ sentences_size: int
+ num_embed: int
+ vocabulary_size: int
+ num_label: int
+ filter_list: list
+ num_filter: int
+ dropout: int
+ pre_trained_word2vec: boolean
+ identify the pre-trained layers or not
+ Returns
+ ----------
+ sm: symbol
+ data: list of str
+ data names
+ softmax_label: list of str
+ label names
+ """
input_x = mx.sym.Variable('data')
input_y = mx.sym.Variable('softmax_label')
# embedding layer
if not pre_trained_word2vec:
- embed_layer = mx.sym.Embedding(data=input_x, input_dim=vocab_size,
output_dim=num_embed, name='vocab_embed')
- conv_input = mx.sym.Reshape(data=embed_layer,
target_shape=(batch_size, 1, sentence_size, num_embed))
+ embed_layer = mx.sym.Embedding(data=input_x,
+ input_dim=vocabulary_size,
+ output_dim=num_embed,
+ name='vocab_embed')
+ conv_input = mx.sym.Reshape(data=embed_layer,
target_shape=(batch_size, 1, sentences_size, num_embed))
else:
conv_input = input_x
@@ -141,7 +209,7 @@ def sym_gen(batch_size, sentence_size, num_embed,
vocab_size,
for i, filter_size in enumerate(filter_list):
convi = mx.sym.Convolution(data=conv_input, kernel=(filter_size,
num_embed), num_filter=num_filter)
relui = mx.sym.Activation(data=convi, act_type='relu')
- pooli = mx.sym.Pooling(data=relui, pool_type='max',
kernel=(sentence_size - filter_size + 1, 1), stride=(1, 1))
+ pooli = mx.sym.Pooling(data=relui, pool_type='max',
kernel=(sentences_size - filter_size + 1, 1), stride=(1, 1))
pooled_outputs.append(pooli)
# combine all pooled outputs
@@ -170,10 +238,28 @@ def sym_gen(batch_size, sentence_size, num_embed,
vocab_size,
return sm, ('data',), ('softmax_label',)
-def train(symbol, train_iter, valid_iter, data_names, label_names):
- devs = mx.cpu() if args.gpus is None or args.gpus is '' else [
- mx.gpu(int(i)) for i in args.gpus.split(',')]
- module = mx.mod.Module(symbol, data_names=data_names,
label_names=label_names, context=devs)
+def train(symbol_data, train_iterator, valid_iterator, data_column_names,
target_names):
+ """
+ Train cnn model
+
+ Parameters
+ ----------
+ symbol_data: symbol
+ train_iterator: DataIter
+ Train DataIter
+ valid_iterator: DataIter
+ Valid DataIter
+ data_column_names: list of str
+ Defaults to ('data') for a typical model used in image
classification
+ target_names: list of str
+ Defaults to ('softmax_label') for a typical model used in
image classification
+ """
+ devs = mx.cpu() # default setting
+ if args.gpus is not None:
+ for i in args.gpus.split(','):
+ mx.gpu(int(i))
+ devs = mx.gpu()
+ module = mx.mod.Module(symbol_data, data_names=data_column_names,
label_names=target_names, context=devs)
init_params = {
'vocab_embed_weight': {'uniform': 0.1},
@@ -185,7 +271,7 @@ def train(symbol, train_iter, valid_iter, data_names,
label_names):
'cls_weight': {'uniform': 0.1}, 'cls_bias': {'costant': 0},
}
# custom init_params
- module.bind(data_shapes=train_iter.provide_data,
label_shapes=train_iter.provide_label)
+ module.bind(data_shapes=train_iterator.provide_data,
label_shapes=train_iterator.provide_label)
module.init_params(CustomInit(init_params))
lr_sch = mx.lr_scheduler.FactorScheduler(step=25000, factor=0.999)
module.init_optimizer(
@@ -195,8 +281,8 @@ def norm_stat(d):
return mx.nd.norm(d) / np.sqrt(d.size)
mon = mx.mon.Monitor(25000, norm_stat)
- module.fit(train_data=train_iter,
- eval_data=valid_iter,
+ module.fit(train_data=train_iterator,
+ eval_data=valid_iterator,
eval_metric='acc',
kvstore=args.kv_store,
monitor=mon,
diff --git a/example/cnn_text_classification/data_helpers.py
b/example/cnn_text_classification/data_helpers.py
index b6fe1e6917a..948b95105c8 100644
--- a/example/cnn_text_classification/data_helpers.py
+++ b/example/cnn_text_classification/data_helpers.py
@@ -15,6 +15,8 @@
# specific language governing permissions and limitations
# under the License.
+"""Help functions to support for implementing CNN + Highway Network for Text
Classification in MXNet"""
+
import itertools
import os
import re
@@ -40,9 +42,9 @@ def clean_str(string):
string = re.sub(r"\'ll", " \'ll", string)
string = re.sub(r",", " , ", string)
string = re.sub(r"!", " ! ", string)
- string = re.sub(r"\(", " \( ", string)
- string = re.sub(r"\)", " \) ", string)
- string = re.sub(r"\?", " \? ", string)
+ string = re.sub(r"\(", r" \( ", string)
+ string = re.sub(r"\)", r" \) ", string)
+ string = re.sub(r"\?", r" \? ", string)
string = re.sub(r"\s{2,}", " ", string)
return string.strip().lower()
@@ -81,8 +83,8 @@ def pad_sentences(sentences, padding_word="</s>"):
"""
sequence_length = max(len(x) for x in sentences)
padded_sentences = []
- for i in range(len(sentences)):
- sentence = sentences[i]
+ for i, sentence in enumerate(sentences):
+ print(i, sentence)
num_padding = sequence_length - len(sentence)
new_sentence = sentence + [padding_word] * num_padding
padded_sentences.append(new_sentence)
@@ -111,23 +113,24 @@ def build_input_data(sentences, labels, vocabulary):
y = np.array(labels)
return [x, y]
-def build_input_data_with_word2vec(sentences, labels, word2vec):
+
+def build_input_data_with_word2vec(sentences, labels, word2vec_list):
"""Map sentences and labels to vectors based on a pretrained word2vec"""
x_vec = []
for sent in sentences:
vec = []
for word in sent:
- if word in word2vec:
- vec.append(word2vec[word])
+ if word in word2vec_list:
+ vec.append(word2vec_list[word])
else:
- vec.append(word2vec['</s>'])
+ vec.append(word2vec_list['</s>'])
x_vec.append(vec)
x_vec = np.array(x_vec)
y_vec = np.array(labels)
return [x_vec, y_vec]
-def load_data_with_word2vec(word2vec):
+def load_data_with_word2vec(word2vec_list):
"""
Loads and preprocessed data for the MR dataset.
Returns input vectors, labels, vocabulary, and inverse vocabulary.
@@ -136,7 +139,7 @@ def load_data_with_word2vec(word2vec):
sentences, labels = load_data_and_labels()
sentences_padded = pad_sentences(sentences)
# vocabulary, vocabulary_inv = build_vocab(sentences_padded)
- return build_input_data_with_word2vec(sentences_padded, labels, word2vec)
+ return build_input_data_with_word2vec(sentences_padded, labels,
word2vec_list)
def load_data():
@@ -170,18 +173,21 @@ def batch_iter(data, batch_size, num_epochs):
def load_pretrained_word2vec(infile):
+ """
+ Load the pre-trained word2vec from file.
+ """
if isinstance(infile, str):
infile = open(infile)
- word2vec = {}
+ word2vec_list = {}
for idx, line in enumerate(infile):
if idx == 0:
vocab_size, dim = line.strip().split()
else:
tks = line.strip().split()
- word2vec[tks[0]] = map(float, tks[1:])
+ word2vec_list[tks[0]] = map(float, tks[1:])
- return word2vec
+ return word2vec_list
def load_google_word2vec(path):
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services